import numpy as np
import time
import json
import requests
import boto3
import os
import sagemaker
from sagemaker import get_execution_role
from sagemaker.session import Session

# Set up the SageMaker session, execution role, Region, and default S3 bucket
role = get_execution_role()
sess = Session()
region = sess.boto_region_name
bucket = sess.default_bucket()
import tensorflow as tf
import tarfile

# Export a pretrained ResNet50 as a TensorFlow SavedModel and package it
# as model.tar.gz, the archive format SageMaker expects
tf.keras.backend.set_image_data_format('channels_last')
pretrained_model = tf.keras.applications.resnet.ResNet50()
saved_model_dir = '1'
tf.saved_model.save(pretrained_model, saved_model_dir)
with tarfile.open('model.tar.gz', 'w:gz') as tar:
    tar.add(saved_model_dir)
from sagemaker.utils import name_from_base

# Upload the packaged model to the default S3 bucket
prefix = name_from_base('ResNet50')
input_model_path = sess.upload_data(
    path='model.tar.gz',
    bucket=bucket,
    key_prefix=prefix)
print('S3 path for input model: {}'.format(input_model_path))
from sagemaker.tensorflow import TensorFlowModel

# Create a TensorFlow SageMaker model from the uploaded artifact
tensorflow_model = TensorFlowModel(
    model_data=input_model_path,
    role=role,
    framework_version='2.3')

# Compile the model for the EI accelerator with SageMaker Neo
output_path = '/'.join(input_model_path.split('/')[:-1])
compilation_job_name = prefix + "-fp32"
compiled_model_fp32 = tensorflow_model.compile(
    target_instance_family='ml_eia2',
    input_shape={"input_1": [1, 224, 224, 3]},
    output_path=output_path,
    role=role,
    job_name=compilation_job_name,
    framework='tensorflow',
    compiler_options={"precision_mode": "fp32"}
)
# Deploy the compiled model to a SageMaker endpoint with EI attached
predictor_compiled_fp32 = compiled_model_fp32.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.xlarge',
    accelerator_type='ml.eia2.large'
)
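The benchmark that follows needs a test payload named data. Here is a minimal sketch, assuming a random tensor matching the input_1 shape declared at compile time; the TensorFlow Serving REST format wraps the batch in an 'instances' key, and because only latency is measured, the values don't need to be a real image:

# Build a random test payload matching the compiled input shape
# (assumption: any [1, 224, 224, 3] batch works for latency testing)
random_input = np.random.rand(1, 224, 224, 3)
data = {'instances': random_input.tolist()}

# Sanity-check the compiled endpoint with a single prediction
predictor_compiled_fp32.predict(data)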
# Create a second TensorFlow SageMaker model from the same artifact
tensorflow_model = TensorFlowModel(
    model_data=input_model_path,
    role=role,
    framework_version='2.3')

# Deploy the uncompiled model to a SageMaker endpoint with EI attached
predictor_uncompiled = tensorflow_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.xlarge',
    accelerator_type='ml.eia2.large'
)
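The helper benchmark_sm_endpoint is not defined in this excerpt. Below is a minimal sketch consistent with the output shown afterward (100 warmup inferences, 1,000 timed inferences, client-side latency percentiles in milliseconds), reusing the numpy and time imports from the top of the notebook; the original implementation may differ:

def benchmark_sm_endpoint(predictor, data, warmup=100, num_inferences=1000):
    # Warmup round: prime the endpoint; these calls are not counted
    print('Doing warmup round of {} inferences (not counted)'.format(warmup))
    for _ in range(warmup):
        predictor.predict(data)

    # Timed round: record client end-to-end latency per request, in ms
    print('Running {} inferences'.format(num_inferences))
    latencies = []
    for _ in range(num_inferences):
        start = time.time()
        predictor.predict(data)
        latencies.append((time.time() - start) * 1000)

    print('Client end-to-end latency percentiles:')
    print('Avg | P50 | P90 | P99')
    print('{:.4f} | {:.4f} | {:.4f} | {:.4f}'.format(
        np.mean(latencies),
        np.percentile(latencies, 50),
        np.percentile(latencies, 90),
        np.percentile(latencies, 99)))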
# Benchmark both SageMaker endpoints
benchmark_sm_endpoint(predictor_compiled_fp32, data)
benchmark_sm_endpoint(predictor_uncompiled, data)
From the benchmarks above, the output will be similar to the following. The first set of numbers is for the compiled model, the second for the uncompiled model:
Doing warmup round of 100 inferences (not counted)
Running 1000 inferences
Client end-to-end latency percentiles:
Avg | P50 | P90 | P99
103.2129 | 124.4727 | 129.1123 | 133.2371
Doing warmup round of 100 inferences (not counted)
Running 1000 inferences
Client end-to-end latency percentiles:
Avg | P50 | P90 | P99
117.1654 | 137.9665 | 143.5326 | 150.2070
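In this run, the Neo-compiled FP32 model delivers roughly 12% lower average client latency than the uncompiled model (103.2 ms vs. 117.2 ms) on the same ml.m5.xlarge instance with an ml.eia2.large accelerator.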
# Clean up: delete both endpoints to stop incurring charges
sess.delete_endpoint(predictor_compiled_fp32.endpoint_name)
sess.delete_endpoint(predictor_uncompiled.endpoint_name)