You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
To reproduce
A clear, step-by-step set of instructions to reproduce the bug.
The provided code needs to be complete and runnable. If additional data is needed, please include it in the issue.
import os
import json
import requests
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
def handler(data, context, directory=Path("/opt/ml/model")):
    """
    This is the entrypoint that will be called by SageMaker
    when the endpoint receives a request.

    The request goes through three stages: the input is transformed
    into model features, the features are sent to the model, and the
    model output is turned into the response.

    Args:
        data: The request payload. A JSON string when ``context`` is
            None; otherwise an object exposing ``read()`` that returns
            UTF-8 encoded bytes.
        context: None when the code is exercised directly (e.g. from a
            notebook); otherwise an object exposing
            ``request_content_type``, ``rest_uri`` and ``accept_header``.
        directory: Folder containing the model artifacts.

    Returns:
        Whatever ``_process_output`` returns: a tuple with the JSON
        encoded response body and the response content type.

    Raises:
        ValueError: If the request content type is not supported.
    """
    print("Handling endpoint request")

    processed_input = _process_input(data, context, directory)
    # If the input could not be processed there is nothing to send to
    # the model; _process_output turns the missing output into a
    # response with an empty prediction.
    output = _predict(processed_input, context, directory) if processed_input else None
    return _process_output(output, context, directory)


def _process_input(data, context, directory):
    """
    Parse the request payload and transform it with the features pipeline.

    Returns:
        The first transformed row as a plain list, or None when the
        payload could not be parsed or transformed.

    Raises:
        ValueError: If ``context.request_content_type`` is neither
            "application/json" nor "application/octet-stream".
    """
    print("Processing input data...")

    if context is None:
        # The context will be None when we are testing the code
        # directly from a notebook. In that case, we can use the
        # data directly.
        endpoint_input = data
    elif context.request_content_type in (
        "application/json",
        "application/octet-stream",
    ):
        # When the endpoint is running, we will receive a context
        # object. We need to parse the input and turn it into
        # JSON in that case.
        endpoint_input = data.read().decode("utf-8")
    else:
        raise ValueError(
            f"Unsupported content type: {context.request_content_type or 'unknown'}"
        )

    # Let's now transform the input data using the features pipeline.
    try:
        endpoint_input = json.loads(endpoint_input)
        df = pd.json_normalize(endpoint_input)
        # joblib files are pickle-based: only load artifacts produced by
        # a trusted pipeline. Path() lets callers pass a plain string.
        features_pipeline = joblib.load(Path(directory) / "features.joblib")
        result = features_pipeline.transform(df)
    except Exception as e:
        # Best effort on purpose: a bad payload yields an empty
        # prediction instead of failing the whole request.
        print(f"There was an error processing the input data. {e}")
        return None

    return result[0].tolist()
def _predict(instance, context, directory, timeout=60):
    """
    Send one transformed instance to the model and return its response.

    Args:
        instance: The feature values for a single sample.
        context: None when running outside the endpoint; otherwise an
            object whose ``rest_uri`` is the URL the instance is posted to.
        directory: Folder containing the model artifacts. Only used when
            ``context`` is None, to load the model stored under "001".
        timeout: Seconds to wait for the model server before giving up.
            Without it, a stalled server would block this worker
            indefinitely.

    Returns:
        A dictionary with a "predictions" key when running locally, or
        the decoded JSON body returned by the model server otherwise.

    Raises:
        ValueError: If the model server answers with a status other
            than 200. The message is the response body.
    """
    print("Sending input data to model to make a prediction...")

    if context is None:
        # The context will be None when we are testing the code
        # directly from a notebook. In that case, we want to load the
        # model we trained and make a prediction using it.
        import keras

        model = keras.models.load_model(Path(directory) / "001")
        predictions = model.predict(np.array([instance]))
        result = {"predictions": predictions.tolist()}
    else:
        # When the endpoint is running, we will receive a context
        # object. In that case we need to send the instance to the
        # model to get a prediction back.
        model_input = json.dumps({"instances": [instance]})
        response = requests.post(context.rest_uri, data=model_input, timeout=timeout)

        if response.status_code != 200:
            raise ValueError(response.content.decode("utf-8"))

        result = json.loads(response.content)

    print(f"Response: {result}")
    return result
def _process_output(output, context, directory):
    """
    Turn the model output into the response returned by the endpoint.

    Args:
        output: The model response, expected to hold the class scores of
            the first sample under ``output["predictions"][0]``. A falsy
            value (None, empty dict) means no prediction is available.
        context: None when running outside the endpoint; otherwise an
            object whose ``accept_header`` is used as the response
            content type.
        directory: Folder containing "target.joblib". Accepts a string
            or a Path.

    Returns:
        A tuple ``(body, content_type)`` where ``body`` is a JSON string
        with the predicted class and its confidence, or with a null
        prediction when ``output`` is falsy.
    """
    print("Processing prediction received from the model...")

    if output:
        scores = output["predictions"][0]
        prediction = np.argmax(scores)
        confidence = scores[prediction]

        # Path() keeps this working when the caller passes the folder as
        # a plain string, matching how _predict handles the directory.
        # joblib files are pickle-based: only load trusted artifacts.
        target_pipeline = joblib.load(Path(directory) / "target.joblib")
        classes = target_pipeline.named_transformers_["species"].categories_[0]

        result = {
            "prediction": classes[prediction],
            "confidence": confidence,
        }
    else:
        result = {"prediction": None}

    print(result)

    response_content_type = (
        "application/json" if context is None else context.accept_header
    )
    return json.dumps(result), response_content_type
Screenshots or logs
These are the SageMaker logs:
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,281 sagemaker-containers INFO Imported framework sagemaker_sklearn_container.training
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,285 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,286 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.947Z
Returning the value itself
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,333 sagemaker_sklearn_container.training INFO Invoking user training script.
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,527 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,527 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.947Z
Returning the value itself
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,540 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,541 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.948Z
Returning the value itself
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,553 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,554 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.948Z
Returning the value itself
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,563 sagemaker-training-toolkit INFO Invoking user script
Describe the bug
To reproduce
A clear, step-by-step set of instructions to reproduce the bug.
The provided code needs to be complete and runnable. If additional data is needed, please include it in the issue.
%%writefile {CODE_FOLDER}/pipeline/inference.py
#| filename: inference.py
#| code-line-numbers: true
import os
import json
import requests
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
def handler(data, context, directory=Path("/opt/ml/model")):
    """
    Entrypoint that SageMaker calls whenever the endpoint
    receives a request.

    NOTE(review): this copy of the function only logs the request;
    the remaining steps appear to have been left out of the excerpt.
    """
    print("Handling endpoint request")
def _process_input(data, context, directory):
    """
    Announce that the request payload is being processed.

    NOTE(review): only the log line is present in this copy of the
    function; the parsing logic appears to have been left out.
    """
    print("Processing input data...")
def _predict(instance, context, directory):
    """
    Announce that the instance is being sent to the model.

    NOTE(review): only the log line is present in this copy of the
    function; the prediction logic appears to have been left out.
    """
    print("Sending input data to model to make a prediction...")
def _process_output(output, context, directory):
    """
    Announce that the model prediction is being processed.

    NOTE(review): only the log line is present in this copy of the
    function; the response-building logic appears to have been left out.
    """
    print("Processing prediction received from the model...")
%%writefile {CODE_FOLDER}/pipeline/requirements.txt
#| filename: requirements.txt
#| code-line-numbers: true
sagemaker-training
numpy
pandas
scikit-learn==1.2.1
# Model definition that ships the custom inference script: `inference.py`
# is taken from the "pipeline" folder under CODE_FOLDER, and the model
# artifacts come from the output of the training step.
custom_tensorflow_model = TensorFlowModel(
    name="penguins",
    model_data=train_model_step.properties.ModelArtifacts.S3ModelArtifacts,
    entry_point="inference.py",
    # dependencies=['requirements.txt'],  # Tried as a workaround for the sagemaker-training toolkit error; it did not help.
    source_dir=(CODE_FOLDER / "pipeline").as_posix(),
    framework_version=config["framework_version"],
    sagemaker_session=config["session"],
    role=role,
)

# Register the custom model under its own model package group.
CUSTOM_MODEL_PACKAGE_GROUP = "custom-penguins"
register_model_step = create_registration_step(
    custom_tensorflow_model,
    model_package_group_name=CUSTOM_MODEL_PACKAGE_GROUP,
    content_types=["application/json"],
    response_types=["application/json"],
    model_metrics=model_metrics,
)

# The deployment step is built from the registration step.
deploy_step = create_deployment_step(register_model_step)

# Register and deploy only when `condition` holds; otherwise run the
# failure step. NOTE(review): the step name suggests `condition` compares
# model accuracy against a threshold; it is defined outside this excerpt.
condition_step = ConditionStep(
    name="check-model-accuracy",
    conditions=[condition],
    if_steps=[register_model_step, deploy_step],
    else_steps=[fail_step],
)

# Assemble the pipeline: preprocessing, training, evaluation, and the
# conditional registration/deployment branch.
session15_pipeline = Pipeline(
    name="session15-pipeline",
    parameters=[dataset_location, accuracy_threshold],
    steps=[
        preprocessing_step,
        train_model_step,
        evaluate_model_step,
        condition_step,
    ],
    pipeline_definition_config=pipeline_definition_config,
    sagemaker_session=config["session"],
)

# Create the pipeline definition in SageMaker, or update it if it exists.
session15_pipeline.upsert(role_arn=role)
Expected behavior
Error repacking: ErrorMessage "" Command "/bin/sh -c ./_repack_script_launcher.sh --dependencies
Screenshots or logs
These are the SageMaker logs:
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,281 sagemaker-containers INFO Imported framework sagemaker_sklearn_container.training
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,285 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,286 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.947Z
Returning the value itself
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,333 sagemaker_sklearn_container.training INFO Invoking user training script.
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,527 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.947Z
2024-04-17 01:37:47,527 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.947Z
Returning the value itself
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,540 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,541 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.948Z
Returning the value itself
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,553 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,554 sagemaker-training-toolkit INFO Failed to parse hyperparameter model_archive value s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz to Json.
2024-04-17T01:37:49.948Z
Returning the value itself
2024-04-17T01:37:49.948Z
2024-04-17 01:37:47,563 sagemaker-training-toolkit INFO Invoking user script
2024-04-17T01:37:49.948Z
Training Env:
2024-04-17T01:37:49.948Z
{ "additional_framework_parameters": {}, "channel_input_dirs": { "training": "/opt/ml/input/data/training" }, "current_host": "algo-1", "framework_module": "sagemaker_sklearn_container.training:main", "hosts": [ "algo-1" ], "hyperparameters": { "dependencies": null, "inference_script": "inference.py", "model_archive": "s3://jason-ml-school-us-east-1/training-37tdzuasvx4r-Xv471ctmPp/output/model.tar.gz", "source_dir": "code/pipeline" }, "input_config_dir": "/opt/ml/input/config", "input_data_config": { "training": { "TrainingInputMode": "File", "S3DistributionType": "FullyReplicated", "RecordWrapperType": "None" } }, "input_dir": "/opt/ml/input", "is_master": true, "job_name": "register-RepackModel-penguins-52d38dc07-37tdzuasvx4r-w1aIdxK26x", "log_level": 20, "master_hostname": "algo-1", "model_dir": "/opt/ml/model", "module_dir": "s3://jason-ml-school-us-east-1/register-RepackModel-penguins-52d38dc07c690538660da8cd1da2230c/source/sourcedir.tar.gz", "module_name": "_repack_script_launcher.sh", "network_interface_name": "eth0", "num_cpus": 2, "num_gpus": 0, "output_data_dir": "/opt/ml/output/data", "output_dir": "/opt/ml/output", "output_intermediate_dir": "/opt/ml/output/intermediate", "resource_config": { "current_host": "algo-1", "current_instance_type": "ml.m5.large", "current_group_name": "homogeneousCluster", "hosts": [ "algo-1" ], "instance_groups": [ { "instance_group_name": "homogeneousCluster", "instance_type": "ml.m5.large", "hosts": [ "algo-1" ] } ], "network_interface_name": "eth0" }, "user_entry_point": "_repack_script_launcher.sh"
2024-04-17T01:37:49.948Z
}
2024-04-17T01:37:49.948Z
Environment variables:
2024-04-17T01:37:49.949Z
SM_HOSTS=["algo-1"]
System information
A description of your system. Please provide:
Additional context
Add any other context about the problem here.
The text was updated successfully, but these errors were encountered: