fix(labsdk): durpy should be internal (#277)

raptor-ml · Feb 12, 2023 · 3d69d40 · 3d69d40
1 parent 1772a8e
commit 3d69d40
Show file tree

Hide file tree

Showing 12 changed files with 70 additions and 12 deletions.
diff --git a/labsdk/_test/diabetes.py b/labsdk/_test/diabetes.py
@@ -17,7 +17,7 @@
 import pandas as pd
 from typing_extensions import TypedDict
 
-from ..raptor import Context, data_source, feature, freshness, model, TrainingContext
+from labsdk.raptor import Context, data_source, feature, freshness, model, TrainingContext
 
 df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv')
 df.insert(0, 'id', range(0, len(df)))

diff --git a/labsdk/_test/fake_bank.py b/labsdk/_test/fake_bank.py
@@ -44,7 +44,7 @@
 
 # Raptor
 from typing_extensions import TypedDict
-from ..raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, \
+from labsdk.raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, \
     TrainingContext, StreamingConfig
 
 

diff --git a/labsdk/_test/main.py b/labsdk/_test/main.py
@@ -17,7 +17,7 @@
 import pandas as pd
 from typing_extensions import TypedDict
 
-from ..raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, manifests, \
+from labsdk.raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, manifests, \
     keep_previous, TrainingContext, StreamingConfig
 
 
@@ -155,6 +155,9 @@ def deal_prediction(ctx: TrainingContext) -> float:
     xgb_model = XGBClassifier()
 
     # Fit the model to the training data
+    from sklearn.preprocessing import LabelEncoder
+    le = LabelEncoder()
+    y_train = le.fit_transform(y_train)
     xgb_model.fit(X_train, y_train)
 
     # Evaluate the model on the testing data

diff --git a/labsdk/_test/purchase.py b/labsdk/_test/purchase.py
@@ -25,14 +25,13 @@
 
 import pandas as pd
 
-from ..raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, \
+from labsdk.raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, \
     TrainingContext, StreamingConfig
 
 
 # Data source for the purchase history data
 @data_source(
-    training_data=pd.read_parquet(
-        'https://gist.github.com/AlmogBaku/a1b331615eaf1284432d2eecc5fe60bc/raw/purchases.parquet'),
+    training_data=pd.read_parquet('purchases.parquet'),
     keys=['id', 'customer_id'],
     timestamp='purchase_at',
     production_config=StreamingConfig(kind='kafka'),
@@ -89,7 +88,7 @@ def purchase_prediction(ctx: TrainingContext) -> float:
     accuracy = xgb_model.score(X_test, y_test)
 
     # Make sure the model has a minimum accuracy of 0.7
-    if accuracy < 0.6:
+    if accuracy < 0.7:
         raise Exception('Accuracy is below 0.7')
 
     return xgb_model
diff --git a/labsdk/raptor/durpy.py b/labsdk/raptor/_internal/durpy.py
rename from labsdk/raptor/durpy.py
rename to labsdk/raptor/_internal/durpy.py
diff --git a/labsdk/raptor/decorators.py b/labsdk/raptor/decorators.py
@@ -13,6 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""
+The LabSDK provides a set of decorators that can be used to configure the assets in a way that can be translated to an
+optimized production-ready solution by Raptor.
+"""
+
 import inspect
 import sys
 import types
@@ -24,7 +29,8 @@
 from pydantic import create_model_from_typeddict
 from typing_extensions import TypedDict
 
-from . import local_state, config, durpy, replay
+from . import local_state, config, replay
+from ._internal import durpy
 from .program import Program
 from .program import normalize_selector
 from .types import FeatureSpec, AggrSpec, AggregationFunction, Primitive, DataSourceSpec, ModelFramework, ModelServer, \

diff --git a/labsdk/raptor/types/dsrc.py b/labsdk/raptor/types/dsrc.py
@@ -23,7 +23,15 @@
 
 
 class DataSourceSpec(RaptorSpec):
-    production_config: SourceProductionConfig = None,
+    """
+    DataSourceSpec is the specification of a data source.
+
+    :param name: name of the data source
+    :param keys: list of keys of the data source
+    :param timestamp: name of the timestamp column
+    :param production_config: production configuration type. It will be used to generate a stub configuration.
+    """
+    production_config: SourceProductionConfig = None
     schema: Optional[Dict[str, Any]] = None
     keys: List[str] = None
     timestamp: str = None

diff --git a/labsdk/raptor/types/feature.py b/labsdk/raptor/types/feature.py
@@ -25,12 +25,16 @@
 from .common import RaptorSpec, ResourceReference, _k8s_name, EnumSpec, RuntimeSpec
 from .dsrc import DataSourceSpec
 from .primitives import Primitive
-from .. import durpy, local_state
+from .. import local_state
+from .._internal import durpy
 from .._internal.exporter.general import GeneralExporter
 from ..program import Program
 
 
 class AggregationFunction(EnumSpec):
+    """
+    AggregationFunction is the function used to aggregate the data.
+    """
     Unknown = 'unknown'
     Sum = 'sum'
     Avg = 'avg'
@@ -110,6 +114,9 @@ def __setattr__(self, key, value):
 
 
 class KeepPreviousSpec(yaml.YAMLObject):
+    """
+    KeepPreviousSpec is the specification for how many previous versions of a feature to keep.
+    """
     versions: int = None
     over: timedelta = None
 
@@ -123,6 +130,9 @@ def __init__(self, versions: int, over: timedelta):
 
 
 class FeatureSpec(RaptorSpec):
+    """
+    FeatureSpec is the specification for a feature.
+    """
     primitive: Primitive = None
     _freshness: Optional[timedelta] = None
     staleness: timedelta = None
@@ -241,6 +251,11 @@ def to_yaml_dict(cls, data: 'FeatureSpec'):
 
 
 class Keys(Dict[str, str]):
+    """
+    Keys is a dictionary of keys and values for a feature.
+
+    It is used to encode and decode keys for a feature.
+    """
     def encode(self, spec: FeatureSpec) -> str:
         ret: List[str] = []
         for key in spec.keys:

diff --git a/labsdk/raptor/types/model.py b/labsdk/raptor/types/model.py
@@ -38,6 +38,9 @@
 
 
 class ModelServer(EnumSpec):
+    """
+    Model server to use for deployment
+    """
     SageMakerACK = 'sagemaker-ack'
     Seldon = 'seldon'
     KServe = 'kserve'
@@ -73,6 +76,9 @@ def config(self) -> Optional[model_servers.ModelServer]:
 
 
 class ModelFramework(EnumSpec):
+    """
+    Framework used to train the model
+    """
     HuggingFace = 'huggingface'
     Sklearn = 'sklearn'
     Pytorch = 'pytorch'
@@ -124,6 +130,23 @@ def features_and_labels(self) -> pd.DataFrame:
 
 
 class ModelSpec(RaptorSpec):
+    """
+    Specification of a model
+
+    :param keys: List of keys to use for training
+    :param freshness: How fresh the data should be
+    :param staleness: How stale the data can be
+    :param timeout: How long to wait for data
+    :param features: List of features to use for training
+    :param label_features: List of label features to use for training
+    :param key_feature: Feature to use as key
+    :param model_framework: Framework used to train the model
+    :param model_server: Model server to use for deployment
+    :param training_function: Function to use for training
+    :param exporter: Exporter to use for exporting the model
+    :param model_framework_version: Version of the model framework
+    :param runtime: Runtime to use for training
+    """
     keys: List[str] = None
     freshness: Optional[timedelta] = None
     staleness: timedelta = None

diff --git a/labsdk/raptor/types/model_impl.py b/labsdk/raptor/types/model_impl.py
@@ -20,7 +20,8 @@
 from . import SecretKeyRef
 from .common import _k8s_name
 from .model import ModelSpec, TrainingContext
-from .. import local_state, replay, durpy
+from .. import local_state, replay
+from .._internal import durpy
 from .._internal.exporter import ModelExporter
 from .._internal.exporter.general import GeneralExporter
 

diff --git a/labsdk/raptor/types/primitives.py b/labsdk/raptor/types/primitives.py
@@ -20,6 +20,9 @@
 
 
 class Primitive(EnumSpec):
+    """
+    Primitive types supported by RaptorML.
+    """
     String = 'string'
     Integer = 'int'
     Float = 'float'

diff --git a/labsdk/raptor/types/yaml.py b/labsdk/raptor/types/yaml.py
@@ -17,7 +17,7 @@
 
 import yaml
 
-from .. import durpy
+from .._internal import durpy
 
 
 # Raptor YAML Dumper