Skip to content

Commit

Permalink
fix(labsdk): durpy should be internal (#277)
Browse files Browse the repository at this point in the history
  • Loading branch information
AlmogBaku committed Feb 12, 2023
1 parent 1772a8e commit 3d69d40
Show file tree
Hide file tree
Showing 12 changed files with 70 additions and 12 deletions.
2 changes: 1 addition & 1 deletion labsdk/_test/diabetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import pandas as pd
from typing_extensions import TypedDict

from ..raptor import Context, data_source, feature, freshness, model, TrainingContext
from labsdk.raptor import Context, data_source, feature, freshness, model, TrainingContext

df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv')
df.insert(0, 'id', range(0, len(df)))
Expand Down
2 changes: 1 addition & 1 deletion labsdk/_test/fake_bank.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@

# Raptor
from typing_extensions import TypedDict
from ..raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, \
from labsdk.raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, \
TrainingContext, StreamingConfig


Expand Down
5 changes: 4 additions & 1 deletion labsdk/_test/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import pandas as pd
from typing_extensions import TypedDict

from ..raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, manifests, \
from labsdk.raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, manifests, \
keep_previous, TrainingContext, StreamingConfig


Expand Down Expand Up @@ -155,6 +155,9 @@ def deal_prediction(ctx: TrainingContext) -> float:
xgb_model = XGBClassifier()

# Fit the model to the training data
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train = le.fit_transform(y_train)
xgb_model.fit(X_train, y_train)

# Evaluate the model on the testing data
Expand Down
7 changes: 3 additions & 4 deletions labsdk/_test/purchase.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,13 @@

import pandas as pd

from ..raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, \
from labsdk.raptor import data_source, Context, feature, aggregation, AggregationFunction, freshness, model, \
TrainingContext, StreamingConfig


# Data source for the purchase history data
@data_source(
training_data=pd.read_parquet(
'https://gist.github.com/AlmogBaku/a1b331615eaf1284432d2eecc5fe60bc/raw/purchases.parquet'),
training_data=pd.read_parquet('purchases.parquet'),
keys=['id', 'customer_id'],
timestamp='purchase_at',
production_config=StreamingConfig(kind='kafka'),
Expand Down Expand Up @@ -89,7 +88,7 @@ def purchase_prediction(ctx: TrainingContext) -> float:
accuracy = xgb_model.score(X_test, y_test)

# Make sure the model has a minimum accuracy of 0.7
if accuracy < 0.6:
if accuracy < 0.7:
raise Exception('Accuracy is below 0.7')

return xgb_model
File renamed without changes.
8 changes: 7 additions & 1 deletion labsdk/raptor/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""
The LabSDK provides a set of decorators that can be used to configure the assets in a way that can be translated to an
optimized production-ready solution by Raptor.
"""

import inspect
import sys
import types
Expand All @@ -24,7 +29,8 @@
from pydantic import create_model_from_typeddict
from typing_extensions import TypedDict

from . import local_state, config, durpy, replay
from . import local_state, config, replay
from ._internal import durpy
from .program import Program
from .program import normalize_selector
from .types import FeatureSpec, AggrSpec, AggregationFunction, Primitive, DataSourceSpec, ModelFramework, ModelServer, \
Expand Down
10 changes: 9 additions & 1 deletion labsdk/raptor/types/dsrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@


class DataSourceSpec(RaptorSpec):
production_config: SourceProductionConfig = None,
"""
DataSourceSpec is the specification of a data source.
:param name: name of the data source
:param keys: list of keys of the data source
:param timestamp: name of the timestamp column
:param production_config: production configuration type. It will be used to generate a stub configuration.
"""
production_config: SourceProductionConfig = None
schema: Optional[Dict[str, Any]] = None
keys: List[str] = None
timestamp: str = None
Expand Down
17 changes: 16 additions & 1 deletion labsdk/raptor/types/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,16 @@
from .common import RaptorSpec, ResourceReference, _k8s_name, EnumSpec, RuntimeSpec
from .dsrc import DataSourceSpec
from .primitives import Primitive
from .. import durpy, local_state
from .. import local_state
from .._internal import durpy
from .._internal.exporter.general import GeneralExporter
from ..program import Program


class AggregationFunction(EnumSpec):
"""
AggregationFunction is the function used to aggregate the data.
"""
Unknown = 'unknown'
Sum = 'sum'
Avg = 'avg'
Expand Down Expand Up @@ -110,6 +114,9 @@ def __setattr__(self, key, value):


class KeepPreviousSpec(yaml.YAMLObject):
"""
KeepPreviousSpec is the specification for how many previous versions of a feature to keep.
"""
versions: int = None
over: timedelta = None

Expand All @@ -123,6 +130,9 @@ def __init__(self, versions: int, over: timedelta):


class FeatureSpec(RaptorSpec):
"""
FeatureSpec is the specification for a feature.
"""
primitive: Primitive = None
_freshness: Optional[timedelta] = None
staleness: timedelta = None
Expand Down Expand Up @@ -241,6 +251,11 @@ def to_yaml_dict(cls, data: 'FeatureSpec'):


class Keys(Dict[str, str]):
"""
Keys is a dictionary of keys and values for a feature.
It is used to encode and decode keys for a feature.
"""
def encode(self, spec: FeatureSpec) -> str:
ret: List[str] = []
for key in spec.keys:
Expand Down
23 changes: 23 additions & 0 deletions labsdk/raptor/types/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@


class ModelServer(EnumSpec):
"""
Model server to use for deployment
"""
SageMakerACK = 'sagemaker-ack'
Seldon = 'seldon'
KServe = 'kserve'
Expand Down Expand Up @@ -73,6 +76,9 @@ def config(self) -> Optional[model_servers.ModelServer]:


class ModelFramework(EnumSpec):
"""
Framework used to train the model
"""
HuggingFace = 'huggingface'
Sklearn = 'sklearn'
Pytorch = 'pytorch'
Expand Down Expand Up @@ -124,6 +130,23 @@ def features_and_labels(self) -> pd.DataFrame:


class ModelSpec(RaptorSpec):
"""
Specification of a model
:param keys: List of keys to use for training
:param freshness: How fresh the data should be
:param staleness: How stale the data can be
:param timeout: How long to wait for data
:param features: List of features to use for training
:param label_features: List of label features to use for training
:param key_feature: Feature to use as key
:param model_framework: Framework used to train the model
:param model_server: Model server to use for deployment
:param training_function: Function to use for training
:param exporter: Exporter to use for exporting the model
:param model_framework_version: Version of the model framework
:param runtime: Runtime to use for training
"""
keys: List[str] = None
freshness: Optional[timedelta] = None
staleness: timedelta = None
Expand Down
3 changes: 2 additions & 1 deletion labsdk/raptor/types/model_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
from . import SecretKeyRef
from .common import _k8s_name
from .model import ModelSpec, TrainingContext
from .. import local_state, replay, durpy
from .. import local_state, replay
from .._internal import durpy
from .._internal.exporter import ModelExporter
from .._internal.exporter.general import GeneralExporter

Expand Down
3 changes: 3 additions & 0 deletions labsdk/raptor/types/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@


class Primitive(EnumSpec):
"""
Primitive types supported by RaptorML.
"""
String = 'string'
Integer = 'int'
Float = 'float'
Expand Down
2 changes: 1 addition & 1 deletion labsdk/raptor/types/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import yaml

from .. import durpy
from .._internal import durpy


# Raptor YAML Dumper
Expand Down

0 comments on commit 3d69d40

Please sign in to comment.