Skip to content

Commit

Permalink
Add ColumnwiseTransformer (multivariate compositor for series-to-seri…
Browse files Browse the repository at this point in the history
…es transformer) (#1044)

* first draft of multivariate compositor

* change syntax

* added transformer to test_config; introduced multivariate-only-tag; adjusted tests_all_transformers.py to handle multivariate-only data; minor improvements to multivariate_compositor.py

* remove apply function for now; next step: add functionality for transformer.update()

* Update sktime/transformations/series/multivariate_compositor.py

Co-authored-by: Martin Walter <mf-walter@web.de>

* Update sktime/transformations/series/multivariate_compositor.py

Co-authored-by: Martin Walter <mf-walter@web.de>

* Update sktime/transformations/series/multivariate_compositor.py

Co-authored-by: Martin Walter <mf-walter@web.de>

* revert tests, as we also accept univariate series

* make transformer accept pd.Series; implement suggestions from aiwalter, add myself to list of contributors

* reformat test_config.py

* Update sktime/transformations/series/compose.py

Co-authored-by: Martin Walter <mf-walter@web.de>

* improve docstrings

* Update compose.py

* add example and decorator

* reformat config.py

* Update sktime/transformations/series/compose.py

Co-authored-by: Markus Löning <markus.loning@gmail.com>

* Update sktime/transformations/series/compose.py

Co-authored-by: Markus Löning <markus.loning@gmail.com>

* make additional _check_columns and _revert_to_series functions; get rid of if/else for multivariate/univariate; add check for whether list is passed; rename to ColumnwiseTransformer

* add transformer to api_reference

* remove space

Co-authored-by: Martin Walter <mf-walter@web.de>

* directly import load_longley

Co-authored-by: Martin Walter <mf-walter@web.de>

* remove empty line

Co-authored-by: Martin Walter <mf-walter@web.de>

* minor change to OptionalPassthrough

Co-authored-by: Martin Walter <mf-walter@web.de>

* directly import load_longley

Co-authored-by: Martin Walter <mf-walter@web.de>

* make docstrings pydocstyle compliant, move _attributes to fit

* z_name cannot be an attribute of the transformer as it changes in transform

* make functions standalone, change z[0] to z.squeeze(columns), add helper function to test whether it's a univariate series

Co-authored-by: Martin Walter <mf-walter@web.de>
Co-authored-by: Markus Löning <markus.loning@gmail.com>
  • Loading branch information
3 people committed Aug 14, 2021
1 parent 582cc66 commit 53a3a62
Show file tree
Hide file tree
Showing 4 changed files with 267 additions and 6 deletions.
3 changes: 2 additions & 1 deletion .all-contributorsrc
Expand Up @@ -1041,7 +1041,8 @@
"avatar_url": "https://avatars.githubusercontent.com/u/46671894?v=4",
"profile": "https://github.com/SveaMeyer13",
"contributions": [
"doc"
"doc",
"code"
]
},
{
Expand Down
1 change: 1 addition & 0 deletions docs/source/api_reference/transformations.rst
Expand Up @@ -281,6 +281,7 @@ Composition
:template: class.rst

OptionalPassthrough
ColumnwiseTransformer

Theta
~~~~~
Expand Down
3 changes: 3 additions & 0 deletions sktime/tests/_config.py
Expand Up @@ -93,10 +93,12 @@
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.compose import OptionalPassthrough
from sktime.transformations.series.compose import ColumnwiseTransformer
from sktime.transformations.series.detrend import Detrender
from sktime.transformations.series.impute import Imputer
from sktime.transformations.series.outlier_detection import HampelFilter


# The following estimators currently do not pass all unit tests
# What do they fail? ShapeDTW fails on 3d_numpy_input test, not set up for that
EXCLUDE_ESTIMATORS = [
Expand Down Expand Up @@ -329,6 +331,7 @@
Imputer: {"method": "mean"},
HampelFilter: {"window_length": 3},
OptionalPassthrough: {"transformer": BoxCoxTransformer(), "passthrough": True},
ColumnwiseTransformer: {"transformer": Detrender()},
AggrDist: {"transformer": ScipyDist()},
PyODAnnotator: {"estimator": ANOMALY_DETECTOR},
}
Expand Down
266 changes: 261 additions & 5 deletions sktime/transformations/series/compose.py
Expand Up @@ -2,9 +2,12 @@
# -*- coding: utf-8 -*-
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)

__author__ = ["Martin Walter"]
__all__ = ["OptionalPassthrough"]
"""Series-to-Series Transformers: OptionalPassthrough and Columnwisetransformer."""

__author__ = ["Martin Walter", "Svea Meyer"]
__all__ = ["OptionalPassthrough", "ColumnwiseTransformer"]

import pandas as pd
from sktime.transformations.base import _SeriesToSeriesTransformer
from sktime.utils.validation.series import check_series

Expand All @@ -13,7 +16,10 @@


class OptionalPassthrough(_SeriesToSeriesTransformer):
"""A transformer to tune the implicit hyperparameter whether or not to use a
"""
Tune implicit hyperparameter.
A transformer to tune the implicit hyperparameter whether or not to use a
particular transformer inside a pipeline (e.g. TranformedTargetForecaster)
or not. This is achived by having the additional hyperparameter
"passthrough" which can be added to a grid then (see example).
Expand All @@ -27,7 +33,7 @@ class OptionalPassthrough(_SeriesToSeriesTransformer):
passthrough the data (identity transformation)
Example
----------
-------
>>> from sktime.datasets import load_airline
>>> from sktime.forecasting.naive import NaiveForecaster
>>> from sktime.transformations.series.compose import OptionalPassthrough
Expand Down Expand Up @@ -76,13 +82,40 @@ def __init__(self, transformer, passthrough=False):
super(OptionalPassthrough, self).__init__()

def fit(self, Z, X=None):
    """Fit the wrapped transformer unless passthrough is enabled.

    Parameters
    ----------
    Z : pd.Series
        Series to fit.
    X : pd.DataFrame, optional (default=None)
        Exogenous data used in transformation.

    Returns
    -------
    self
    """
    if self.passthrough:
        # identity mode: there is no inner transformer to fit
        self._is_fitted = True
        return self

    # work on a fresh clone so the transformer passed to __init__ stays unfitted
    inner = clone(self.transformer)
    self.transformer_ = inner
    inner.fit(Z, X)

    self._is_fitted = True
    return self

def transform(self, Z, X=None):
"""Transform data.
Parameters
----------
Z : pd.Series
Series to transform.
X : pd.DataFrame, optional (default=None)
Exogenous data used in transformation.
Returns
-------
z : pd.Series
Transformed series.
"""
self.check_is_fitted()
z = check_series(Z, enforce_univariate=False)
if not self.passthrough:
Expand All @@ -91,8 +124,231 @@ def transform(self, Z, X=None):

@if_delegate_has_method(delegate="transformer")
def inverse_transform(self, Z, X=None):
    """Inverse transform data.

    Only available if the wrapped transformer has an ``inverse_transform``
    method. When ``passthrough`` is set, the validated input is returned
    unchanged.

    Parameters
    ----------
    Z : pd.Series
        Series to transform.
    X : pd.DataFrame, optional (default=None)
        Exogenous data used in transformation.

    Returns
    -------
    z : pd.Series
        Inverse transformed data.
    """
    self.check_is_fitted()
    z = check_series(Z, enforce_univariate=False)
    if not self.passthrough:
        # invert exactly once and forward the caller's exogenous data;
        # the superseded call with X=None must not run in addition to this one
        z = self.transformer_.inverse_transform(z, X=X)
    return z


class ColumnwiseTransformer(_SeriesToSeriesTransformer):
    """Apply a transformer columnwise to multivariate series.

    A separate clone of ``transformer`` is fitted for every selected column
    and later used to transform / inverse-transform / update that column.
    Univariate ``pd.Series`` input is cast to a one-column frame internally
    and squeezed back to a series on output.

    Parameters
    ----------
    transformer : Estimator
        scikit-learn-like or sktime-like transformer to fit and apply to series
    columns : list of str or None
        Names of columns that are supposed to be transformed.
        If it is None, all columns are transformed.

    Attributes
    ----------
    transformers_ : dict of {str : transformer}
        Maps columns to transformers
    columns_ : list of str
        Names of columns that are supposed to be transformed.

    Example
    -------
    >>> from sktime.datasets import load_longley
    >>> from sktime.transformations.series.detrend import Detrender
    >>> from sktime.transformations.series.compose import ColumnwiseTransformer
    >>> y, X = load_longley()
    >>> transformer = ColumnwiseTransformer(Detrender())
    >>> yt = transformer.fit_transform(X)
    """

    _required_parameters = ["transformer"]

    def __init__(self, transformer, columns=None):
        self.transformer = transformer
        self.columns = columns
        super(ColumnwiseTransformer, self).__init__()

    def fit(self, Z, X=None):
        """Fit data.

        Iterates over columns (series) and applies
        the fit function of the transformer.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame
        X : optional (default=None)
            Passed unchanged to the fit method of every per-column transformer.

        Returns
        -------
        self : an instance of self

        Raises
        ------
        ValueError
            If ``columns`` is neither None nor a list of strings, or if a
            selected column is not present in ``Z``.
        """
        self._is_fitted = False

        z = check_series(Z, allow_numpy=False)

        # cast to pd.DataFrame in univariate case
        z, _ = _check_is_pdseries(z)

        # columns must be None or a list whose entries are all strings; the
        # parentheses matter: the negation has to cover the whole conjunction
        if self.columns is not None and not (
            isinstance(self.columns, list)
            and all(isinstance(s, str) for s in self.columns)
        ):
            raise ValueError("Columns need to be a list of strings or None.")

        # set self.columns_ to columns that are going to be transformed
        # (all if self.columns is None)
        self.columns_ = self.columns
        if self.columns_ is None:
            self.columns_ = z.columns

        # make sure z contains all columns that the user wants to transform
        _check_columns(z, selected_columns=self.columns_)

        # fit one independent clone per column
        self.transformers_ = {}
        for colname in self.columns_:
            transformer = clone(self.transformer)
            self.transformers_[colname] = transformer
            self.transformers_[colname].fit(z[colname], X)
        self._is_fitted = True
        return self

    def transform(self, Z, X=None):
        """Transform data.

        Returns a transformed version of Z by iterating over specified
        columns and applying the univariate series transformer to them.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame
        X : optional (default=None)
            Passed unchanged to the transform method of every per-column
            transformer.

        Returns
        -------
        Z : pd.Series, pd.DataFrame
            Transformed time series(es).

        Raises
        ------
        ValueError
            If a selected column is not present in ``Z``.
        """
        self.check_is_fitted()
        # same input contract as fit: pandas only, since columns are
        # addressed by name below
        z = check_series(Z, allow_numpy=False)

        # handle univariate case
        z, is_series = _check_is_pdseries(z)

        # work on a copy so the caller's data is not modified
        z = z.copy()

        # make sure z contains all columns that the user wants to transform
        _check_columns(z, selected_columns=self.columns_)
        for colname in self.columns_:
            z[colname] = self.transformers_[colname].transform(z[colname], X)

        # make z a series again in univariate case
        if is_series:
            z = z.squeeze("columns")
        return z

    @if_delegate_has_method(delegate="transformer")
    def inverse_transform(self, Z, X=None):
        """Inverse-transform data.

        Returns an inverse-transformed version of Z by iterating over specified
        columns and applying the univariate series transformer to them.
        Only works if self.transformer has an inverse-transform method.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame
        X : optional (default=None)
            Passed unchanged to the inverse_transform method of every
            per-column transformer.

        Returns
        -------
        Z : pd.Series, pd.DataFrame
            Inverse-transformed time series(es).

        Raises
        ------
        ValueError
            If a selected column is not present in ``Z``.
        """
        self.check_is_fitted()
        # same input contract as fit: pandas only
        z = check_series(Z, allow_numpy=False)

        # handle univariate case
        z, is_series = _check_is_pdseries(z)

        # work on a copy so the caller's data is not modified
        z = z.copy()

        # make sure z contains all columns that the user wants to transform
        _check_columns(z, selected_columns=self.columns_)

        # iterate over columns that are supposed to be inverse_transformed
        for colname in self.columns_:
            z[colname] = self.transformers_[colname].inverse_transform(z[colname], X)

        # make z a series again in univariate case
        if is_series:
            z = z.squeeze("columns")
        return z

    @if_delegate_has_method(delegate="transformer")
    def update(self, Z, X=None, update_params=True):
        """Update parameters.

        Update the parameters of the estimator with new data
        by iterating over specified columns.
        Only works if self.transformer has an update method.

        Parameters
        ----------
        Z : pd.Series, pd.DataFrame
            New time series
        X : optional (default=None)
            Passed unchanged to the update method of every per-column
            transformer.
        update_params : bool, optional (default=True)
            Accepted for interface compatibility; it is not forwarded to the
            per-column transformers by this method.

        Returns
        -------
        self : an instance of self

        Raises
        ------
        ValueError
            If a selected column is not present in ``Z``.
        """
        # updating relies on the per-column transformers created in fit
        self.check_is_fitted()
        z = check_series(Z, allow_numpy=False)

        # make z a pd.DataFrame in univariate case
        z, _ = _check_is_pdseries(z)

        # make sure z contains all columns that the user wants to transform
        _check_columns(z, selected_columns=self.columns_)
        for colname in self.columns_:
            self.transformers_[colname].update(z[colname], X)
        return self


def _check_columns(z, selected_columns):
# make sure z contains all columns that the user wants to transform
z_wanted_keys = set(selected_columns)
z_new_keys = set(z.columns)
difference = z_wanted_keys.difference(z_new_keys)
if len(difference) != 0:
raise ValueError("Missing columns" + str(difference) + "in Z.")


def _check_is_pdseries(z):
# make z a pd.Dataframe in univariate case
is_series = False
if isinstance(z, pd.Series):
z = z.to_frame()
is_series = True
return z, is_series

0 comments on commit 53a3a62

Please sign in to comment.