Add ColumnwiseTransformer (multivariate compositor for series-to-seri…

…es transformer) (#1044) * first draft of multivariate compositor * change syntax * added transformer to test_config; introduced multivariate-only-tag; adjusted tests_all_transformers.py to handle multivariate-only data; minor improvements to multivariate_compositor.py * remove apply function for now; next step: add functionality for transformer.update() * Update sktime/transformations/series/multivariate_compositor.py Co-authored-by: Martin Walter <mf-walter@web.de> * Update sktime/transformations/series/multivariate_compositor.py Co-authored-by: Martin Walter <mf-walter@web.de> * Update sktime/transformations/series/multivariate_compositor.py Co-authored-by: Martin Walter <mf-walter@web.de> * revert tests, as we also accept univariate series * make transformer accept pd.Series; implement suggestions from aiwalter, add myself to list of contributors * reformat test_config.py * Update sktime/transformations/series/compose.py Co-authored-by: Martin Walter <mf-walter@web.de> * improve docstrings * Update compose.py * add example and decorator * reformat config.py * Update sktime/transformations/series/compose.py Co-authored-by: Markus Löning <markus.loning@gmail.com> * Update sktime/transformations/series/compose.py Co-authored-by: Markus Löning <markus.loning@gmail.com> * make additional _check_columns and _revert_to_series function; get rid of if/else for multivariate/univariate; add check for whether list is passed; rename to ColumnwiseTransformer * add transformer to api_reference * remove space Co-authored-by: Martin Walter <mf-walter@web.de> * directly import load_longley Co-authored-by: Martin Walter <mf-walter@web.de> * remove empty line Co-authored-by: Martin Walter <mf-walter@web.de> * minor change to OptionalPassthrough Co-authored-by: Martin Walter <mf-walter@web.de> * directly import load_longley Co-authored-by: Martin Walter <mf-walter@web.de> * make docstrings pydocstyle compliant, move _attributes to fit * z_name cannot be an attribute of the 
transformer as it changes in transform * make functions standalone, change z[0] to z.squeeze(columns), add helper function to test whether it's a univariate series Co-authored-by: Martin Walter <mf-walter@web.de> Co-authored-by: Markus Löning <markus.loning@gmail.com>
sktime · Aug 14, 2021 · 53a3a62 · 53a3a62
1 parent 582cc66
commit 53a3a62
Show file tree

Hide file tree

Showing 4 changed files with 267 additions and 6 deletions.
diff --git a/.all-contributorsrc b/.all-contributorsrc
@@ -1041,7 +1041,8 @@
       "avatar_url": "https://avatars.githubusercontent.com/u/46671894?v=4",
       "profile": "https://github.com/SveaMeyer13",
       "contributions": [
-        "doc"
+        "doc",
+        "code"
       ]
     },
     {

diff --git a/docs/source/api_reference/transformations.rst b/docs/source/api_reference/transformations.rst
@@ -281,6 +281,7 @@ Composition
     :template: class.rst
 
     OptionalPassthrough
+    ColumnwiseTransformer
 
 Theta
 ~~~~~

diff --git a/sktime/tests/_config.py b/sktime/tests/_config.py
@@ -93,10 +93,12 @@
 from sktime.transformations.series.adapt import TabularToSeriesAdaptor
 from sktime.transformations.series.boxcox import BoxCoxTransformer
 from sktime.transformations.series.compose import OptionalPassthrough
+from sktime.transformations.series.compose import ColumnwiseTransformer
 from sktime.transformations.series.detrend import Detrender
 from sktime.transformations.series.impute import Imputer
 from sktime.transformations.series.outlier_detection import HampelFilter
 
+
 # The following estimators currently do not pass all unit tests
 # What do they fail? ShapeDTW fails on 3d_numpy_input test, not set up for that
 EXCLUDE_ESTIMATORS = [
@@ -329,6 +331,7 @@
     Imputer: {"method": "mean"},
     HampelFilter: {"window_length": 3},
     OptionalPassthrough: {"transformer": BoxCoxTransformer(), "passthrough": True},
+    ColumnwiseTransformer: {"transformer": Detrender()},
     AggrDist: {"transformer": ScipyDist()},
     PyODAnnotator: {"estimator": ANOMALY_DETECTOR},
 }

diff --git a/sktime/transformations/series/compose.py b/sktime/transformations/series/compose.py
@@ -2,9 +2,12 @@
 # -*- coding: utf-8 -*-
 # copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
 
-__author__ = ["Martin Walter"]
-__all__ = ["OptionalPassthrough"]
+"""Series-to-Series Transformers: OptionalPassthrough and ColumnwiseTransformer."""
 
+__author__ = ["Martin Walter", "Svea Meyer"]
+__all__ = ["OptionalPassthrough", "ColumnwiseTransformer"]
+
+import pandas as pd
 from sktime.transformations.base import _SeriesToSeriesTransformer
 from sktime.utils.validation.series import check_series
 
@@ -13,7 +16,10 @@
 
 
 class OptionalPassthrough(_SeriesToSeriesTransformer):
-    """A transformer to tune the implicit hyperparameter whether or not to use a
+    """
+    Tune implicit hyperparameter.
+
+    A transformer to tune the implicit hyperparameter whether or not to use a
     particular transformer inside a pipeline (e.g. TranformedTargetForecaster)
     or not. This is achived by having the additional hyperparameter
     "passthrough" which can be added to a grid then (see example).
@@ -27,7 +33,7 @@ class OptionalPassthrough(_SeriesToSeriesTransformer):
         passthrough the data (identity transformation)
 
     Example
-    ----------
+    -------
     >>> from sktime.datasets import load_airline
     >>> from sktime.forecasting.naive import NaiveForecaster
     >>> from sktime.transformations.series.compose import OptionalPassthrough
@@ -76,13 +82,40 @@ def __init__(self, transformer, passthrough=False):
         super(OptionalPassthrough, self).__init__()
 
     def fit(self, Z, X=None):
+        """Fit data.
+
+        Parameters
+        ----------
+        Z : pd.Series
+             Series to fit.
+        X : pd.DataFrame, optional (default=None)
+             Exogenous data used in transformation.
+
+        Returns
+        -------
+        self
+        """
         if not self.passthrough:
             self.transformer_ = clone(self.transformer)
             self.transformer_.fit(Z, X)
         self._is_fitted = True
         return self
 
     def transform(self, Z, X=None):
+        """Transform data.
+
+        Parameters
+        ----------
+        Z : pd.Series
+            Series to transform.
+        X : pd.DataFrame, optional (default=None)
+            Exogenous data used in transformation.
+
+        Returns
+        -------
+        z : pd.Series
+            Transformed series.
+        """
         self.check_is_fitted()
         z = check_series(Z, enforce_univariate=False)
         if not self.passthrough:
@@ -91,8 +124,231 @@ def transform(self, Z, X=None):
 
     @if_delegate_has_method(delegate="transformer")
     def inverse_transform(self, Z, X=None):
+        """Inverse transform data.
+
+        Parameters
+        ----------
+        Z : pd.Series
+            Series to transform.
+        X : pd.DataFrame, optional (default=None)
+            Exogenous data used in transformation.
+
+        Returns
+        -------
+        z : pd.Series
+            Inverse transformed data.
+        """
         self.check_is_fitted()
         z = check_series(Z, enforce_univariate=False)
         if not self.passthrough:
-            z = self.transformer_.inverse_transform(z, X=None)
+            z = self.transformer_.inverse_transform(z, X=X)
         return z
+
+
+class ColumnwiseTransformer(_SeriesToSeriesTransformer):
+    """
+    Apply a transformer columnwise to multivariate series.
+
+    A separate clone of ``transformer`` is fitted for every selected column
+    and then used to transform, inverse-transform or update that column.
+    Columns that are not selected are returned unchanged.
+
+    Parameters
+    ----------
+    transformer : Estimator
+        scikit-learn-like or sktime-like transformer to fit and apply to series
+    columns : list of str or None
+        Names of columns that are supposed to be transformed.
+        If it is None all columns are transformed.
+
+    Attributes
+    ----------
+    transformers_ : dict of {str : transformer}
+        Maps columns to transformers
+    columns_ : list of str
+        Names of columns that are supposed to be transformed
+        (the column index of the fitted data if ``columns`` is None).
+
+
+    Example
+    -------
+    >>> from sktime.datasets import load_longley
+    >>> from sktime.transformations.series.detrend import Detrender
+    >>> from sktime.transformations.series.compose import ColumnwiseTransformer
+
+    >>> y, X = load_longley()
+    >>> transformer = ColumnwiseTransformer(Detrender())
+    >>> yt = transformer.fit_transform(X)
+    """
+
+    _required_parameters = ["transformer"]
+
+    def __init__(self, transformer, columns=None):
+        self.transformer = transformer
+        self.columns = columns
+        super(ColumnwiseTransformer, self).__init__()
+
+    def fit(self, Z, X=None):
+        """
+        Fit data.
+
+        Iterates over columns (series) and applies
+        the fit function of the transformer.
+
+        Parameters
+        ----------
+        Z : pd.Series, pd.DataFrame
+            Series to fit. A pd.Series is handled as a one-column frame.
+        X : pd.DataFrame, optional (default=None)
+            Exogenous data, passed on to the fit of every column transformer.
+
+        Returns
+        -------
+        self : an instance of self
+
+        Raises
+        ------
+        ValueError
+            If ``columns`` is neither None nor a list of strings, or if
+            Z does not contain all selected columns.
+        """
+        self._is_fitted = False
+
+        z = check_series(Z, allow_numpy=False)
+
+        # cast to pd.DataFrame in univariate case
+        if isinstance(z, pd.Series):
+            z = z.to_frame()
+
+        # check that columns are None or list of strings; the complete
+        # "is a list AND holds only strings" condition has to be negated
+        if self.columns is not None:
+            is_list_of_str = isinstance(self.columns, list) and all(
+                isinstance(s, str) for s in self.columns
+            )
+            if not is_list_of_str:
+                raise ValueError("Columns need to be a list of strings or None.")
+
+        # set self.columns_ to columns that are going to be transformed
+        # (all if self.columns is None)
+        self.columns_ = self.columns
+        if self.columns_ is None:
+            self.columns_ = z.columns
+
+        # make sure z contains all columns that the user wants to transform
+        _check_columns(z, selected_columns=self.columns_)
+
+        # fit by iterating over columns, one independent clone per column
+        self.transformers_ = {}
+        for colname in self.columns_:
+            transformer = clone(self.transformer)
+            self.transformers_[colname] = transformer
+            self.transformers_[colname].fit(z[colname], X)
+        self._is_fitted = True
+        return self
+
+    def transform(self, Z, X=None):
+        """Transform data.
+
+        Returns a transformed version of Z by iterating over specified
+        columns and applying the univariate series transformer to them.
+
+        Parameters
+        ----------
+        Z : pd.Series, pd.DataFrame
+            Series to transform.
+        X : pd.DataFrame, optional (default=None)
+            Exogenous data, passed on to every column transformer.
+
+        Returns
+        -------
+        Z : pd.Series, pd.DataFrame
+            Transformed time series(es).
+
+        Raises
+        ------
+        ValueError
+            If Z does not contain all columns selected in fit.
+        """
+        self.check_is_fitted()
+        # same input restriction as in fit: the column lookup below needs
+        # a pandas object
+        z = check_series(Z, allow_numpy=False)
+
+        # handle univariate case
+        z, is_series = _check_is_pdseries(z)
+
+        # make copy of z, so that the caller's data is not overwritten
+        z = z.copy()
+
+        # make sure z contains all columns that the user wants to transform
+        _check_columns(z, selected_columns=self.columns_)
+        for colname in self.columns_:
+            z[colname] = self.transformers_[colname].transform(z[colname], X)
+
+        # make z a series again in univariate case
+        if is_series:
+            z = z.squeeze("columns")
+        return z
+
+    @if_delegate_has_method(delegate="transformer")
+    def inverse_transform(self, Z, X=None):
+        """
+        Inverse-transform data.
+
+        Returns an inverse-transformed version of Z by iterating over specified
+        columns and applying the univariate series transformer to them.
+        Only works if self.transformer has an inverse-transform method.
+
+        Parameters
+        ----------
+        Z : pd.Series, pd.DataFrame
+            Series to inverse-transform.
+        X : pd.DataFrame, optional (default=None)
+            Exogenous data, passed on to every column transformer.
+
+        Returns
+        -------
+        Z : pd.Series, pd.DataFrame
+            Inverse-transformed time series(es).
+
+        Raises
+        ------
+        ValueError
+            If Z does not contain all columns selected in fit.
+        """
+        self.check_is_fitted()
+        # same input restriction as in fit: the column lookup below needs
+        # a pandas object
+        z = check_series(Z, allow_numpy=False)
+
+        # handle univariate case
+        z, is_series = _check_is_pdseries(z)
+
+        # make copy of z, so that the caller's data is not overwritten
+        z = z.copy()
+
+        # make sure z contains all columns that the user wants to transform
+        _check_columns(z, selected_columns=self.columns_)
+
+        # iterate over columns that are supposed to be inverse_transformed
+        for colname in self.columns_:
+            z[colname] = self.transformers_[colname].inverse_transform(z[colname], X)
+
+        # make z a series again in univariate case
+        if is_series:
+            z = z.squeeze("columns")
+        return z
+
+    @if_delegate_has_method(delegate="transformer")
+    def update(self, Z, X=None, update_params=True):
+        """
+        Update Parameters.
+
+        Update the parameters of the estimator with new data
+        by iterating over specified columns.
+        Only works if self.transformer has an update method.
+
+        Parameters
+        ----------
+        Z : pd.Series, pd.DataFrame
+            New time series
+        X : pd.DataFrame, optional (default=None)
+            Exogenous data, passed on to every column transformer.
+        update_params : bool, optional (default=True)
+
+        Returns
+        -------
+        self : an instance of self
+
+        Raises
+        ------
+        ValueError
+            If Z does not contain all columns selected in fit.
+        """
+        # the fitted per-column transformers are required below
+        self.check_is_fitted()
+        z = check_series(Z, allow_numpy=False)
+
+        # make z a pd.DataFrame in univariate case
+        z, _ = _check_is_pdseries(z)
+
+        # make sure z contains all columns that the user wants to transform
+        _check_columns(z, selected_columns=self.columns_)
+        # NOTE(review): update_params is not forwarded to the column
+        # transformers - confirm whether it should be.
+        for colname in self.columns_:
+            self.transformers_[colname].update(z[colname], X)
+        return self
+
+
+def _check_columns(z, selected_columns):
+    """Make sure z contains all columns that the user wants to transform.
+
+    Parameters
+    ----------
+    z : pd.DataFrame
+        Data whose columns are checked.
+    selected_columns : iterable of column names
+        Columns that have to be present in z.
+
+    Raises
+    ------
+    ValueError
+        If at least one selected column is missing in z.
+    """
+    # sorted by string representation to get a stable, readable message
+    missing = sorted(set(selected_columns).difference(z.columns), key=str)
+    if missing:
+        raise ValueError("Missing columns " + str(missing) + " in Z.")
+
+
+def _check_is_pdseries(z):
+    """Cast z to a pd.DataFrame in the univariate case.
+
+    Parameters
+    ----------
+    z : pd.Series, pd.DataFrame
+
+    Returns
+    -------
+    z : pd.DataFrame
+        One-column frame if a pd.Series was passed, otherwise z itself.
+    is_series : bool
+        True if a pd.Series was passed (and converted), False otherwise.
+    """
+    # anything that is not a pd.Series is handed back untouched
+    if not isinstance(z, pd.Series):
+        return z, False
+    return z.to_frame(), True