Commit 5df5c2a

forecaster tests extended for multivariate estimators, testing univariate and multivariate input
thayeylolu committed Sep 5, 2021
1 parent 0749308 commit 5df5c2a
Showing 1 changed file with 132 additions and 86 deletions: sktime/forecasting/tests/test_all_forecasters.py

@@ -57,17 +57,33 @@
y_train, y_test = temporal_train_test_split(y, train_size=0.75)


+# helper function
+def _get_n_columns(tag):
+    """Return the number of columns to use in tests."""
+    n_columns_list = []
+    if tag == "univariate":
+        n_columns_list = [1]
+    elif tag == "multivariate":
+        n_columns_list = [2]
+    elif tag == "both":
+        n_columns_list = [1, 2]
+    return n_columns_list
+
+
@pytest.mark.parametrize("Forecaster", FORECASTERS)
def test_get_fitted_params(Forecaster):
    """Test get_fitted_params."""
    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=FH0)
-    try:
-        params = f.get_fitted_params()
-        assert isinstance(params, dict)
+    columns = _get_n_columns(f.get_tag("scitype:y"))
+    for n_columns in columns:
+        y_train = _make_series(n_columns=n_columns)
+        f.fit(y_train, fh=FH0)
+        try:
+            params = f.get_fitted_params()
+            assert isinstance(params, dict)

-    except NotImplementedError:
-        pass
+        except NotImplementedError:
+            pass

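For orientation, a minimal self-contained sketch of the contract the new helper relies on. Here make_series is a hypothetical stand-in for sktime's private _make_series utility, which, as used in these tests, returns a pd.Series for one column and a pd.DataFrame otherwise:

import numpy as np
import pandas as pd


def _get_n_columns(tag):
    # same mapping as the helper above: scitype tag -> column counts
    return {"univariate": [1], "multivariate": [2], "both": [1, 2]}.get(tag, [])


def make_series(n_columns=1, n_timepoints=50):
    # hypothetical stand-in for sktime's private _make_series
    index = pd.period_range("2000-01", periods=n_timepoints, freq="M")
    values = np.random.default_rng(0).normal(size=(n_timepoints, n_columns))
    if n_columns == 1:
        return pd.Series(values[:, 0], index=index)
    return pd.DataFrame(values, index=index, columns=[f"c{i}" for i in range(n_columns)])


for tag in ("univariate", "multivariate", "both"):
    for n in _get_n_columns(tag):
        y = make_series(n_columns=n)
        print(tag, n, type(y).__name__)  # Series for 1 column, DataFrame for 2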

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@@ -102,12 +118,15 @@ def test_y_multivariate_raises_error(Forecaster):
        with pytest.raises(ValueError, match=r"univariate"):
            f.fit(y, fh=FH0)

-    elif f.get_tag("scitype:y") == "multivariate":
+    if f.get_tag("scitype:y") == "multivariate":

        y = _make_series(n_columns=1)
        with pytest.raises(ValueError, match=r"2 or more variables"):
            f.fit(y, fh=FH0)

+    if f.get_tag("scitype:y") == "both":
+        pass

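The tag-gated branches above can be exercised directly. A hedged sketch with NaiveForecaster, a univariate forecaster, so only the first branch applies; that fitting it on two columns raises a ValueError mentioning "univariate" is assumed from the match pattern above:

import numpy as np
import pandas as pd
import pytest
from sktime.forecasting.naive import NaiveForecaster

y_multivariate = pd.DataFrame(np.random.randn(30, 2), columns=["a", "b"])
f = NaiveForecaster()
assert f.get_tag("scitype:y") == "univariate"
with pytest.raises(ValueError):  # message expected to mention "univariate"
    f.fit(y_multivariate, fh=[1])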

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@pytest.mark.parametrize("y", INVALID_y_INPUT_TYPES)
@@ -123,17 +142,16 @@ def test_y_invalid_type_raises_error(Forecaster, y):
def test_X_invalid_type_raises_error(Forecaster, X):
    """Test that invalid X input types raise error."""
    f = _construct_instance(Forecaster)
-    if f.get_tag("scitype:y") == "univariate" or f.get_tag("scitype:y") == "both":
-        y_train = _make_series(n_columns=1)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))

-    elif f.get_tag("scitype:y") == "multivariate":
-        y_train = _make_series(n_columns=2)
-    try:
-        with pytest.raises(TypeError, match=r"type"):
-            f.fit(y_train, X, fh=FH0)
-    except NotImplementedError as e:
-        msg = str(e).lower()
-        assert "exogenous" in msg
+    for n_columns in n_columns_list:
+        y_train = _make_series(n_columns=n_columns)
+        try:
+            with pytest.raises(TypeError, match=r"type"):
+                f.fit(y_train, X, fh=FH0)
+        except NotImplementedError as e:
+            msg = str(e).lower()
+            assert "exogenous" in msg

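In isolation the pattern looks as follows; a sketch assuming NaiveForecaster behaves like the forecasters under test, i.e. a plain list is rejected as X with a TypeError, while forecasters without exogenous support raise NotImplementedError mentioning "exogenous":

import numpy as np
import pandas as pd
import pytest
from sktime.forecasting.naive import NaiveForecaster

y_train = pd.Series(np.random.randn(30))
f = NaiveForecaster()
try:
    with pytest.raises(TypeError, match=r"type"):
        f.fit(y_train, X=[1, 2, 3], fh=[1])  # a list is not a valid X container
except NotImplementedError as e:
    assert "exogenous" in str(e).lower()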

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@@ -143,19 +161,22 @@ def test_X_invalid_type_raises_error(Forecaster, X):
@pytest.mark.parametrize("steps", TEST_FHS)  # fh steps
def test_predict_time_index(Forecaster, index_type, fh_type, is_relative, steps):
    """Check that predicted time index matches forecasting horizon."""
-    y_train = make_forecasting_problem(index_type=index_type)
-    cutoff = y_train.index[-1]
-    fh = _make_fh(cutoff, steps, fh_type, is_relative)
    f = _construct_instance(Forecaster)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))

-    # Some estimators may not support all time index types and fh types, hence we
-    # need to catch NotImplementedErrors.
-    try:
-        f.fit(y_train, fh=fh)
-        y_pred = f.predict()
-        _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
-    except NotImplementedError:
-        pass
+    for n_columns in n_columns_list:
+        y_train = _make_series(
+            n_columns=n_columns, index_type=index_type, n_timepoints=50
+        )
+        cutoff = y_train.index[-1]
+        fh = _make_fh(cutoff, steps, fh_type, is_relative)
+
+        try:
+            f.fit(y_train, fh=fh)
+            y_pred = f.predict()
+            _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh=fh)
+        except NotImplementedError:
+            pass

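What _make_fh varies is the horizon's representation: a relative horizon counts steps past the cutoff, an absolute one pins concrete index values, and the two interconvert given a cutoff. A small sketch using the public ForecastingHorizon API:

import pandas as pd
from sktime.forecasting.base import ForecastingHorizon

cutoff = pd.Period("2021-06", freq="M")
fh_rel = ForecastingHorizon([1, 2, 3], is_relative=True)
fh_abs = fh_rel.to_absolute(cutoff)
print(fh_abs.to_pandas())                      # PeriodIndex 2021-07 .. 2021-09
print(fh_abs.to_relative(cutoff).to_pandas())  # [1, 2, 3]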

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@@ -165,21 +186,26 @@ def test_predict_time_index(Forecaster, index_type, fh_type, is_relative, steps)
@pytest.mark.parametrize("steps", TEST_OOS_FHS)  # fh steps
def test_predict_time_index_with_X(Forecaster, index_type, fh_type, is_relative, steps):
    """Check that predicted time index matches forecasting horizon."""
-    y, X = make_forecasting_problem(index_type=index_type, make_X=True)
-    cutoff = y.index[len(y) // 2]
-    fh = _make_fh(cutoff, steps, fh_type, is_relative)
+    f = _construct_instance(Forecaster)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))

-    y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, fh=fh)
+    z, X = make_forecasting_problem(index_type=index_type, make_X=True)

-    f = _construct_instance(Forecaster)
-    # Some estimators may not support all time index types and fh types, hence we
-    # need to catch NotImplementedErrors.
-    try:
-        f.fit(y_train, X_train, fh=fh)
-        y_pred = f.predict(X=X_test)
-        _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
-    except NotImplementedError:
-        pass
+    for n_columns in n_columns_list:
+        y = _make_series(n_columns=n_columns, index_type=index_type)
+        cutoff = y.index[len(y) // 2]
+        fh = _make_fh(cutoff, steps, fh_type, is_relative)
+
+        y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, fh=fh)
+
+        try:
+            f.fit(y_train, X_train, fh=fh)
+            y_pred = f.predict(X=X_test)
+            _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
+        except NotImplementedError:
+            pass

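The split-with-fh step is what keeps X_test aligned with the indices predict will be asked for. A self-contained sketch of that alignment on illustrative data:

import numpy as np
import pandas as pd
from sktime.forecasting.model_selection import temporal_train_test_split

y = pd.Series(np.arange(20.0))
X = pd.DataFrame({"x": np.arange(20.0) * 2})
y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, fh=[1, 2, 3])
assert list(y_test.index) == list(X_test.index)  # X_test covers the horizon
print(len(y_train), list(y_test.index))          # 17 [17, 18, 19]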

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@@ -190,19 +216,21 @@ def test_predict_time_index_in_sample_full(
    Forecaster, index_type, fh_type, is_relative
):
    """Check that predicted time index equals fh for full in-sample predictions."""
-    y_train = make_forecasting_problem(index_type=index_type)
-    cutoff = y_train.index[-1]
-    steps = -np.arange(len(y_train))  # full in-sample fh
-    fh = _make_fh(cutoff, steps, fh_type, is_relative)
    f = _construct_instance(Forecaster)
-    # Some estimators may not support all time index types and fh types, hence we
-    # need to catch NotImplementedErrors.
-    try:
-        f.fit(y_train, fh=fh)
-        y_pred = f.predict()
-        _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
-    except NotImplementedError:
-        pass
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))
+
+    for n_columns in n_columns_list:
+        y_train = _make_series(n_columns=n_columns, index_type=index_type)
+        cutoff = y_train.index[-1]
+        steps = -np.arange(len(y_train))  # full in-sample fh
+        fh = _make_fh(cutoff, steps, fh_type, is_relative)
+
+        try:
+            f.fit(y_train, fh=fh)
+            y_pred = f.predict()
+            _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
+        except NotImplementedError:
+            pass

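The line steps = -np.arange(len(y_train)) builds the full in-sample horizon: zero and negative steps address the training period itself, with 0 being the cutoff. A sketch for n = 5:

import numpy as np
from sktime.forecasting.base import ForecastingHorizon

n = 5
steps = -np.arange(n)  # array([ 0, -1, -2, -3, -4])
fh = ForecastingHorizon(np.sort(steps), is_relative=True)
print(fh.to_pandas())  # [-4, -3, -2, -1, 0], one step per training point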

def _check_pred_ints(pred_ints: list, y_train: pd.Series, y_pred: pd.Series, fh):
@@ -249,34 +277,42 @@ def test_predict_pred_interval(Forecaster, fh, alpha):
    and no NotImplementedError is raised when asking predict for pred.int
    """
    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=fh)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))

-    if f.get_tag("capability:pred_int"):
-        y_pred, pred_ints = f.predict(return_pred_int=True, alpha=alpha)
-        _check_pred_ints(pred_ints, y_train, y_pred, fh)
-    else:
-        with pytest.raises(NotImplementedError, match="prediction intervals"):
-            f.predict(return_pred_int=True, alpha=alpha)
+    for n_columns in n_columns_list:
+        y_train = _make_series(n_columns=n_columns)
+        f.fit(y_train, fh=fh)
+        if f.get_tag("capability:pred_int"):
+            y_pred, pred_ints = f.predict(return_pred_int=True, alpha=alpha)
+            _check_pred_ints(pred_ints, y_train, y_pred, fh)
+
+        else:
+            with pytest.raises(NotImplementedError, match="prediction intervals"):
+                f.predict(return_pred_int=True, alpha=alpha)

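A sketch of the return_pred_int contract being pinned down, gated on the same capability tag. This is the 2021-era API; newer sktime versions expose intervals through predict_interval instead, so treat the call signature as version-specific:

import numpy as np
import pandas as pd
from sktime.forecasting.naive import NaiveForecaster

y_train = pd.Series(np.random.randn(40).cumsum())
f = NaiveForecaster(strategy="last")
f.fit(y_train, fh=[1, 2, 3])
if f.get_tag("capability:pred_int"):
    # lower/upper bounds at coverage 1 - alpha (shape assumed from _check_pred_ints)
    y_pred, pred_ints = f.predict(return_pred_int=True, alpha=0.05)
else:
    y_pred = f.predict()  # asking for intervals here would raise NotImplementedError
print(y_pred)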

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@pytest.mark.parametrize("fh", TEST_OOS_FHS)
def test_score(Forecaster, fh):
    """Check score method."""
    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=fh)
-    y_pred = f.predict()
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))

-    fh_idx = check_fh(fh).to_indexer()  # get zero based index
-    expected = mean_absolute_percentage_error(
-        y_pred, y_test.iloc[fh_idx], symmetric=True
-    )
+    for n_columns in n_columns_list:
+        y = _make_series(n_columns=n_columns)
+        y_train, y_test = temporal_train_test_split(y)
+        f.fit(y_train, fh=fh)
+        y_pred = f.predict()

-    # compare with actual score
-    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=fh)
-    actual = f.score(y_test.iloc[fh_idx], fh=fh)
-    assert actual == expected
+        fh_idx = check_fh(fh).to_indexer()  # get zero based index
+        expected = mean_absolute_percentage_error(
+            y_pred, y_test.iloc[fh_idx], symmetric=True
+        )
+
+        # compare expected score with actual score
+        actual = f.score(y_test.iloc[fh_idx], fh=fh)
+        assert actual == expected

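The assertion pins score to symmetric MAPE on the horizon rows, with check_fh(fh).to_indexer() converting fh steps into zero-based positions into y_test. A worked example of the metric itself:

import pandas as pd
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

y_true = pd.Series([10.0, 12.0, 11.0])
y_pred = pd.Series([11.0, 11.0, 12.0])
# symmetric MAPE: mean of 2 * |y - yhat| / (|y| + |yhat|)
smape = mean_absolute_percentage_error(y_true, y_pred, symmetric=True)
print(round(smape, 4))  # 0.0897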

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@@ -285,29 +321,39 @@ def test_score(Forecaster, fh):
def test_update_predict_single(Forecaster, fh, update_params):
    """Check correct time index of update-predict."""
    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=fh)
-    y_pred = f.update_predict_single(y_test, update_params=update_params)
-    _assert_correct_pred_time_index(y_pred.index, y_test.index[-1], fh)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))
+
+    for n_columns in n_columns_list:
+        y = _make_series(n_columns=n_columns)
+        y_train, y_test = temporal_train_test_split(y)
+        f.fit(y_train, fh=fh)
+        y_pred = f.update_predict_single(y_test, update_params=update_params)
+        _assert_correct_pred_time_index(y_pred.index, y_test.index[-1], fh)

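update_predict_single updates the forecaster with y_test and then predicts once, so the predicted index hangs off the end of y_test, which is exactly what the assertion checks. A minimal sketch:

import numpy as np
import pandas as pd
from sktime.forecasting.naive import NaiveForecaster

y = pd.Series(np.arange(30.0))
y_train, y_test = y.iloc[:20], y.iloc[20:]
f = NaiveForecaster()
f.fit(y_train, fh=[1, 2])
y_pred = f.update_predict_single(y_test)
print(list(y_pred.index))  # [30, 31], one and two steps past the end of y_test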

def _check_update_predict_predicted_index(
    Forecaster, fh, window_length, step_length, update_params
):
-    y = make_forecasting_problem(all_positive=True, index_type="datetime")
-    y_train, y_test = temporal_train_test_split(y)
-    cv = SlidingWindowSplitter(
-        fh,
-        window_length=window_length,
-        step_length=step_length,
-        start_with_window=False,
-    )
    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=fh)
-    y_pred = f.update_predict(y_test, cv=cv, update_params=update_params)
-    assert isinstance(y_pred, (pd.Series, pd.DataFrame))
-    expected = _get_expected_index_for_update_predict(y_test, fh, step_length)
-    actual = y_pred.index
-    np.testing.assert_array_equal(actual, expected)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))
+
+    for n_columns in n_columns_list:
+        y = _make_series(
+            n_columns=n_columns, all_positive=True, index_type="datetime"
+        )
+        y_train, y_test = temporal_train_test_split(y)
+        cv = SlidingWindowSplitter(
+            fh,
+            window_length=window_length,
+            step_length=step_length,
+            start_with_window=False,
+        )
+        f.fit(y_train, fh=fh)
+        y_pred = f.update_predict(y_test, cv=cv, update_params=update_params)
+        assert isinstance(y_pred, (pd.Series, pd.DataFrame))
+        expected = _get_expected_index_for_update_predict(y_test, fh, step_length)
+        actual = y_pred.index
+        np.testing.assert_array_equal(actual, expected)

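For context on how the splitter drives update_predict: each window of y_test triggers an update and a prediction, and the union of predicted indices is what _get_expected_index_for_update_predict reconstructs. A sketch of the window positions alone, using the public splitter API:

import numpy as np
import pandas as pd
from sktime.forecasting.model_selection import SlidingWindowSplitter

y = pd.Series(np.arange(10.0))
cv = SlidingWindowSplitter(
    fh=[1], window_length=3, step_length=2, start_with_window=False
)
for train_idx, test_idx in cv.split(y):
    print(train_idx, test_idx)  # sliding training windows, test one step ahead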

# test with update_params=False and different values for step_length
