Commit 5df5c2a

forecaster tests extended for multivariate estimators, testing univariate and multivariate input
thayeylolu committed Sep 5, 2021
1 parent 0749308 commit 5df5c2a
Showing 1 changed file with 132 additions and 86 deletions: sktime/forecasting/tests/test_all_forecasters.py

@@ -57,17 +57,33 @@
y_train, y_test = temporal_train_test_split(y, train_size=0.75)


+# helper function
+def _get_n_columns(tag):
+    """Return the number of columns to use in tests."""
+    n_columns_list = []
+    if tag == "univariate":
+        n_columns_list = [1]
+    elif tag == "multivariate":
+        n_columns_list = [2]
+    elif tag == "both":
+        n_columns_list = [1, 2]
+    return n_columns_list
+
+
@pytest.mark.parametrize("Forecaster", FORECASTERS)
def test_get_fitted_params(Forecaster):
    """Test get_fitted_params."""
    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=FH0)
-    try:
-        params = f.get_fitted_params()
-        assert isinstance(params, dict)
+    columns = _get_n_columns(f.get_tag("scitype:y"))
+    for n_columns in columns:
+        y_train = _make_series(n_columns=n_columns)
+        f.fit(y_train, fh=FH0)
+        try:
+            params = f.get_fitted_params()
+            assert isinstance(params, dict)

-    except NotImplementedError:
-        pass
+        except NotImplementedError:
+            pass

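For orientation, a minimal self-contained sketch of the contract the new helper relies on. Here make_series is a hypothetical stand-in for sktime's private _make_series utility, which, as used in these tests, returns a pd.Series for one column and a pd.DataFrame otherwise:

import numpy as np
import pandas as pd


def _get_n_columns(tag):
    # same mapping as the helper above: scitype tag -> column counts
    return {"univariate": [1], "multivariate": [2], "both": [1, 2]}.get(tag, [])


def make_series(n_columns=1, n_timepoints=50):
    # hypothetical stand-in for sktime's private _make_series
    index = pd.period_range("2000-01", periods=n_timepoints, freq="M")
    values = np.random.default_rng(0).normal(size=(n_timepoints, n_columns))
    if n_columns == 1:
        return pd.Series(values[:, 0], index=index)
    return pd.DataFrame(values, index=index, columns=[f"c{i}" for i in range(n_columns)])


for tag in ("univariate", "multivariate", "both"):
    for n in _get_n_columns(tag):
        y = make_series(n_columns=n)
        print(tag, n, type(y).__name__)  # Series for 1 column, DataFrame for 2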

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@@ -102,12 +118,15 @@ def test_y_multivariate_raises_error(Forecaster):
        with pytest.raises(ValueError, match=r"univariate"):
            f.fit(y, fh=FH0)

-    elif f.get_tag("scitype:y") == "multivariate":
+    if f.get_tag("scitype:y") == "multivariate":

        y = _make_series(n_columns=1)
        with pytest.raises(ValueError, match=r"2 or more variables"):
            f.fit(y, fh=FH0)

+    if f.get_tag("scitype:y") == "both":
+        pass

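The tag-gated branches above can be exercised directly. A hedged sketch with NaiveForecaster, a univariate forecaster, so only the first branch applies; that fitting it on two columns raises a ValueError mentioning "univariate" is assumed from the match pattern above:

import numpy as np
import pandas as pd
import pytest
from sktime.forecasting.naive import NaiveForecaster

y_multivariate = pd.DataFrame(np.random.randn(30, 2), columns=["a", "b"])
f = NaiveForecaster()
assert f.get_tag("scitype:y") == "univariate"
with pytest.raises(ValueError):  # message expected to mention "univariate"
    f.fit(y_multivariate, fh=[1])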

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@pytest.mark.parametrize("y", INVALID_y_INPUT_TYPES)
@@ -123,17 +142,16 @@ def test_y_invalid_type_raises_error(Forecaster, y):
def test_X_invalid_type_raises_error(Forecaster, X):
    """Test that invalid X input types raise error."""
    f = _construct_instance(Forecaster)
-    if f.get_tag("scitype:y") == "univariate" or f.get_tag("scitype:y") == "both":
-        y_train = _make_series(n_columns=1)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))

-    elif f.get_tag("scitype:y") == "multivariate":
-        y_train = _make_series(n_columns=2)
-    try:
-        with pytest.raises(TypeError, match=r"type"):
-            f.fit(y_train, X, fh=FH0)
-    except NotImplementedError as e:
-        msg = str(e).lower()
-        assert "exogenous" in msg
+    for n_columns in n_columns_list:
+        y_train = _make_series(n_columns=n_columns)
+        try:
+            with pytest.raises(TypeError, match=r"type"):
+                f.fit(y_train, X, fh=FH0)
+        except NotImplementedError as e:
+            msg = str(e).lower()
+            assert "exogenous" in msg

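In isolation the pattern looks as follows; a sketch assuming NaiveForecaster behaves like the forecasters under test, i.e. a plain list is rejected as X with a TypeError, while forecasters without exogenous support raise NotImplementedError mentioning "exogenous":

import numpy as np
import pandas as pd
import pytest
from sktime.forecasting.naive import NaiveForecaster

y_train = pd.Series(np.random.randn(30))
f = NaiveForecaster()
try:
    with pytest.raises(TypeError, match=r"type"):
        f.fit(y_train, X=[1, 2, 3], fh=[1])  # a list is not a valid X container
except NotImplementedError as e:
    assert "exogenous" in str(e).lower()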

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@@ -143,19 +161,22 @@ def test_X_invalid_type_raises_error(Forecaster, X):
@pytest.mark.parametrize("steps", TEST_FHS)  # fh steps
def test_predict_time_index(Forecaster, index_type, fh_type, is_relative, steps):
    """Check that predicted time index matches forecasting horizon."""
-    y_train = make_forecasting_problem(index_type=index_type)
-    cutoff = y_train.index[-1]
-    fh = _make_fh(cutoff, steps, fh_type, is_relative)
    f = _construct_instance(Forecaster)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))

-    # Some estimators may not support all time index types and fh types, hence we
-    # need to catch NotImplementedErrors.
-    try:
-        f.fit(y_train, fh=fh)
-        y_pred = f.predict()
-        _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
-    except NotImplementedError:
-        pass
+    for n_columns in n_columns_list:
+        y_train = _make_series(
+            n_columns=n_columns, index_type=index_type, n_timepoints=50
+        )
+        cutoff = y_train.index[-1]
+        fh = _make_fh(cutoff, steps, fh_type, is_relative)
+
+        try:
+            f.fit(y_train, fh=fh)
+            y_pred = f.predict()
+            _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh=fh)
+        except NotImplementedError:
+            pass

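What _make_fh varies is the horizon's representation: a relative horizon counts steps past the cutoff, an absolute one pins concrete index values, and the two interconvert given a cutoff. A small sketch using the public ForecastingHorizon API:

import pandas as pd
from sktime.forecasting.base import ForecastingHorizon

cutoff = pd.Period("2021-06", freq="M")
fh_rel = ForecastingHorizon([1, 2, 3], is_relative=True)
fh_abs = fh_rel.to_absolute(cutoff)
print(fh_abs.to_pandas())                      # PeriodIndex 2021-07 .. 2021-09
print(fh_abs.to_relative(cutoff).to_pandas())  # [1, 2, 3]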

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@@ -165,21 +186,26 @@ def test_predict_time_index(Forecaster, index_type, fh_type, is_relative, steps)
@pytest.mark.parametrize("steps", TEST_OOS_FHS)  # fh steps
def test_predict_time_index_with_X(Forecaster, index_type, fh_type, is_relative, steps):
    """Check that predicted time index matches forecasting horizon."""
-    y, X = make_forecasting_problem(index_type=index_type, make_X=True)
-    cutoff = y.index[len(y) // 2]
-    fh = _make_fh(cutoff, steps, fh_type, is_relative)
+    f = _construct_instance(Forecaster)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))

-    y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, fh=fh)
+    z, X = make_forecasting_problem(index_type=index_type, make_X=True)

-    f = _construct_instance(Forecaster)
-    # Some estimators may not support all time index types and fh types, hence we
-    # need to catch NotImplementedErrors.
-    try:
-        f.fit(y_train, X_train, fh=fh)
-        y_pred = f.predict(X=X_test)
-        _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
-    except NotImplementedError:
-        pass
+    for n_columns in n_columns_list:
+        y = _make_series(n_columns=n_columns, index_type=index_type)
+        cutoff = y.index[len(y) // 2]
+        fh = _make_fh(cutoff, steps, fh_type, is_relative)
+
+        y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, fh=fh)
+
+        try:
+            f.fit(y_train, X_train, fh=fh)
+            y_pred = f.predict(X=X_test)
+            _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
+        except NotImplementedError:
+            pass

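The split-with-fh step is what keeps X_test aligned with the indices predict will be asked for. A self-contained sketch of that alignment on illustrative data:

import numpy as np
import pandas as pd
from sktime.forecasting.model_selection import temporal_train_test_split

y = pd.Series(np.arange(20.0))
X = pd.DataFrame({"x": np.arange(20.0) * 2})
y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, fh=[1, 2, 3])
assert list(y_test.index) == list(X_test.index)  # X_test covers the horizon
print(len(y_train), list(y_test.index))          # 17 [17, 18, 19]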

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@@ -190,19 +216,21 @@ def test_predict_time_index_in_sample_full(
    Forecaster, index_type, fh_type, is_relative
):
    """Check that predicted time index equals fh for full in-sample predictions."""
-    y_train = make_forecasting_problem(index_type=index_type)
-    cutoff = y_train.index[-1]
-    steps = -np.arange(len(y_train))  # full in-sample fh
-    fh = _make_fh(cutoff, steps, fh_type, is_relative)
    f = _construct_instance(Forecaster)
-    # Some estimators may not support all time index types and fh types, hence we
-    # need to catch NotImplementedErrors.
-    try:
-        f.fit(y_train, fh=fh)
-        y_pred = f.predict()
-        _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
-    except NotImplementedError:
-        pass
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))
+
+    for n_columns in n_columns_list:
+        y_train = _make_series(n_columns=n_columns, index_type=index_type)
+        cutoff = y_train.index[-1]
+        steps = -np.arange(len(y_train))  # full in-sample fh
+        fh = _make_fh(cutoff, steps, fh_type, is_relative)
+
+        try:
+            f.fit(y_train, fh=fh)
+            y_pred = f.predict()
+            _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)
+        except NotImplementedError:
+            pass

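The line steps = -np.arange(len(y_train)) builds the full in-sample horizon: zero and negative steps address the training period itself, with 0 being the cutoff. A sketch for n = 5:

import numpy as np
from sktime.forecasting.base import ForecastingHorizon

n = 5
steps = -np.arange(n)  # array([ 0, -1, -2, -3, -4])
fh = ForecastingHorizon(np.sort(steps), is_relative=True)
print(fh.to_pandas())  # [-4, -3, -2, -1, 0], one step per training point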

def _check_pred_ints(pred_ints: list, y_train: pd.Series, y_pred: pd.Series, fh):
@@ -249,34 +277,42 @@ def test_predict_pred_interval(Forecaster, fh, alpha):
    and no NotImplementedError is raised when asking predict for pred.int
    """
    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=fh)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))

-    if f.get_tag("capability:pred_int"):
-        y_pred, pred_ints = f.predict(return_pred_int=True, alpha=alpha)
-        _check_pred_ints(pred_ints, y_train, y_pred, fh)
-    else:
-        with pytest.raises(NotImplementedError, match="prediction intervals"):
-            f.predict(return_pred_int=True, alpha=alpha)
+    for n_columns in n_columns_list:
+        y_train = _make_series(n_columns=n_columns)
+        f.fit(y_train, fh=fh)
+        if f.get_tag("capability:pred_int"):
+            y_pred, pred_ints = f.predict(return_pred_int=True, alpha=alpha)
+            _check_pred_ints(pred_ints, y_train, y_pred, fh)
+
+        else:
+            with pytest.raises(NotImplementedError, match="prediction intervals"):
+                f.predict(return_pred_int=True, alpha=alpha)

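A sketch of the return_pred_int contract being pinned down, gated on the same capability tag. This is the 2021-era API; newer sktime versions expose intervals through predict_interval instead, so treat the call signature as version-specific:

import numpy as np
import pandas as pd
from sktime.forecasting.naive import NaiveForecaster

y_train = pd.Series(np.random.randn(40).cumsum())
f = NaiveForecaster(strategy="last")
f.fit(y_train, fh=[1, 2, 3])
if f.get_tag("capability:pred_int"):
    # lower/upper bounds at coverage 1 - alpha (shape assumed from _check_pred_ints)
    y_pred, pred_ints = f.predict(return_pred_int=True, alpha=0.05)
else:
    y_pred = f.predict()  # asking for intervals here would raise NotImplementedError
print(y_pred)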

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@pytest.mark.parametrize("fh", TEST_OOS_FHS)
def test_score(Forecaster, fh):
    """Check score method."""
    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=fh)
-    y_pred = f.predict()
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))

-    fh_idx = check_fh(fh).to_indexer()  # get zero based index
-    expected = mean_absolute_percentage_error(
-        y_pred, y_test.iloc[fh_idx], symmetric=True
-    )
+    for n_columns in n_columns_list:
+        y = _make_series(n_columns=n_columns)
+        y_train, y_test = temporal_train_test_split(y)
+        f.fit(y_train, fh=fh)
+        y_pred = f.predict()

-    # compare with actual score
-    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=fh)
-    actual = f.score(y_test.iloc[fh_idx], fh=fh)
-    assert actual == expected
+        fh_idx = check_fh(fh).to_indexer()  # get zero based index
+        expected = mean_absolute_percentage_error(
+            y_pred, y_test.iloc[fh_idx], symmetric=True
+        )
+
+        # compare expected score with actual score
+        actual = f.score(y_test.iloc[fh_idx], fh=fh)
+        assert actual == expected

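The assertion pins score to symmetric MAPE on the horizon rows, with check_fh(fh).to_indexer() converting fh steps into zero-based positions into y_test. A worked example of the metric itself:

import pandas as pd
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

y_true = pd.Series([10.0, 12.0, 11.0])
y_pred = pd.Series([11.0, 11.0, 12.0])
# symmetric MAPE: mean of 2 * |y - yhat| / (|y| + |yhat|)
smape = mean_absolute_percentage_error(y_true, y_pred, symmetric=True)
print(round(smape, 4))  # 0.0897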

@pytest.mark.parametrize("Forecaster", FORECASTERS)
@@ -285,29 +321,39 @@ def test_score(Forecaster, fh):
def test_update_predict_single(Forecaster, fh, update_params):
    """Check correct time index of update-predict."""
    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=fh)
-    y_pred = f.update_predict_single(y_test, update_params=update_params)
-    _assert_correct_pred_time_index(y_pred.index, y_test.index[-1], fh)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))
+
+    for n_columns in n_columns_list:
+        y = _make_series(n_columns=n_columns)
+        y_train, y_test = temporal_train_test_split(y)
+        f.fit(y_train, fh=fh)
+        y_pred = f.update_predict_single(y_test, update_params=update_params)
+        _assert_correct_pred_time_index(y_pred.index, y_test.index[-1], fh)

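update_predict_single updates the forecaster with y_test and then predicts once, so the predicted index hangs off the end of y_test, which is exactly what the assertion checks. A minimal sketch:

import numpy as np
import pandas as pd
from sktime.forecasting.naive import NaiveForecaster

y = pd.Series(np.arange(30.0))
y_train, y_test = y.iloc[:20], y.iloc[20:]
f = NaiveForecaster()
f.fit(y_train, fh=[1, 2])
y_pred = f.update_predict_single(y_test)
print(list(y_pred.index))  # [30, 31], one and two steps past the end of y_test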

def _check_update_predict_predicted_index(
    Forecaster, fh, window_length, step_length, update_params
):
-    y = make_forecasting_problem(all_positive=True, index_type="datetime")
-    y_train, y_test = temporal_train_test_split(y)
-    cv = SlidingWindowSplitter(
-        fh,
-        window_length=window_length,
-        step_length=step_length,
-        start_with_window=False,
-    )
    f = _construct_instance(Forecaster)
-    f.fit(y_train, fh=fh)
-    y_pred = f.update_predict(y_test, cv=cv, update_params=update_params)
-    assert isinstance(y_pred, (pd.Series, pd.DataFrame))
-    expected = _get_expected_index_for_update_predict(y_test, fh, step_length)
-    actual = y_pred.index
-    np.testing.assert_array_equal(actual, expected)
+    n_columns_list = _get_n_columns(f.get_tag("scitype:y"))
+
+    for n_columns in n_columns_list:
+        y = _make_series(
+            n_columns=n_columns, all_positive=True, index_type="datetime"
+        )
+        y_train, y_test = temporal_train_test_split(y)
+        cv = SlidingWindowSplitter(
+            fh,
+            window_length=window_length,
+            step_length=step_length,
+            start_with_window=False,
+        )
+        f.fit(y_train, fh=fh)
+        y_pred = f.update_predict(y_test, cv=cv, update_params=update_params)
+        assert isinstance(y_pred, (pd.Series, pd.DataFrame))
+        expected = _get_expected_index_for_update_predict(y_test, fh, step_length)
+        actual = y_pred.index
+        np.testing.assert_array_equal(actual, expected)

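For context on how the splitter drives update_predict: each window of y_test triggers an update and a prediction, and the union of predicted indices is what _get_expected_index_for_update_predict reconstructs. A sketch of the window positions alone, using the public splitter API:

import numpy as np
import pandas as pd
from sktime.forecasting.model_selection import SlidingWindowSplitter

y = pd.Series(np.arange(10.0))
cv = SlidingWindowSplitter(
    fh=[1], window_length=3, step_length=2, start_with_window=False
)
for train_idx, test_idx in cv.split(y):
    print(train_idx, test_idx)  # sliding training windows, test one step ahead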

# test with update_params=False and different values for step_length
