sktime: [BUG] skorch with ForecastingGridSearchCV backend="loky" not working

Describe the bug

We can use a PyTorch model in sktime via skorch and sktime's native YfromX wrapper. But when we run ForecastingGridSearchCV with backend="loky", which is the default backend, I get a NotFittedError. BTW, it works perfectly with backend="threading".

To Reproduce

# %pip install sktime==0.21.0 skorch==0.14.0 torch==1.13.1 numpy pandas matplotlib seaborn

import numpy as np
import pandas as pd
import torch
from torch import nn
from sktime.datasets import load_uschange
from sktime.forecasting.model_selection import (
    temporal_train_test_split,
    ForecastingGridSearchCV,
)


from skorch import NeuralNetRegressor
from sktime.forecasting.model_selection import SingleWindowSplitter
from sktime.forecasting.compose import YfromX

import pandas as pd
from skorch import NeuralNetRegressor
from torch import nn
from typing import Tuple

# Make float64 the default torch dtype. This was added to work around the
# following skorch error:
# RuntimeError: Expected object of scalar type Double but got scalar type Float for argument #2 'weight'
torch.set_default_dtype(torch.float64)

# Select CUDA when it is available ...
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ... but this second assignment overrides the choice above, so the script
# always runs on the CPU.
device = torch.device("cpu")

############################## UTILITIES ##############################
def load_uschange_corrected(target="Consumption") -> Tuple[pd.Series, pd.DataFrame]:
    """
    Load the sktime USChange dataset and give it a quarterly period index.

    `target` is forwarded to `load_uschange` as `y_name`. Both returned
    objects get a fresh `PeriodIndex` named "Period" that starts at 1960-Q1,
    has quarterly frequency and is as long as the object itself.

    Returns the `(y, X)` pair produced by `load_uschange`, re-indexed in place.
    """
    y, X = load_uschange(y_name=target)

    # Same re-indexing for the target and the exogenous data; each index is
    # sized from the object it is attached to.
    for frame in (y, X):
        frame.index = pd.period_range(
            start="1960-Q1", periods=len(frame), freq="Q", name="Period"
        )

    return y, X

# Lookup from activation name to the torch module class implementing it.
# Both "identity" and None select a pass-through activation.
pytorch_activation_functions = {
    "relu": nn.ReLU,
    "tanh": nn.Tanh,
    "softplus": nn.Softplus,
    "sigmoid": nn.Sigmoid,
    "softmax": nn.Softmax,
    "identity": nn.Identity,
    None: nn.Identity,
}

class SimpleMLP(nn.Module):
    """
    Simple MLP model for Pytorch: two hidden linear layers and a linear output.

    The same activation is applied after each hidden layer; the output layer
    has no activation.

    Parameters
    ----------
    input_size : int
        Width of the first layer. Should be `X.shape[1]`, but see NOTE.
    output_size : int
        Width of the output layer.
    hidden_size_1, hidden_size_2 : int
        Widths of the two hidden layers.
    activation : str or None
        Key of `pytorch_activation_functions`. An unknown key raises KeyError.

    NOTE:
        `input_size` is not required to match the data. `forward` compares it
        with `X.shape[1]` on every call and, on a mismatch, replaces `fc1`
        with a freshly initialised layer of the right width (any weights
        already learned by the old `fc1` are discarded).
        NOTE(review): an optimizer created before such a replacement
        presumably still references the old `fc1` parameters - verify the
        training behaviour before relying on the automatic resize.
    """

    def __init__(
        self,
        input_size: int = 100,  # Should be X.shape[1]
        output_size: int = 1,
        hidden_size_1: int = 30,
        hidden_size_2: int = 10,
        activation: str = "relu",
    ):
        super().__init__()
        self.input_size = input_size
        self.hidden_size_1 = hidden_size_1
        self.hidden_size_2 = hidden_size_2

        self.fc1 = nn.Linear(input_size, hidden_size_1)
        self.activation = pytorch_activation_functions[activation]()
        self.fc2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.output = nn.Linear(hidden_size_2, output_size)

    def forward(self, X):
        """Run the network on a 2-D batch `X`, resizing `fc1` if its width is wrong."""
        n_features = X.shape[1]
        if self.input_size != n_features:
            print(
                f"Input size: {self.input_size} should be equal to exogenous feature"
                f" size.Hence, it is set to {n_features}."
            )
            # The replacement layer must live on the same device and use the
            # same dtype as the layer it replaces; a bare nn.Linear(...) would
            # use the global defaults and break a model that was moved with
            # .to(device) / .double() after construction.
            previous_weight = self.fc1.weight
            self.input_size = n_features
            self.fc1 = nn.Linear(self.input_size, self.hidden_size_1).to(
                device=previous_weight.device, dtype=previous_weight.dtype
            )

        X = self.activation(self.fc1(X))
        X = self.activation(self.fc2(X))
        X = self.output(X)

        return X

class SkorchRegressor(NeuralNetRegressor):
    """
    pandas-aware variant of skorch's NeuralNetRegressor.

    Per the original author, NeuralNetRegressor only takes numpy arrays as
    input. This subclass converts pandas objects to numpy arrays before
    delegating to the parent class, and reshapes a 1-D target to a single
    column in `fit`.
    """

    def __init__(self, *args, **kwargs):
        # Plain pass-through to the parent constructor.
        super().__init__(*args, **kwargs)

    def fit(self, X, y, **fit_params):
        """Convert pandas `X` / `y` to numpy, force `y` to 2-D, then fit."""
        target = y.to_numpy() if isinstance(y, (pd.Series, pd.DataFrame)) else y
        features = X.to_numpy() if isinstance(X, pd.DataFrame) else X

        # A flat target becomes a single-column 2-D array.
        if target.ndim == 1:
            target = target.reshape(-1, 1)

        return super().fit(features, target, **fit_params)

    def predict(self, X):
        """Convert a pandas `X` to numpy and delegate to the parent `predict`."""
        if isinstance(X, (pd.Series, pd.DataFrame)):
            if isinstance(X, pd.Series):
                # A Series is unexpected here; the note is kept for debugging.
                print("SkorchRegressor.predict X is a series. Why? (dev note)")
            X = X.to_numpy()

        return super().predict(X)

    def predict_proba(self, X):
        """Convert a pandas `X` to numpy and delegate to the parent `predict_proba`."""
        if isinstance(X, (pd.Series, pd.DataFrame)):
            if isinstance(X, pd.Series):
                # A Series is unexpected here; the note is kept for debugging.
                print("SkorchRegressor.predict_proba X is a series. Why? (dev note)")
            X = X.to_numpy()

        return super().predict_proba(X)

############################## UTILITIES ##############################

# Reproduction driver: builds the skorch-wrapped MLP, wraps it as a forecaster
# with YfromX and runs a parallel grid search over it.
y, X = load_uschange_corrected()
test_size = 28

# Split target and exogenous data into train/test parts with `test_size`
# observations in the test part.
y_train, y_test, X_train, X_test = temporal_train_test_split(y=y, X=X, test_size=test_size)

# Steps 1..len(y_test); only used by the commented-out predict call at the end.
fh = np.arange(1, len(y_test) + 1)

callbacks = None

# The first layer is sized from the number of exogenous columns.
pytorch_model = SimpleMLP(
    input_size=X_train.shape[1], output_size=1, hidden_size_1=30, hidden_size_2=10
)

# Note that an already constructed module instance (not the class) is passed.
skorch_model = SkorchRegressor(
    pytorch_model,
    max_epochs=100,
    lr=0.1,
    device=device,
    criterion=torch.nn.MSELoss,
    batch_size=64,
    train_split=None,
    iterator_train__shuffle=False,
    iterator_train__num_workers=2,
    optimizer=torch.optim.Adam,
    verbose=0,
    callbacks=callbacks,
)
sktime_pytorch_model = YfromX(skorch_model)

# 2 x 2 x 2 = 8 parameter combinations; the "estimator__" prefix addresses the
# regressor wrapped by YfromX.
params = {
    "estimator__lr": [0.05, 0.1],
    "estimator__batch_size": [32, 64],
    # "estimator__optimizer": [torch.optim.Adam, torch.optim.SGD, torch.optim.RMSprop],
    "estimator__max_epochs": [10, 20],
    # "estimator__module__activation": ["relu", "tanh"],
}

# cv = ExpandingGreedySplitter(folds=3, test_size=test_size)
cv = SingleWindowSplitter(fh=test_size)

# backend="loky" is the configuration this report says ends in a
# NotFittedError; backend="threading" is reported to work.
gs = ForecastingGridSearchCV(
    forecaster=sktime_pytorch_model,
    param_grid=params,
    strategy="refit",
    refit=True,
    cv=cv,
    verbose=1,
    n_jobs=-1,
    backend="loky",
)
gs.fit(y=y_train, X=X_train)

# y_pred = gs.predict(fh=fh, X=X_test)

Expected behavior

The model is successfully fit when grid searching.

Additional context

I think implementing some tests for skorch models would be good for catching this kind of bug. Moreover, I have implemented a custom SkorchRegressor, since the original skorch.NeuralNetRegressor doesn't work with sktime.

Versions

System: python: 3.8.17 (default, Jul 5 2023, 21:04:15) [GCC 11.2.0]

Python dependencies: pip: 23.2.1 sktime: 0.21.0 sklearn: 1.3.0 skbase: 0.4.6 numpy: 1.24.4 scipy: 1.10.1 pandas: 2.0.3 matplotlib: 3.7.2 joblib: 1.3.1 statsmodels: None numba: None pmdarima: None tsfresh: None tensorflow: None

Additional dependencies: torch: 1.13.1 skorch: 0.14.0

About this issue

  • Original URL
  • State: closed
  • Created a year ago
  • Comments: 33 (7 by maintainers)

Most upvoted comments

Really interesting point @ilkersigirci . What do you think @benHeid , @fkiraly ? ( I’m just a mentee ^^ )

Yes, but you are a competent mentee. Further, as a free citizen you are entitled to your own opinions. In combination of the two, anyone is advised to consider your opinions carefully and seriously.

Btw, I think we should use skorch as pytorch adapter rather than whole new base classes. I don’t think sktime should reinvent the wheel here. What do you think about it @BensHamza ?

What do you think @benHeid , @fkiraly ?

I think that skorch has a specific scope which is narrower than what we need in sktime. Namely, skorch adapts to the sklearn interface, which covers only tabular models (classifiers and regressors specifically), which is different from the type of model (scientific) and the interfaces that sktime focuses on.

For instance, there is no clear way how you would adapt a pytorch based N-BEATS model as a BaseForecaster via skorch, without hacking the interface at least.

Having said that, it is perhaps not a bad idea to look at what we can adapt from skorch, perhaps even in the form of an extension. I would doubt though that it is a straightforward extension, but definitely we should think about it design-wise. Committing to use only skorch, or focus on extending it, might be a rabbit-hole at this point in time - due to complexity of the base class (the NeuralNet)

It seems so. Actually, the issue can be closed. However, it would be good to consider adding skorch adapter in sktime.

Really interesting point @ilkersigirci . What do you think @benHeid , @fkiraly ? ( I’m just a mentee ^^ )

Woow, it does. It is an exciting finding. I have set it because of the error I was getting before: RuntimeError: Expected object of scalar type Double but got scalar type Float for argument #2 ‘weight’.

Then, I continued to use it without any thought. Now, the problem seems to be gone. Thanks

I guess it's a PyTorch problem. The first code provided works when I comment out the line torch.set_default_dtype(torch.float64) and cast X and y to np.float32 once loaded. Here is the code to reproduce:

# %pip install sktime==0.21.0 skorch==0.14.0 torch==1.13.1 numpy pandas matplotlib seaborn

import numpy as np
import pandas as pd
import torch
from torch import nn
from sktime.datasets import load_uschange
from sktime.forecasting.model_selection import (
    temporal_train_test_split,
    ForecastingGridSearchCV,
)


from skorch import NeuralNetRegressor
from sktime.forecasting.model_selection import SingleWindowSplitter
from sktime.forecasting.compose import YfromX

import pandas as pd
from skorch import NeuralNetRegressor
from torch import nn
from typing import Tuple

# The float64 default dtype (originally added to avoid the skorch error below)
# is disabled in this variant; X and y are cast to float32 after loading instead.
# RuntimeError: Expected object of scalar type Double but got scalar type Float for argument #2 'weight'
#torch.set_default_dtype(torch.float64)

# Select CUDA when it is available ...
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ... but this second assignment overrides the choice above, so the script
# always runs on the CPU.
device = torch.device("cpu")

############################## UTILITIES ##############################
def load_uschange_corrected(target="Consumption") -> Tuple[pd.Series, pd.DataFrame]:
    """
    Load the sktime USChange dataset and give it a quarterly period index.

    `target` is forwarded to `load_uschange` as `y_name`. Both returned
    objects get a fresh `PeriodIndex` named "Period" that starts at 1960-Q1,
    has quarterly frequency and is as long as the object itself.

    Returns the `(y, X)` pair produced by `load_uschange`, re-indexed in place.
    """
    y, X = load_uschange(y_name=target)

    # Same re-indexing for the target and the exogenous data; each index is
    # sized from the object it is attached to.
    for frame in (y, X):
        frame.index = pd.period_range(
            start="1960-Q1", periods=len(frame), freq="Q", name="Period"
        )

    return y, X

# Lookup from activation name to the torch module class implementing it.
# Both "identity" and None select a pass-through activation.
pytorch_activation_functions = {
    "relu": nn.ReLU,
    "tanh": nn.Tanh,
    "softplus": nn.Softplus,
    "sigmoid": nn.Sigmoid,
    "softmax": nn.Softmax,
    "identity": nn.Identity,
    None: nn.Identity,
}

class SimpleMLP(nn.Module):
    """
    Simple MLP model for Pytorch: two hidden linear layers and a linear output.

    The same activation is applied after each hidden layer; the output layer
    has no activation.

    Parameters
    ----------
    input_size : int
        Width of the first layer. Should be `X.shape[1]`, but see NOTE.
    output_size : int
        Width of the output layer.
    hidden_size_1, hidden_size_2 : int
        Widths of the two hidden layers.
    activation : str or None
        Key of `pytorch_activation_functions`. An unknown key raises KeyError.

    NOTE:
        `input_size` is not required to match the data. `forward` compares it
        with `X.shape[1]` on every call and, on a mismatch, replaces `fc1`
        with a freshly initialised layer of the right width (any weights
        already learned by the old `fc1` are discarded).
        NOTE(review): an optimizer created before such a replacement
        presumably still references the old `fc1` parameters - verify the
        training behaviour before relying on the automatic resize.
    """

    def __init__(
        self,
        input_size: int = 100,  # Should be X.shape[1]
        output_size: int = 1,
        hidden_size_1: int = 30,
        hidden_size_2: int = 10,
        activation: str = "relu",
    ):
        super().__init__()
        self.input_size = input_size
        self.hidden_size_1 = hidden_size_1
        self.hidden_size_2 = hidden_size_2

        self.fc1 = nn.Linear(input_size, hidden_size_1)
        self.activation = pytorch_activation_functions[activation]()
        self.fc2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.output = nn.Linear(hidden_size_2, output_size)

    def forward(self, X):
        """Run the network on a 2-D batch `X`, resizing `fc1` if its width is wrong."""
        n_features = X.shape[1]
        if self.input_size != n_features:
            print(
                f"Input size: {self.input_size} should be equal to exogenous feature"
                f" size.Hence, it is set to {n_features}."
            )
            # The replacement layer must live on the same device and use the
            # same dtype as the layer it replaces; a bare nn.Linear(...) would
            # use the global defaults and break a model that was moved with
            # .to(device) / .double() after construction.
            previous_weight = self.fc1.weight
            self.input_size = n_features
            self.fc1 = nn.Linear(self.input_size, self.hidden_size_1).to(
                device=previous_weight.device, dtype=previous_weight.dtype
            )

        X = self.activation(self.fc1(X))
        X = self.activation(self.fc2(X))
        X = self.output(X)

        return X

class SkorchRegressor(NeuralNetRegressor):
    """
    pandas-aware variant of skorch's NeuralNetRegressor.

    Per the original author, NeuralNetRegressor only takes numpy arrays as
    input. This subclass converts pandas objects to numpy arrays before
    delegating to the parent class, and reshapes a 1-D target to a single
    column in `fit`.
    """

    def __init__(self, *args, **kwargs):
        # Plain pass-through to the parent constructor.
        super().__init__(*args, **kwargs)

    def fit(self, X, y, **fit_params):
        """Convert pandas `X` / `y` to numpy, force `y` to 2-D, then fit."""
        target = y.to_numpy() if isinstance(y, (pd.Series, pd.DataFrame)) else y
        features = X.to_numpy() if isinstance(X, pd.DataFrame) else X

        # A flat target becomes a single-column 2-D array.
        if target.ndim == 1:
            target = target.reshape(-1, 1)

        return super().fit(features, target, **fit_params)

    def predict(self, X):
        """Convert a pandas `X` to numpy and delegate to the parent `predict`."""
        if isinstance(X, (pd.Series, pd.DataFrame)):
            if isinstance(X, pd.Series):
                # A Series is unexpected here; the note is kept for debugging.
                print("SkorchRegressor.predict X is a series. Why? (dev note)")
            X = X.to_numpy()

        return super().predict(X)

    def predict_proba(self, X):
        """Convert a pandas `X` to numpy and delegate to the parent `predict_proba`."""
        if isinstance(X, (pd.Series, pd.DataFrame)):
            if isinstance(X, pd.Series):
                # A Series is unexpected here; the note is kept for debugging.
                print("SkorchRegressor.predict_proba X is a series. Why? (dev note)")
            X = X.to_numpy()

        return super().predict_proba(X)

############################## UTILITIES ##############################

# Variant of the reproduction driver: the data is cast to float32 here rather
# than switching torch's default dtype to float64.
y, X = load_uschange_corrected()
X = X.astype(np.float32)
y = y.astype(np.float32)
test_size = 28

# Split target and exogenous data into train/test parts with `test_size`
# observations in the test part.
y_train, y_test, X_train, X_test = temporal_train_test_split(y=y, X=X, test_size=test_size)

# Steps 1..len(y_test); only used by the commented-out predict call at the end.
fh = np.arange(1, len(y_test) + 1)

callbacks = None

# The first layer is sized from the number of exogenous columns.
pytorch_model = SimpleMLP(
    input_size=X_train.shape[1], output_size=1, hidden_size_1=30, hidden_size_2=10
)

# Note that an already constructed module instance (not the class) is passed.
# The data-loader worker setting is commented out in this variant.
skorch_model = SkorchRegressor(
    pytorch_model,
    max_epochs=100,
    lr=0.1,
    device=device,
    criterion=torch.nn.MSELoss,
    batch_size=64,
    train_split=None,
    iterator_train__shuffle=False,
    #iterator_train__num_workers=2,
    optimizer=torch.optim.Adam,
    verbose=0,
    callbacks=callbacks,
)
sktime_pytorch_model = YfromX(skorch_model)

# Only the module's `activation` init parameter is searched here; the thread
# reports that this entry triggers the failure for the original reporter.
params = {
    "estimator__module__activation": ["relu", "tanh"],
}

# cv = ExpandingGreedySplitter(folds=3, test_size=test_size)
cv = SingleWindowSplitter(fh=test_size)

# backend="loky" is the configuration under investigation in this report.
gs = ForecastingGridSearchCV(
    forecaster=sktime_pytorch_model,
    param_grid=params,
    strategy="refit",
    refit=True,
    cv=cv,
    verbose=1,
    n_jobs=-1,
    backend="loky",
)
gs.fit(y=y_train, X=X_train)

# y_pred = gs.predict(fh=fh, X=X_test)

Edit: The code works, but I don't think the SimpleMLP class does the job for load_uschange in this code. I'm only referring to the bug specification here.

Could you try the same @BensHamza?

That line "estimator__module__activation": ["relu", "tanh"], causes the same issue for me.

BTW, this line enables grid searching over the inner PyTorch model's init parameters, and it runs perfectly with the sklearn GridSearch example.

Interesting, It is more clear now I guess. Thank you for the bug report !

Oh, I get it now. The above code works perfectly on my computer too. I uncommented the entry "estimator__module__activation": ["relu", "tanh"] in params; this line causes the issue. Could you try the same @BensHamza? Sorry for the disturbance.

BTW, this line enables grid searching over the inner PyTorch model's init parameters, and it runs perfectly with the sklearn GridSearch example. It can be seen in the skorch docs.

I think it is a problem with joblib.Parallel() since in SkorchRegressor the param iterator_train__num_workers is set to 2 but in ForecastingGridSearchCV , n_jobs=-1.

I tested with the default value of iterator_train__num_workers (0, I guess) and it works well.

Did you try with backend="loky"? Because I have tried changing iterator_train__num_workers, but still got the error. BTW, as I said in the issue description, it works perfectly with backend="threading".