NeuralForecast 多变量的处理 包括训练和推理
flyfish
两个表格（Y_df 与 X_df）按 unique_id 和 ds 合并后的结果
unique_id ds y ex_1 ex_2 ex_3 ex_4
0 HUFL 2016-07-01 00:00:00 -0.041413 -0.500000 0.166667 -0.500000 -0.001370
1 HUFL 2016-07-01 00:15:00 -0.185467 -0.500000 0.166667 -0.500000 -0.001370
2 HUFL 2016-07-01 00:30:00 -0.257495 -0.500000 0.166667 -0.500000 -0.001370
3 HUFL 2016-07-01 00:45:00 -0.577510 -0.500000 0.166667 -0.500000 -0.001370
4 HUFL 2016-07-01 01:00:00 -0.385501 -0.456522 0.166667 -0.500000 -0.001370
... ... ... ... ... ... ... ...
403195 OT 2018-02-20 22:45:00 -1.581325 0.456522 -0.333333 0.133333 -0.363014
403196 OT 2018-02-20 23:00:00 -1.581325 0.500000 -0.333333 0.133333 -0.363014
403197 OT 2018-02-20 23:15:00 -1.581325 0.500000 -0.333333 0.133333 -0.363014
403198 OT 2018-02-20 23:30:00 -1.562328 0.500000 -0.333333 0.133333 -0.363014
403199 OT 2018-02-20 23:45:00 -1.562328 0.500000 -0.333333 0.133333 -0.363014
import pandas as pd
from datasetsforecast.long_horizon import LongHorizon

# Change this to your own data to try the model.
Y_df, X_df, _ = LongHorizon.load(directory='./', group='ETTm2')
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

# X_df contains the exogenous features, which we add to Y_df.
X_df['ds'] = pd.to_datetime(X_df['ds'])
Y_df = Y_df.merge(X_df, on=['unique_id', 'ds'], how='left')

# `Y_df.head` without parentheses is a bound method, not data; print the
# frame itself (pandas abbreviates a long frame to its first and last rows).
print(Y_df)

# We make validation and test splits: 20% of the distinct timestamps each.
n_time = len(Y_df.ds.unique())
val_size = int(.2 * n_time)
test_size = int(.2 * n_time)
@dataclass
class LongHorizon:
    """Wrapper for the long-horizon forecasting benchmark datasets.

    Provides utilities to download and wrangle the following datasets:
    ETT, ECL, Exchange, Traffic, ILI and Weather.

    - Each set is normalized with the train data mean and standard deviation.
    - Datasets are partitioned into train, validation and test splits.
    - For all datasets: 70%, 10%, and 20% of observations are train,
      validation, test, except ETT that uses 20% validation.
    """

    # Archive that `download` fetches when the datasets folder is missing.
    source_url: str = 'https://nhits-experiments.s3.amazonaws.com/datasets.zip'

    @staticmethod
    def load(directory: str,
             group: str,
             cache: bool = True) -> Tuple[pd.DataFrame,
                                          Optional[pd.DataFrame],
                                          Optional[pd.DataFrame]]:
        """Download (if needed) and load one long-horizon benchmark dataset.

        Parameters
        ----------
        directory: str
            Directory where data will be downloaded.
        group: str
            Group name.
            Allowed groups: 'ETTh1', 'ETTh2', 'ETTm1', 'ETTm2',
            'ECL', 'Exchange', 'Traffic', 'Weather', 'ILI'.
        cache: bool
            If `True`, loads the pickled result when it exists and saves it
            after a fresh load.

        Returns
        -------
        y_df: pd.DataFrame
            Target time series with columns ['unique_id', 'ds', 'y'].
        X_df: pd.DataFrame
            Exogenous time series: the ['unique_id', 'ds'] pairs of `y_df`
            left-joined on 'ds' with the columns of `df_x.csv`.
        S_df: None
            Static exogenous variables; this loader always returns `None`.

        Raises
        ------
        ValueError
            If `group` is not one of `LongHorizonInfo.groups`.
        """
        if group not in LongHorizonInfo.groups:
            raise ValueError(f'group not found {group}')

        path = f'{directory}/longhorizon/datasets'
        file_cache = f'{path}/{group}.p'

        # Check the flag first so the filesystem is not touched when caching
        # is disabled.
        if cache and os.path.exists(file_cache):
            df, X_df, S_df = pd.read_pickle(file_cache)
            return df, X_df, S_df

        LongHorizon.download(directory)

        # The CSVs live under '<name>/S' for ETTh1/ETTh2 and '<name>/M' for
        # every other group.
        kind = 'M' if group not in ['ETTh1', 'ETTh2'] else 'S'
        name = LongHorizonInfo[group].name

        y_df = pd.read_csv(f'{path}/{name}/{kind}/df_y.csv')
        y_df = y_df.sort_values(['unique_id', 'ds'], ignore_index=True)
        y_df = y_df[['unique_id', 'ds', 'y']]

        # Repeat the timestamp-level exogenous columns for every series by
        # joining them onto the (unique_id, ds) pairs of the target frame.
        X_df = pd.read_csv(f'{path}/{name}/{kind}/df_x.csv')
        X_df = y_df.drop('y', axis=1).merge(X_df, how='left', on=['ds'])

        S_df = None

        if cache:
            pd.to_pickle((y_df, X_df, S_df), file_cache)

        return y_df, X_df, S_df

    @staticmethod
    def download(directory: str) -> None:
        """Fetch the datasets archive unless the datasets folder exists.

        Calls `download_file` with `LongHorizon.source_url` and
        `decompress=True` when '<directory>/longhorizon/datasets/' is absent.

        Parameters
        ----------
        directory: str
            Directory path to download dataset.
        """
        path = f'{directory}/longhorizon/datasets/'
        if not os.path.exists(path):
            download_file(path, LongHorizon.source_url, decompress=True)
完整的训练并保存模型文件的代码
import pandas as pd
from datasetsforecast.long_horizon import LongHorizon
from neuralforecast.core import NeuralForecast
from neuralforecast.losses.pytorch import MSE, MAE
from neuralforecast.models import TSMixer, TSMixerx, NHITS, MLPMultivariate, VanillaTransformer

# Change this to your own data to try the model.
Y_df, X_df, _ = LongHorizon.load(directory='./', group='ETTm2')
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

# X_df contains the exogenous features, which we add to Y_df.
X_df['ds'] = pd.to_datetime(X_df['ds'])
Y_df = Y_df.merge(X_df, on=['unique_id', 'ds'], how='left')

# `Y_df.head` without parentheses is a bound method, not data; print the
# frame itself (pandas abbreviates a long frame to its first and last rows).
print(Y_df)

# We make validation and test splits: 20% of the distinct timestamps each.
n_time = len(Y_df.ds.unique())
val_size = int(.2 * n_time)
test_size = int(.2 * n_time)

horizon = 12
input_size = 24

# max_steps=1 keeps this run to a single optimisation step.
models = [
    VanillaTransformer(
        h=horizon,
        input_size=input_size,
        max_steps=1,
        val_check_steps=1,
        early_stop_patience_steps=1,
        scaler_type='identity',
        valid_loss=MAE(),
        random_seed=12345678,
    ),
]

nf = NeuralForecast(models=models, freq='15min')

Y_hat_df = nf.cross_validation(
    df=Y_df,
    val_size=val_size,
    test_size=test_size,
    n_windows=None,
)
Y_hat_df = Y_hat_df.reset_index()

# Persist the models together with the dataset so the inference script can
# restore them from the same path.
nf.save(
    path='./checkpoints/test_run/',
    model_index=None,
    overwrite=True,
    save_dataset=True,
)
完整的推理代码
import pandas as pd
from neuralforecast.core import NeuralForecast
from neuralforecast.models import VanillaTransformer
from neuralforecast.losses.pytorch import MAE

# Sample data: five 15-minute observations of one series with four
# exogenous columns.
data = {
    'unique_id': ['HUFL'] * 5,
    'ds': [
        '2016-07-01 00:00:00',
        '2016-07-01 00:15:00',
        '2016-07-01 00:30:00',
        '2016-07-01 00:45:00',
        '2016-07-01 01:00:00',
    ],
    'y': [-0.041413, -0.185467, -0.257495, -0.577510, -0.385501],
    'ex_1': [-0.5, -0.5, -0.5, -0.5, -0.456522],
    'ex_2': [0.166667, 0.166667, 0.166667, 0.166667, 0.166667],
    'ex_3': [-0.5, -0.5, -0.5, -0.5, -0.5],
    'ex_4': [-0.001370, -0.001370, -0.001370, -0.001370, -0.001370],
}

# Build the DataFrame.
df = pd.DataFrame(data)
df['ds'] = pd.to_datetime(df['ds'])

# Model configuration, matching the values used by the training script.
# NOTE(review): `models` is not passed to anything below; the checkpoint
# loaded next presumably supplies the model - confirm before removing.
horizon = 12
input_size = 24
models = [
    VanillaTransformer(
        h=horizon,
        input_size=input_size,
        max_steps=1,
        val_check_steps=1,
        early_stop_patience_steps=1,
        scaler_type='identity',
        valid_loss=MAE(),
        random_seed=12345678,
    )
]

# Load the trained model.
nf = NeuralForecast.load(path='./checkpoints/test_run/')

# Data preparation: split into target and exogenous frames.
Y_df = df[['unique_id', 'ds', 'y']]
X_df = df[['unique_id', 'ds', 'ex_1', 'ex_2', 'ex_3', 'ex_4']]

# Merge the two frames back together on the series id and timestamp.
Y_df = Y_df.merge(X_df, on=['unique_id', 'ds'], how='left')

# Run the prediction.
predictions = nf.predict(Y_df)

# Print the prediction result.
print(predictions)