NeuralForecast 模型的参数 windows_batch 的含义
flyfish
# Demo: train a VanillaTransformer on a tiny series and print every model
# attribute, so the windows_batch-related hyperparameters can be inspected.
import pandas as pd
import numpy as np

from neuralforecast.core import NeuralForecast
from neuralforecast.models import VanillaTransformer

# First 10 monthly AirPassengers observations.
AirPassengers = np.array(
    [112.0, 118.0, 132.0, 129.0, 121.0, 135.0, 148.0, 148.0, 136.0, 119.0],
    dtype=np.float32,
)

# NeuralForecast expects the long format with columns: unique_id / ds / y.
AirPassengersDF = pd.DataFrame(
    {
        "unique_id": np.ones(len(AirPassengers)),
        "ds": pd.date_range(
            start="1949-01-01",
            periods=len(AirPassengers),
            freq=pd.offsets.MonthEnd(),
        ),
        "y": AirPassengers,
    }
)

Y_df = AirPassengersDF
Y_df = Y_df.reset_index(drop=True)
Y_df.head()

# Model training: forecast horizon h=3, lookback input_size=2*h=6,
# max_steps=2 keeps the run short for demonstration purposes.
horizon = 3
models = [VanillaTransformer(input_size=2 * horizon, h=horizon, max_steps=2)]
nf = NeuralForecast(models=models, freq='M')

# Dump every attribute of each model; the output below shows
# windows_batch_size, inference_windows_batch_size, etc.
for model in nf.models:
    print(f'Model: {model.__class__.__name__}')
    for param, value in model.__dict__.items():
        print(f' {param}: {value}')

nf.fit(df=Y_df)
输出
Seed set to 1
Model: VanillaTransformertraining: True_parameters: OrderedDict()_buffers: OrderedDict()_non_persistent_buffers_set: set()_backward_pre_hooks: OrderedDict()_backward_hooks: OrderedDict()_is_full_backward_hook: None_forward_hooks: OrderedDict()_forward_hooks_with_kwargs: OrderedDict()_forward_hooks_always_called: OrderedDict()_forward_pre_hooks: OrderedDict()_forward_pre_hooks_with_kwargs: OrderedDict()_state_dict_hooks: OrderedDict()_state_dict_pre_hooks: OrderedDict()_load_state_dict_pre_hooks: OrderedDict()_load_state_dict_post_hooks: OrderedDict()_modules: OrderedDict([('loss', MAE()), ('valid_loss', MAE()), ('padder_train', ConstantPad1d(padding=(0, 3), value=0)), ('scaler', TemporalNorm()), ('enc_embedding', DataEmbedding((value_embedding): TokenEmbedding((tokenConv): Conv1d(1, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False, padding_mode=circular))(position_embedding): PositionalEmbedding()(dropout): Dropout(p=0.05, inplace=False)
)), ('dec_embedding', DataEmbedding((value_embedding): TokenEmbedding((tokenConv): Conv1d(1, 128, kernel_size=(3,), stride=(1,), padding=(1,), bias=False, padding_mode=circular))(position_embedding): PositionalEmbedding()(dropout): Dropout(p=0.05, inplace=False)
)), ('encoder', TransEncoder((attn_layers): ModuleList((0-1): 2 x TransEncoderLayer((attention): AttentionLayer((inner_attention): FullAttention((dropout): Dropout(p=0.05, inplace=False))(query_projection): Linear(in_features=128, out_features=128, bias=True)(key_projection): Linear(in_features=128, out_features=128, bias=True)(value_projection): Linear(in_features=128, out_features=128, bias=True)(out_projection): Linear(in_features=128, out_features=128, bias=True))(conv1): Conv1d(128, 32, kernel_size=(1,), stride=(1,))(conv2): Conv1d(32, 128, kernel_size=(1,), stride=(1,))(norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)(norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)(dropout): Dropout(p=0.05, inplace=False)))(norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
)), ('decoder', TransDecoder((layers): ModuleList((0): TransDecoderLayer((self_attention): AttentionLayer((inner_attention): FullAttention((dropout): Dropout(p=0.05, inplace=False))(query_projection): Linear(in_features=128, out_features=128, bias=True)(key_projection): Linear(in_features=128, out_features=128, bias=True)(value_projection): Linear(in_features=128, out_features=128, bias=True)(out_projection): Linear(in_features=128, out_features=128, bias=True))(cross_attention): AttentionLayer((inner_attention): FullAttention((dropout): Dropout(p=0.05, inplace=False))(query_projection): Linear(in_features=128, out_features=128, bias=True)(key_projection): Linear(in_features=128, out_features=128, bias=True)(value_projection): Linear(in_features=128, out_features=128, bias=True)(out_projection): Linear(in_features=128, out_features=128, bias=True))(conv1): Conv1d(128, 32, kernel_size=(1,), stride=(1,))(conv2): Conv1d(32, 128, kernel_size=(1,), stride=(1,))(norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)(norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)(norm3): LayerNorm((128,), eps=1e-05, elementwise_affine=True)(dropout): Dropout(p=0.05, inplace=False)))(norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)(projection): Linear(in_features=128, out_features=1, bias=True)
))])prepare_data_per_node: Trueallow_zero_length_dataloader_with_multiple_devices: False_log_hyperparams: True_dtype: torch.float32_device: cpu_trainer: None_example_input_array: None_automatic_optimization: True_strict_loading: None_current_fx_name: None_param_requires_grad_state: {}_metric_attributes: None_compiler_ctx: None_fabric: None_fabric_optimizers: []_hparams_name: kwargs_hparams: "activation": gelu
"alias": None
"batch_size": 32
"conv_hidden_size": 32
"decoder_input_size_multiplier": 0.5
"decoder_layers": 1
"drop_last_loader": False
"dropout": 0.05
"early_stop_patience_steps": -1
"encoder_layers": 2
"exclude_insample_y": False
"futr_exog_list": None
"h": 3
"hidden_size": 128
"hist_exog_list": None
"inference_windows_batch_size": 1024
"input_size": 6
"learning_rate": 0.0001
"loss": MAE()
"lr_scheduler": None
"lr_scheduler_kwargs": None
"max_steps": 2
"n_head": 4
"num_lr_decays": -1
"num_workers_loader": 0
"optimizer": None
"optimizer_kwargs": None
"random_seed": 1
"scaler_type": identity
"start_padding_enabled": False
"stat_exog_list": None
"step_size": 1
"val_check_steps": 100
"valid_batch_size": None
"valid_loss": None
"windows_batch_size": 1024_hparams_initial: "activation": gelu
"alias": None
"batch_size": 32
"conv_hidden_size": 32
"decoder_input_size_multiplier": 0.5
"decoder_layers": 1
"drop_last_loader": False
"dropout": 0.05
"early_stop_patience_steps": -1
"encoder_layers": 2
"exclude_insample_y": False
"futr_exog_list": None
"h": 3
"hidden_size": 128
"hist_exog_list": None
"inference_windows_batch_size": 1024
"input_size": 6
"learning_rate": 0.0001
"loss": MAE()
"lr_scheduler": None
"lr_scheduler_kwargs": None
"max_steps": 2
"n_head": 4
"num_lr_decays": -1
"num_workers_loader": 0
"optimizer": None
"optimizer_kwargs": None
"random_seed": 1
"scaler_type": identity
"start_padding_enabled": False
"stat_exog_list": None
"step_size": 1
"val_check_steps": 100
"valid_batch_size": None
"valid_loss": None
"windows_batch_size": 1024random_seed: 1train_trajectories: []valid_trajectories: []optimizer: Noneoptimizer_kwargs: {}lr_scheduler: Nonelr_scheduler_kwargs: {}futr_exog_list: []hist_exog_list: []stat_exog_list: []futr_exog_size: 0hist_exog_size: 0stat_exog_size: 0trainer_kwargs: {'max_steps': 2, 'enable_checkpointing': False}h: 3input_size: 6windows_batch_size: 1024start_padding_enabled: Falsebatch_size: 32valid_batch_size: 32inference_windows_batch_size: 1024learning_rate: 0.0001max_steps: 2num_lr_decays: -1lr_decay_steps: 100000000.0early_stop_patience_steps: -1val_check_steps: 100step_size: 1exclude_insample_y: Falseval_size: 0test_size: 0decompose_forecast: Falsenum_workers_loader: 0drop_last_loader: Falsevalidation_step_outputs: []alias: Nonelabel_len: 3c_out: 1output_attention: Falseenc_in: 1
举例说明 如何构建windows
# Demo: same toy series, now trained with NBEATS — used to illustrate how
# training windows of size input_size + h are built from the series.
import pandas as pd
import numpy as np

from neuralforecast.core import NeuralForecast
from neuralforecast.models import NBEATS

# First 10 monthly AirPassengers observations.
AirPassengers = np.array(
    [112.0, 118.0, 132.0, 129.0, 121.0, 135.0, 148.0, 148.0, 136.0, 119.0],
    dtype=np.float32,
)

# Long-format frame: unique_id / ds / y, one row per monthly observation.
AirPassengersDF = pd.DataFrame(
    {
        "unique_id": np.ones(len(AirPassengers)),
        "ds": pd.date_range(
            start="1949-01-01",
            periods=len(AirPassengers),
            freq=pd.offsets.MonthEnd(),
        ),
        "y": AirPassengers,
    }
)

Y_df = AirPassengersDF
Y_df = Y_df.reset_index(drop=True)
Y_df.head()

# Model training: window size = input_size(6) + h(3) = 9 rows per window.
horizon = 3
models = [NBEATS(input_size=2 * horizon, h=horizon, max_steps=2)]
nf = NeuralForecast(models=models, freq='M')
nf.fit(df=Y_df)
window_size 是窗口的总大小,它由 input_size 和 h 决定。
9 = input_size(6) + h(3)
可以与原数据集对比下,是一个一个的往下移
当移动到 132.0的时候,为了凑齐9行,剩余的用0填充
窗口的形状就是 windows1 shape: torch.Size([4, 9, 2])
window1: tensor([[[112., 1.],[118., 1.],[132., 1.],[129., 1.],[121., 1.],[135., 1.],[148., 1.],[148., 1.],[136., 1.]],[[118., 1.],[132., 1.],[129., 1.],[121., 1.],[135., 1.],[148., 1.],[148., 1.],[136., 1.],[119., 1.]],[[132., 1.],[129., 1.],[121., 1.],[135., 1.],[148., 1.],[148., 1.],[136., 1.],[119., 1.],[ 0., 0.]],[[129., 1.],[121., 1.],[135., 1.],[148., 1.],[148., 1.],[136., 1.],[119., 1.],[ 0., 0.],[ 0., 0.]]])
windows_batch_size
最后由 windows1 shape: torch.Size([4, 9, 2]) 变成了 windows2 shape: torch.Size([1024, 9, 2])
也就是我们的传参windows_batch_size = 1024
下面列出4个例子,实际是1024个
表示采样了 1024 个窗口,每个窗口大小为9,包含 2 个特征。
....[[118., 1.],[132., 1.],[129., 1.],[121., 1.],[135., 1.],[148., 1.],[148., 1.],[136., 1.],[119., 1.]],[[129., 1.],[121., 1.],[135., 1.],[148., 1.],[148., 1.],[136., 1.],[119., 1.],[ 0., 0.],[ 0., 0.]],[[118., 1.],[132., 1.],[129., 1.],[121., 1.],[135., 1.],[148., 1.],[148., 1.],[136., 1.],[119., 1.]],[[118., 1.],[132., 1.],[129., 1.],[121., 1.],[135., 1.],[148., 1.],[148., 1.],[136., 1.],[119., 1.]],
最终训练时,返回的数据
windows_batch: {'temporal': 1024 个窗口数据, 'temporal_cols': Index(['y', 'available_mask'], dtype='object'), 'static': None, 'static_cols': None}