|
import numpy as np
|
|
import pandas as pd
|
|
|
|
|
|
def get_sequences(data: pd.DataFrame,
                  target_col: str,
                  time_steps: int = 10) -> tuple[np.ndarray, np.ndarray]:
    """Build sliding-window feature sequences and their next-step targets.

    Window ``j`` holds rows ``j .. j + time_steps - 1`` of ``data`` (all
    columns, including ``target_col``), and its target is the value of
    ``target_col`` at row ``j + time_steps``.

    Args:
        data: Source frame; rows are taken in their positional order.
        target_col: Name of the column whose next value is the target.
        time_steps: Number of consecutive rows in each window.

    Returns:
        A ``(feature_sequences, targets)`` pair where ``feature_sequences``
        has shape ``(n_windows, time_steps, n_columns)`` and ``targets`` has
        shape ``(n_windows, 1)``, with
        ``n_windows = max(len(data) - time_steps, 0)``.

    Raises:
        ValueError: If ``time_steps`` is negative.
    """
    if time_steps < 0:
        raise ValueError(f"time_steps must be non-negative, got {time_steps}")

    # Convert once up front instead of materialising one DataFrame per window.
    values = data.to_numpy()
    target_values = data[target_col].to_numpy()

    # Clamp at zero so a frame shorter than the window yields empty outputs
    # that still carry the full (0, time_steps, n_columns) shape.
    n_windows = max(len(values) - time_steps, 0)

    # Row w of the index matrix is [w, w + 1, ..., w + time_steps - 1].
    window_index = np.arange(n_windows)[:, None] + np.arange(time_steps)
    feature_sequences = values[window_index]

    # The target of each window is the row immediately following it.
    targets = target_values[time_steps:].reshape(n_windows, 1)

    return feature_sequences, targets
|
|
|
|
|
|
def tts_last_n(feature_sequences: np.ndarray,
               target: np.ndarray,
               n: int = 365) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Split sequences and targets so the last ``n`` items form the test set.

    Args:
        feature_sequences: Sliceable collection of feature windows.
        target: Sliceable collection of targets.
        n: Number of trailing items to hold out for testing. ``0`` keeps
            everything in the training set; a value larger than the input
            puts everything in the test set.

    Returns:
        ``(train_sequences, test_sequences, train_target, test_target)``.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    # Explicit split points instead of x[:-n] / x[-n:]: with n == 0 those
    # slices give an empty train set and put every item in the test set.
    # Each input gets its own split point so its last n items are taken
    # independently of the other input's length.
    sequence_split = max(len(feature_sequences) - n, 0)
    target_split = max(len(target) - n, 0)

    train_sequences = feature_sequences[:sequence_split]
    test_sequences = feature_sequences[sequence_split:]
    train_target = target[:target_split]
    test_target = target[target_split:]

    return train_sequences, test_sequences, train_target, test_target
|
|
|