LSTM (Long Short-Term Memory) is a special kind of RNN designed to mitigate the vanishing-gradient problem that plain RNNs suffer on long sequences.
For contrast, a vanilla RNN cell carries only a single hidden state, which is where the gradient problem originates:

```python
import numpy as np

# A simple RNN cell implementation
class SimpleRNNCell:
    def __init__(self, input_size, hidden_size):
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01   # input-to-hidden weights
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden-to-hidden weights
        self.bh = np.zeros((hidden_size, 1))                         # hidden bias

    def forward(self, x, h_prev):
        # The new hidden state mixes the current input with the previous state
        h_next = np.tanh(np.dot(self.Wxh, x) + np.dot(self.Whh, h_prev) + self.bh)
        return h_next
```
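An LSTM cell adds a cell state and three gates on top of this. As a minimal NumPy sketch (the stacked-weight layout here is one common convention, matching the parameter table below):

```python
# Minimal LSTM cell sketch; one stacked weight matrix covering all four gates
class SimpleLSTMCell:
    def __init__(self, input_size, hidden_size):
        # Rows stack the input, forget, output, and candidate transforms
        self.W = np.random.randn(4 * hidden_size, input_size + hidden_size) * 0.01
        self.b = np.zeros((4 * hidden_size, 1))
        self.hidden_size = hidden_size

    def forward(self, x, h_prev, c_prev):
        z = np.dot(self.W, np.vstack([x, h_prev])) + self.b
        H = self.hidden_size
        i = 1 / (1 + np.exp(-z[0:H]))        # input gate
        f = 1 / (1 + np.exp(-z[H:2*H]))      # forget gate
        o = 1 / (1 + np.exp(-z[2*H:3*H]))    # output gate
        g = np.tanh(z[3*H:4*H])              # candidate cell state
        c_next = f * c_prev + i * g          # keep old memory, add new
        h_next = o * np.tanh(c_next)
        return h_next, c_next
```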
An LSTM layer therefore has four times the parameters of a plain RNN layer, one set per gate plus the candidate state:

| Parameter type | Weights | Parameter count |
| --- | --- | --- |
| Input weight matrices | W_xi, W_xf, W_xo, W_xc | 4 × hidden_size × input_size |
| Recurrent weight matrices | W_hi, W_hf, W_ho, W_hc | 4 × hidden_size × hidden_size |
| Bias terms | b_i, b_f, b_o, b_c | 4 × hidden_size |
| Total | | 4 × (input_size + hidden_size + 1) × hidden_size |
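The total can be checked directly against Keras; a quick sketch (the sizes here are arbitrary):

```python
import tensorflow as tf

input_size, hidden_size = 5, 128
layer = tf.keras.layers.LSTM(hidden_size)
layer.build((None, 60, input_size))  # (batch, timesteps, features)

formula = 4 * (input_size + hidden_size + 1) * hidden_size
assert layer.count_params() == formula  # 68608 parameters for these sizes
```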
The examples below use Yahoo Finance AAPL stock data (2010–2023):
```python
import numpy as np
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler

# Download daily OHLCV data
data = yf.download('AAPL', start='2010-01-01', end='2023-12-31')
features = data[['Open', 'High', 'Low', 'Close', 'Volume']].values

# Scale every feature to [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(features)

# Build sliding-window samples
def create_dataset(data, look_back=60):
    X, y = [], []
    for i in range(len(data) - look_back):  # use every available window
        X.append(data[i:i + look_back])     # 60 days of all five features
        y.append(data[i + look_back, 3])    # next day's Close price (column 3)
    return np.array(X), np.array(y)

X, y = create_dataset(scaled_data)
```
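The training call further down references X_train and y_train, which the snippet above never defines. A minimal sketch, assuming a chronological 80/20 split (time series must never be shuffled before splitting):

```python
# Chronological split; shuffling would leak future data into training
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
```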
```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.3),
    LSTM(64, return_sequences=False),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
```
```python
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=10),
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5)
    ]
)
```
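Because the targets were Min-Max scaled, predictions must be mapped back to price units before reporting errors. One sketch, reusing the fitted scaler (Close is column 3 in our feature matrix):

```python
# Invert Min-Max scaling for the Close column only
close_min = scaler.data_min_[3]
close_max = scaler.data_max_[3]
pred_prices = model.predict(X_test).ravel() * (close_max - close_min) + close_min
```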
A bidirectional LSTM reads each window forwards and backwards and concatenates the two passes:

```python
from tensorflow.keras.layers import Bidirectional

Bidirectional(LSTM(64, return_sequences=True))
```
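Dropped into the earlier architecture, that might look like the following sketch (layer sizes are illustrative, not tuned):

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dropout, Dense

# Bidirectional first layer; its output width doubles to 2 * 64 = 128
model = Sequential([
    Bidirectional(LSTM(64, return_sequences=True),
                  input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.3),
    LSTM(64),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
```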
At the heart of the LSTM, the forget gate decides how much of the previous cell state to keep:

$$
f_t = \sigma(W_f \cdot [h_{t-1}, x_t] + b_f)
$$
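The remaining gates and state updates follow the same pattern (standard LSTM formulation; $\odot$ denotes element-wise multiplication):

$$
\begin{aligned}
i_t &= \sigma(W_i \cdot [h_{t-1}, x_t] + b_i) \\
\tilde{C}_t &= \tanh(W_C \cdot [h_{t-1}, x_t] + b_C) \\
C_t &= f_t \odot C_{t-1} + i_t \odot \tilde{C}_t \\
o_t &= \sigma(W_o \cdot [h_{t-1}, x_t] + b_o) \\
h_t &= o_t \odot \tanh(C_t)
\end{aligned}
$$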
Key hyperparameters and how to tune them (a grid-search sketch follows the table):

| Hyperparameter | Recommended range | Tuning method |
| --- | --- | --- |
| Hidden units | 64–512 | Bayesian optimization |
| Learning rate | 1e-4 to 1e-2 | Learning-rate decay |
| Dropout rate | 0.2–0.5 | Grid search |
| Batch size | 32–256 | Progressive doubling |
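As a minimal grid-search sketch over the dropout rate (it reuses X_train and y_train from earlier; the short epoch budget is just to keep the search cheap):

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense

def build_model(dropout_rate):
    m = Sequential([
        LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(dropout_rate),
        Dense(1)
    ])
    m.compile(optimizer='adam', loss='mse')
    return m

best_rate, best_loss = None, float('inf')
for rate in [0.2, 0.3, 0.4, 0.5]:
    history = build_model(rate).fit(X_train, y_train, epochs=10,
                                    validation_split=0.2, verbose=0)
    val_loss = min(history.history['val_loss'])
    if val_loss < best_loss:
        best_rate, best_loss = rate, val_loss
```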
```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Text-generation model; vocab_size comes from the tokenizer's vocabulary
model = Sequential([
    Embedding(vocab_size, 256),
    LSTM(1024, return_sequences=True),
    LSTM(512),
    Dense(vocab_size, activation='softmax')
])
```
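Once trained, this model predicts a distribution over the next token. A hypothetical temperature-sampling loop (the function and its parameters are illustrative, not part of the original post):

```python
import numpy as np

def generate(model, seed_ids, length=100, temperature=1.0):
    ids = list(seed_ids)
    for _ in range(length):
        probs = model.predict(np.array([ids]), verbose=0)[0]
        logits = np.log(probs + 1e-9) / temperature  # temperature reshapes the distribution
        p = np.exp(logits) / np.exp(logits).sum()
        ids.append(int(np.random.choice(len(p), p=p)))
    return ids
```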
```python
# Complete time-series forecasting script
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

# Data preparation
def load_data(file_path, look_back=60):
    # Assumes a CSV with OHLCV columns; Close (column 3) is the prediction target
    df = pd.read_csv(file_path)
    features = df[['Open', 'High', 'Low', 'Close', 'Volume']].values
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(features)
    X, y = [], []
    for i in range(len(scaled) - look_back):
        X.append(scaled[i:i + look_back])
        y.append(scaled[i + look_back, 3])
    X, y = np.array(X), np.array(y)
    split = int(len(X) * 0.8)  # chronological split, no shuffling
    return X[:split], y[:split], X[split:], y[split:]

# Build the LSTM model
def build_lstm_model(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(128, return_sequences=True, input_shape=input_shape),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Train and evaluate
if __name__ == "__main__":
    X_train, y_train, X_test, y_test = load_data('stock_data.csv')
    model = build_lstm_model((X_train.shape[1], X_train.shape[2]))
    history = model.fit(X_train, y_train, epochs=100, validation_split=0.2)
    predictions = model.predict(X_test)
    # Evaluation: RMSE in scaled units (invert scaling to report price-level error)
    rmse = np.sqrt(np.mean((predictions.ravel() - y_test) ** 2))
    print(f"Test RMSE (scaled): {rmse:.4f}")
```
Original-content notice: this article is published on the Tencent Cloud Developer Community with the author's authorization and may not be reproduced without permission.
For infringement concerns, contact cloudcommunity@tencent.com for removal.