TVP

# 数学推导＋纯Python实现机器学习算法1：线性回归

# NOTE(review): the scraped line read "importnumpyaspd" — every use below is `np`,
# so the intended import is numpy aliased as np.
import numpy as np

def linear_loss(X, y, w, b):
    """Compute prediction, MSE loss and parameter gradients for linear regression.

    Args:
        X: feature matrix of shape (num_train, num_feature).
        y: targets of shape (num_train, 1).
        w: weight vector of shape (num_feature, 1).
        b: scalar bias.

    Returns:
        (y_hat, loss, dw, db): predictions, mean squared error,
        gradient w.r.t. w (same shape as w) and scalar gradient w.r.t. b.
    """
    num_train = X.shape[0]  # scraped text had X.shape[] — row count intended
    # Model: y_hat = Xw + b
    y_hat = np.dot(X, w) + b
    # Mean squared error over the training set
    loss = np.sum((y_hat - y) ** 2) / num_train
    # Analytic gradients of the MSE loss
    dw = np.dot(X.T, (y_hat - y)) / num_train
    db = np.sum((y_hat - y)) / num_train
    return y_hat, loss, dw, db

def initialize_params(dims):
    """Initialize linear-regression parameters to zero.

    Args:
        dims: number of input features.

    Returns:
        (w, b): zero weight column vector of shape (dims, 1) and scalar bias 0.
    """
    w = np.zeros((dims, 1))
    # Scraped text read "b =return w, b" — the zero bias initialization was lost.
    b = 0
    return w, b

def linar_train(X, y, learning_rate, epochs):
    """Train linear regression with batch gradient descent.

    NOTE: the (misspelled) name `linar_train` is kept because the script below
    calls it under this name.

    Args:
        X: feature matrix of shape (num_train, num_feature).
        y: targets of shape (num_train, 1).
        learning_rate: gradient-descent step size.
        epochs: number of iterations.

    Returns:
        (loss_list, loss, params, grads): per-epoch losses, final loss,
        dict {'w', 'b'} of learned parameters and dict {'dw', 'db'} of the
        last gradients — matching the 4-tuple unpacked by the caller.
    """
    # Scraped text called `initialize` / `linar_loss`, which do not exist;
    # the functions defined above are `initialize_params` / `linear_loss`.
    w, b = initialize_params(X.shape[1])
    loss_list = []
    for i in range(1, epochs):
        # Current predictions, loss and gradients
        y_hat, loss, dw, db = linear_loss(X, y, w, b)
        loss_list.append(loss)
        # Gradient-descent update
        w += -learning_rate * dw
        b += -learning_rate * db
        # Periodic progress report (scraped text dropped the `0` in `% 10000 == 0`)
        if i % 10000 == 0:
            print('epoch %d loss %f' % (i, loss))
    params = {
        'w': w,
        'b': b
    }
    grads = {
        'dw': dw,
        'db': db
    }
    # The return statement was lost in scraping; the caller unpacks 4 values.
    return loss_list, loss, params, grads

# Load the diabetes dataset and train the model.
# NOTE(review): `load_diabetes` was used but never imported in the scraped text.
from sklearn.datasets import load_diabetes
from sklearn.utils import shuffle

diabetes = load_diabetes()
data = diabetes.data
target = diabetes.target

# Shuffle the data so the split below is random
X, y = shuffle(data, target, random_state=13)
X = X.astype(np.float32)

# Simple 90/10 train/test split (scraped text had X.shape[] — row count intended)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))
print('X_train=', X_train.shape)
print('X_test=', X_test.shape)
print('y_train=', y_train.shape)
print('y_test=', y_test.shape)

loss_list, loss, params, grads = linar_train(X_train, y_train, 0.001, 100000)
print(params)

def predict(X, params):
    """Predict targets for X using trained parameters.

    Args:
        X: feature matrix of shape (num_samples, num_feature).
        params: dict with 'w' (num_feature, 1) and scalar 'b'.

    Returns:
        Predictions of shape (num_samples, 1).
    """
    w = params['w']
    b = params['b']
    y_pred = np.dot(X, w) + b
    return y_pred

y_pred = predict(X_test, params)
# Notebook-style peek at the first few predictions (no effect when run as a script)
y_pred[:5]

# Plot the test targets against the fitted line.
import matplotlib.pyplot as plt

f = X_test.dot(params['w']) + params['b']
# Scraped text had X_test.shape[] — the sample count (axis 0) is intended
plt.scatter(range(X_test.shape[0]), y_test)
plt.plot(f, color='darkorange')
plt.xlabel('X')
plt.ylabel('y')
plt.show()

# Plot the training-loss curve.
plt.plot(loss_list, color='blue')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.show()

# Imports for the class-based re-implementation below.
import numpy as np
from sklearn.utils import shuffle

class lr_model():
    """Class-based linear regression trained with batch gradient descent.

    (Lowercase class name kept — the __main__ block instantiates `lr_model()`.)
    """

    def __init__(self):
        pass

    def prepare_data(self):
        """Load the diabetes data, shuffle it, and return features+target
        concatenated into one array of shape (num_samples, num_feature + 1)."""
        data = load_diabetes().data
        target = load_diabetes().target
        X, y = shuffle(data, target, random_state=42)
        X = X.astype(np.float32)
        y = y.reshape((-1, 1))
        data = np.concatenate((X, y), axis=1)
        return data

    def initialize_params(self, dims):
        """Return zero-initialized (w, b) for `dims` input features."""
        w = np.zeros((dims, 1))
        # Scraped text read "b =return w, b" — zero bias initialization restored.
        b = 0
        return w, b

    def linear_loss(self, X, y, w, b):
        """Return (y_hat, loss, dw, db): predictions, MSE, and gradients."""
        num_train = X.shape[0]  # scraped text had X.shape[]
        y_hat = np.dot(X, w) + b
        loss = np.sum((y_hat - y) ** 2) / num_train
        dw = np.dot(X.T, (y_hat - y)) / num_train
        db = np.sum((y_hat - y)) / num_train
        return y_hat, loss, dw, db

    def linear_train(self, X, y, learning_rate, epochs):
        """Run gradient descent; return (loss, params, grads) — the 3-tuple
        the __main__ block unpacks (the return was lost in scraping)."""
        w, b = self.initialize_params(X.shape[1])
        for i in range(1, epochs):
            y_hat, loss, dw, db = self.linear_loss(X, y, w, b)
            w += -learning_rate * dw
            b += -learning_rate * db
            # Scraped text dropped the `0` in `% 10000 == 0`
            if i % 10000 == 0:
                print('epoch %d loss %f' % (i, loss))
        params = {
            'w': w,
            'b': b
        }
        grads = {
            'dw': dw,
            'db': db
        }
        return loss, params, grads

    def predict(self, X, params):
        """Predict targets for X using trained params {'w', 'b'}."""
        w = params['w']
        b = params['b']
        y_pred = np.dot(X, w) + b
        return y_pred

    def linear_cross_validation(self, data, k, randomize=True):
        """Yield (train, validation) array pairs for k-fold cross validation."""
        if randomize:
            data = list(data)
            # BUG FIX: sklearn.utils.shuffle is NOT in-place — the original
            # discarded its result, leaving `data` unshuffled.
            data = shuffle(data)
        slices = [data[i::k] for i in range(k)]
        for i in range(k):
            validation = slices[i]
            # Renamed loop vars: the original comprehension shadowed `data`.
            train = [row for s in slices if s is not validation for row in s]
            train = np.array(train)
            validation = np.array(validation)
            yield train, validation

if __name__ == '__main__':
    lr = lr_model()
    data = lr.prepare_data()
    for train, validation in lr.linear_cross_validation(data, 5):
        # First 10 columns are features, last column is the target
        X_train = train[:, :10]
        y_train = train[:, -1].reshape((-1, 1))
        X_valid = validation[:, :10]
        y_valid = validation[:, -1].reshape((-1, 1))
        # NOTE(review): loss5 is re-created every fold, so the "cross validation
        # score" printed below is just this fold's loss — kept as in the original.
        loss5 = []
        loss, params, grads = lr.linear_train(X_train, y_train, 0.001, 100000)
        loss5.append(loss)
        score = np.mean(loss5)
        print('five kold cross validation score is', score)
        y_pred = lr.predict(X_valid, params)
        valid_score = np.sum(((y_pred - y_valid) ** 2)) / len(X_valid)
        print('valid score is', valid_score)

• 发表于:
• 原文链接https://kuaibao.qq.com/s/20180830B09TNI00?refer=cp_1026
• 腾讯「腾讯云开发者社区」是腾讯内容开放平台帐号（企鹅号）传播渠道之一，根据《腾讯内容开放平台服务协议》转载发布内容。
• 如有侵权，请联系 cloudcommunity@tencent.com 删除。

2022-12-09

2022-12-09

2022-12-09

2022-12-09

2022-12-09

2022-12-09

2022-12-09

2022-12-09

2022-12-09

2022-12-09

2022-12-09