# python 单变量线性回归

## 单变量线性回归 (Linear Regression with One Variable)

In [54]:

```#初始化工作
import random
import numpy as np
import matplotlib.pyplot as plt

# This is a bit of magic to make matplotlib figures appear inline in the notebook
# rather than in a new window.
%matplotlib inline
# Notebook-wide matplotlib defaults, applied in one call.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),      # default size of plots
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

# Some more magic so that the notebook will reload external python modules;

### 1、加载数据与可视化

In [55]:

```print('Plotting Data ...')

def load_data(filename):
    """Read a comma-separated text file into a list of float rows.

    Each non-blank line is split on ',' and every field is converted with
    ``float``; a line such as ``5.5277,9.1302`` becomes ``[5.5277, 9.1302]``.

    Parameters
    ----------
    filename : path of the text file to read.

    Returns
    -------
    list of lists of float, one inner list per non-blank line.

    Raises
    ------
    ValueError
        If a field cannot be parsed as a float.
    """
    rows = []
    with open(filename, 'r') as f:
        for line in f:
            # Skip blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            rows.append([float(item) for item in line.split(',')])
    return rows


# NOTE(review): the function header, the read loop and the call were lost when
# this notebook was extracted; the path below is an assumption -- confirm it
# points at the comma-separated training file before running.
data = load_data('ex1data1.txt')

# Turn the parsed rows into a 2-D array; the code below reads column 0 as the
# feature and column 1 as the target.
data = np.array(data)
print(data.shape)

x, y = data[:, 0], data[:, 1]
m = data.shape[0]  # number of training examples

# Scatter the raw samples as red crosses.
plt.plot(x, y, 'rx')
# Raw string: keeps the backslash in front of '$' exactly as before, without
# the invalid-escape-sequence warning a plain '\$' literal triggers.
plt.ylabel(r'Profit in \$10,000s')
plt.xlabel('Population of City in 10,000s')
plt.title("Training data")```
```Plotting Data ...
(97, 2)```

Out[55]:

`<matplotlib.text.Text at 0x2e663d888d0>`

### 2、通过梯度下降求解 theta

In [56]:

```x = x.reshape(-1,1)
# Append a column of ones to the right of the feature column, so theta[0]
# multiplies x and theta[1] multiplies the constant 1.
ones_column = np.ones((x.shape[0], 1))
X = np.hstack((x, ones_column))
y = y.reshape(-1, 1)
theta = np.zeros((2, 1))

# Compute the squared-error cost
def computeCost(X, y, theta):
    """Return the cost J = sum((X . theta - y)**2) / (2 * m).

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix, one row per training example.
    y : array-like with m entries
        Targets; may be flat (m,) or a column (m, 1).
    theta : array-like with n entries
        Parameters; may be a plain list, flat (n,) or a column (n, 1).

    Returns
    -------
    numpy floating scalar
        The cost for the given parameters.
    """
    X = np.asarray(X, dtype=float)
    # Flatten both vectors before subtracting.  Without this, a flat theta
    # gives an (m,) prediction that broadcasts against an (m, 1) y into an
    # (m, m) matrix and silently produces a wrong cost.
    y = np.asarray(y, dtype=float).reshape(-1)
    theta = np.asarray(theta, dtype=float).reshape(-1)
    m = y.shape[0]
    residual = X.dot(theta) - y  # shape (m,)
    return np.sum(residual ** 2) / (2 * m)

# Gradient descent
def gradientDescent(X, y, theta, alpha, num_iters):
    """Run ``num_iters`` gradient-descent updates on ``theta``.

    Every step applies
    ``theta <- theta - (alpha / m) * X.T . (X . theta - y)``
    and then records ``computeCost(X, y, theta)`` for the updated theta.

    Returns
    -------
    (J_history, theta)
        ``J_history`` is a (num_iters, 1) array holding the cost after each
        step; ``theta`` is the parameter array after the last step.
    """
    n_samples = y.shape[0]
    step = alpha / n_samples
    # Cost after each update, one row per iteration.
    cost_trace = np.zeros((num_iters, 1))

    for k in range(num_iters):
        # Derivative of J with respect to theta is X.T . (X . theta - y) / m.
        residual = X.dot(theta) - y
        theta = theta - step * X.T.dot(residual)
        cost_trace[k] = computeCost(X, y, theta)
    return cost_trace, theta

# Training settings
alpha = 0.01        # learning rate
iterations = 1500   # number of gradient-descent steps

# Cost at the starting theta; kept in `j`, not used again in this cell.
j = computeCost(X, y, theta)

J_history, theta = gradientDescent(X, y, theta, alpha, iterations)
fitted = (theta[0][0], theta[1][0])
print('Theta found by gradient descent: %f %f' % fitted)

# Cost recorded after every iteration.
plt.plot(J_history)
plt.ylabel('lost')
plt.xlabel('iter count')```
`Theta found by gradient descent: 1.166362 -3.630291`

Out[56]:

`<matplotlib.text.Text at 0x2e661194ac8>`

### 3、训练结果可视化

In [57]:

```#number of training examples
# Training samples as red crosses, with the fitted line X . theta drawn on top.
plt.plot(data[:, 0], data[:, 1], 'rx')
plt.plot(X[:, 0], X.dot(theta), '-')
# Raw string: keeps the backslash in front of '$' exactly as before, without
# the invalid-escape-sequence warning a plain '\$' literal triggers.
plt.ylabel(r'Profit in \$10,000s')
plt.xlabel('Population of City in 10,000s')
plt.title("Training data")```

Out[57]:

`<matplotlib.text.Text at 0x2e662155198>`

### 4、可视化 J(theta_0, theta_1)

In [75]:

```from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter

# Candidate values for the two parameters.
theta0_vals = np.linspace(-10, 10, 100)
theta1_vals = np.linspace(-10, 10, 100)

J_vals = np.zeros((theta0_vals.shape[0], theta1_vals.shape[0]))

# Fill J_vals: J_vals[i, j] is the cost at (theta0_vals[i], theta1_vals[j]).
for i, t0 in enumerate(theta0_vals):
    for j, t1 in enumerate(theta1_vals):
        # Build a (2, 1) column so X.dot(t) has the same (m, 1) shape as y.
        # A flat list here would broadcast against y into an (m, m) matrix
        # and give a wrong cost.
        t = np.array([[t0], [t1]])
        J_vals[i, j] = computeCost(X, y, t)

fig = plt.figure()
# fig.gca(projection='3d') is not accepted by current matplotlib releases;
# add_subplot creates the 3-D axes on old and new versions alike.
ax = fig.add_subplot(111, projection='3d')

theta0_vals, theta1_vals = np.meshgrid(theta0_vals, theta1_vals)
# Plot the surface.  meshgrid's default 'xy' layout puts theta0 along the
# columns, so the [i, j]-indexed J_vals must be transposed to line up.
surf = ax.plot_surface(theta0_vals, theta1_vals, J_vals.T, cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)

# Customize the Z axis.
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter('%d'))

# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)

plt.show()```

In [ ]:

102 篇文章37 人订阅

0 条评论

## 相关文章

### 【ECCV2018】24篇论文代码实现

【导读】计算机视觉领域的顶级会议ECCV2018于9月8日在德国慕尼黑举办，前两天是workshop日程。在主会议正式开幕之前，让我们先来看看24位ECCV20...

1.8K4

### 与数据挖掘有关或有帮助的R包和函数的集合

rpart，party，randomForest，rpartOrdinal，tree，marginTree，

793

3557

7714

3555

### python 特征选择①

VarianceThreshold 是特征选择中的一项基本方法。它会移除所有方差不满足阈值的特征。默认设置下，它将移除所有方差为0的特征，即那些在所有样本中数值...

682

711

21610

2786

2559