对于如下的数据集:
画图的代码如下:
#coding:UTF-8
'''
Date:20160423
@author: zhaozhiyong
'''
from pylab import *
# Collect the two coordinates to plot from the tab-separated data file.
x = []
y = []
# `with` closes the file even if a field fails to parse as a float.
with open("data.txt") as f:
    for line in f:
        fields = line.strip().split("\t")
        # Only rows with exactly three columns are used; column 1 is plotted
        # against column 2, and column 0 is not needed for the picture.
        if len(fields) == 3:
            x.append(float(fields[1]))
            y.append(float(fields[2]))

# Draw the samples with "." markers, title the figure and display it.
plot(x, y, ".")
plt.title("data")
show()
利用最小二乘法求得的结果为: [[ 3.00774324] [ 1.69532264]]
代码如下:
#coding:UTF-8
'''
Date:20160423
@author: zhaozhiyong
'''
from numpy import *
def load_data():
    """Read the training set from ``data.txt`` in the working directory.

    Every usable row holds exactly three tab-separated numbers: the first
    two are the features and the third is the target. Rows with any other
    number of columns (including blank lines) are skipped.

    Returns:
        A pair ``(x, y)`` of numpy matrices. ``x`` has one row per usable
        sample with the two feature columns; ``y`` is the column of the
        corresponding targets.

    Raises:
        ValueError: if a field of a three-column row is not a valid float.
    """
    x = []
    y = []
    # `with` guarantees the file is closed even when float() raises.
    with open("data.txt") as f:
        for line in f:
            fields = line.strip().split("\t")
            if len(fields) != 3:
                # Skip the malformed row as a whole so x and y stay aligned.
                continue
            x.append([float(fields[0]), float(fields[1])])
            y.append(float(fields[2]))
    # asmatrix is what the `mat` alias pointed to; the alias itself is gone
    # in NumPy 2.0, while asmatrix exists in old and new releases alike.
    return asmatrix(x), asmatrix(y).T
def lr(x, y):
    """Solve ordinary least squares through the normal equations.

    Args:
        x: feature matrix, one row per sample. Expected to be a numpy
            matrix so that ``*`` means matrix multiplication.
        y: target values as a single-column numpy matrix.

    Returns:
        The weight column ``w`` satisfying ``(x.T * x) * w = x.T * y`` as a
        numpy matrix, or ``None`` when ``x.T * x`` has a zero determinant
        and therefore no unique solution exists.
    """
    gram = x.T * x  # computed once; used by both the check and the solve
    if linalg.det(gram) == 0:
        return None
    # Solving the linear system avoids forming the explicit inverse.
    return asmatrix(linalg.solve(gram, x.T * y))
if __name__ == "__main__":
    x, y = load_data()
    # Core step: closed-form least squares.
    w = lr(x, y)
    # The parenthesized form is valid on both Python 2 and Python 3,
    # whereas the bare `print w` statement only parses on Python 2.
    print(w)
最终的图形如下:
回归的结果如下:
利用随机梯度下降法(SGD)求解的程序代码如下:
#coding:UTF-8
'''
Date:20160423
@author: zhaozhiyong
'''
from numpy import *
def sgd(n, p):
    """Fit linear weights with one pass of stochastic gradient descent.

    Streams ``data.txt`` from the working directory. Every usable row holds
    ``n + 1`` tab-separated numbers: ``n`` features followed by the target.
    Rows with any other number of columns (including blank lines) are
    skipped. The weights are updated once per usable row, in file order.

    Args:
        n: number of features, i.e. the length of the weight vector.
        p: step size applied to each per-sample update.

    Returns:
        A 1-by-``n`` numpy matrix with the weights after the single pass.

    Raises:
        ValueError: if a field of a usable row is not a valid float.
    """
    # asmatrix is what the `mat` alias pointed to; the alias itself is gone
    # in NumPy 2.0, while asmatrix exists in old and new releases alike.
    w = asmatrix(zeros((1, n)))  # start from the zero vector
    # `with` guarantees the file is closed even when float() raises.
    with open("data.txt") as f:
        for line in f:
            fields = line.strip().split("\t")
            if len(fields) != n + 1:
                # No update for malformed rows: an empty or wrongly sized
                # sample cannot be multiplied with w.
                continue
            x = asmatrix([float(v) for v in fields[:n]]).T
            y = float(fields[n])
            # Step against the squared-error gradient of this one sample.
            w = w - p * (w * x - y) * x.T
    return w
if __name__ == "__main__":
    # Two features, step size 0.1.
    w = sgd(2, 0.1)
    # The parenthesized form is valid on both Python 2 and Python 3,
    # whereas the bare `print w` statement only parses on Python 2.
    print(w)