# 在 Python 中使用线性回归预测数据

http://python.jobbole.com/81215/

1) 预测房子价格

2) 预测下周哪个电视节目会有更多的观众

3) 替换数据集中的缺失值

1) 预测房子价格

# Required Packages

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model

# Function to get data

def get_data(file_name):
    """Load house sizes and prices from a CSV file.

    Args:
        file_name: Path of a CSV file readable by ``pandas.read_csv`` that
            contains ``square_feet`` and ``price`` columns.

    Returns:
        A ``(X_parameter, Y_parameter)`` tuple. ``X_parameter`` is a list of
        one-element lists (``[[square_feet], ...]``) and ``Y_parameter`` is a
        flat list of prices; every value is converted to ``float``.
    """
    # Parse the file first: the column lookups below need the loaded frame.
    data = pd.read_csv(file_name)
    # Each input is wrapped in its own list so the result is a 2-D
    # (n_samples, 1) structure, one row per house.
    X_parameter = [[float(square_feet)] for square_feet in data['square_feet']]
    Y_parameter = [float(price) for price in data['price']]
    return X_parameter, Y_parameter

[[150.0],[200.0],[250.0],[300.0],[350.0],[400.0],[600.0]]

[6450.0,7450.0,8450.0,9450.0,11450.0,15450.0,18450.0]

[Finished in 0.7s]

# Function for Fitting our data to Linear model

def linear_model_main(X_parameters, Y_parameters, predict_value):
    """Fit a linear regression and predict the target for ``predict_value``.

    Args:
        X_parameters: Training inputs, passed unchanged to
            ``LinearRegression.fit``.
        Y_parameters: Training targets, passed unchanged to
            ``LinearRegression.fit``.
        predict_value: Input to predict for. A bare scalar or a 1-D sequence
            is promoted to a single 2-D sample; a 2-D array is used as is.

    Returns:
        A dict with the fitted model's ``'intercept'`` and ``'coefficient'``
        and the ``'predicted_value'`` array returned by ``predict``.
    """
    # Create linear regression object
    regr = linear_model.LinearRegression()
    regr.fit(X_parameters, Y_parameters)

    # predict() requires a 2-D (n_samples, n_features) input, so a scalar
    # such as 700 has to become [[700]] before it is handed over.
    query = np.atleast_2d(predict_value)
    predict_outcome = regr.predict(query)

    return {
        'intercept': regr.intercept_,
        'coefficient': regr.coef_,
        'predicted_value': predict_outcome,
    }

# Load the training data, fit the model and predict the price for 700 sq ft.
X, Y = get_data('input_data.csv')
predictvalue = 700
result = linear_model_main(X, Y, predictvalue)

# Single-argument print(...) calls behave the same under the Python 2 print
# statement and the Python 3 print function; the format strings keep the
# spacing the original comma-separated print statements produced.
print("Intercept value  {}".format(result['intercept']))
print("coefficient {}".format(result['coefficient']))
print("Predicted value:  {}".format(result['predicted_value']))

# Function to show the results of the linear fit model

def show_linear_line(X_parameters, Y_parameters):
    """Plot the data points together with their fitted regression line.

    Args:
        X_parameters: Training inputs, passed to ``LinearRegression.fit`` and
            used as the x coordinates of the plot.
        Y_parameters: Training targets, passed to ``LinearRegression.fit``
            and used as the y coordinates of the scatter points.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Create linear regression object
    regr = linear_model.LinearRegression()
    regr.fit(X_parameters, Y_parameters)

    # Raw observations in blue, the model's predictions for the same inputs
    # drawn as a red line on top.
    plt.scatter(X_parameters, Y_parameters, color='blue')
    plt.plot(X_parameters, regr.predict(X_parameters), color='red', linewidth=4)

    # Empty tuples remove the tick marks from both axes.
    plt.xticks(())
    plt.yticks(())
    plt.show()

# Fit and plot the regression line for the X and Y data loaded earlier.
show_linear_line(X,Y)

2) 预测下周哪个电视节目会有更多的观众

# Required Packages

import csv
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model

# Function to get data

def get_data(file_name):
    """Load episode numbers and viewer counts for both TV shows from a CSV.

    Args:
        file_name: Path of a CSV file readable by ``pandas.read_csv`` that
            contains the columns ``flash_episode_number``,
            ``flash_us_viewers``, ``arrow_episode_number`` and
            ``arrow_us_viewers``.

    Returns:
        A 4-tuple ``(flash_x, flash_y, arrow_x, arrow_y)``. The ``*_x`` items
        are lists of one-element lists (``[[episode], ...]``) and the ``*_y``
        items are flat lists of viewer counts; every value is a ``float``.
    """
    # Parse the file first: the column lookups below need the loaded frame.
    data = pd.read_csv(file_name)
    # Episode numbers are wrapped one per row to form a 2-D (n_samples, 1)
    # structure; viewer counts stay flat.
    flash_x_parameter = [[float(v)] for v in data['flash_episode_number']]
    flash_y_parameter = [float(v) for v in data['flash_us_viewers']]
    arrow_x_parameter = [[float(v)] for v in data['arrow_episode_number']]
    arrow_y_parameter = [float(v) for v in data['arrow_us_viewers']]
    return flash_x_parameter, flash_y_parameter, arrow_x_parameter, arrow_y_parameter

# Function to know which Tv show will have more viewers

def more_viewers(x1, y1, x2, y2, next_episode=9):
    """Print which show is predicted to draw more viewers for an episode.

    Two independent linear regressions are fitted, one per show, and their
    predictions for ``next_episode`` are compared.

    Args:
        x1: Training inputs for The Flash, passed to ``LinearRegression.fit``.
        y1: Training targets for The Flash.
        x2: Training inputs for Arrow, passed to ``LinearRegression.fit``.
        y2: Training targets for Arrow.
        next_episode: Episode number to predict for; defaults to 9.

    Returns:
        None. The Flash prediction and the verdict are printed.
    """
    # predict() requires a 2-D (n_samples, n_features) input:
    # one sample with one feature.
    query = [[next_episode]]

    regr1 = linear_model.LinearRegression()
    regr1.fit(x1, y1)
    predicted_value1 = regr1.predict(query)
    print(predicted_value1)

    regr2 = linear_model.LinearRegression()
    regr2.fit(x2, y2)
    predicted_value2 = regr2.predict(query)

    # Each prediction is a one-element array; compare the scalar values.
    if predicted_value1[0] > predicted_value2[0]:
        print("The Flash Tv Show will have more viewers for next week")
    else:
        print("Arrow Tv Show will have more viewers for next week")

# Required Packages

import csv
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model

# Function to get data

def get_data(file_name):
    """Load episode numbers and viewer counts for both TV shows from a CSV.

    Args:
        file_name: Path of a CSV file readable by ``pandas.read_csv`` that
            contains the columns ``flash_episode_number``,
            ``flash_us_viewers``, ``arrow_episode_number`` and
            ``arrow_us_viewers``.

    Returns:
        A 4-tuple ``(flash_x, flash_y, arrow_x, arrow_y)``. The ``*_x`` items
        are lists of one-element lists (``[[episode], ...]``) and the ``*_y``
        items are flat lists of viewer counts; every value is a ``float``.
    """
    # Parse the file first: the column lookups below need the loaded frame.
    data = pd.read_csv(file_name)
    # Episode numbers are wrapped one per row to form a 2-D (n_samples, 1)
    # structure; viewer counts stay flat.
    flash_x_parameter = [[float(v)] for v in data['flash_episode_number']]
    flash_y_parameter = [float(v) for v in data['flash_us_viewers']]
    arrow_x_parameter = [[float(v)] for v in data['arrow_episode_number']]
    arrow_y_parameter = [float(v) for v in data['arrow_us_viewers']]
    return flash_x_parameter, flash_y_parameter, arrow_x_parameter, arrow_y_parameter

# Function to know which Tv show will have more viewers

def more_viewers(x1, y1, x2, y2, next_episode=9):
    """Print which show is predicted to draw more viewers for an episode.

    Two independent linear regressions are fitted, one per show, and their
    predictions for ``next_episode`` are compared.

    Args:
        x1: Training inputs for The Flash, passed to ``LinearRegression.fit``.
        y1: Training targets for The Flash.
        x2: Training inputs for Arrow, passed to ``LinearRegression.fit``.
        y2: Training targets for Arrow.
        next_episode: Episode number to predict for; defaults to 9.

    Returns:
        None. The Flash prediction and the verdict are printed.
    """
    # predict() requires a 2-D (n_samples, n_features) input:
    # one sample with one feature.
    query = [[next_episode]]

    regr1 = linear_model.LinearRegression()
    regr1.fit(x1, y1)
    predicted_value1 = regr1.predict(query)
    print(predicted_value1)

    regr2 = linear_model.LinearRegression()
    regr2.fit(x2, y2)
    predicted_value2 = regr2.predict(query)

    # Each prediction is a one-element array; compare the scalar values.
    if predicted_value1[0] > predicted_value2[0]:
        print("The Flash Tv Show will have more viewers for next week")
    else:
        print("Arrow Tv Show will have more viewers for next week")

# Load the episode-number and viewer-count lists for both shows from the CSV.
x1,y1,x2,y2=get_data('input_data.csv')

# Debug aid: uncomment to inspect the loaded lists.
#print x1,y1,x2,y2

# Fit one model per show and print which one is predicted to have more viewers.
more_viewers(x1,y1,x2,y2)

3) 替换数据集中的缺失值

• 发表于:
• 原文链接：http://kuaibao.qq.com/s/20171228B0XRCP00?refer=cp_1026
• 腾讯「云+社区」是腾讯内容开放平台帐号（企鹅号）传播渠道之一，根据《腾讯内容开放平台服务协议》转载发布内容。

2019-03-23

2019-03-23

2018-06-19

2019-03-23

2019-03-23

2019-03-23

2019-03-23

2019-03-23