机器学习—推荐系统

文章来源：企鹅号 - 天善智能

作者：大树2

个人博客：http://www.cnblogs.com/csj007523/

技术架构

核心算法是计算相似度，欧几里得距离公式，排名等。

1. 提供推荐

协作过滤

搜集偏好

寻找相近的用户

推荐物品，根据用户相似度推荐，根据物品排名推荐

匹配商品

构建推荐系统

基于物品的过滤

使用数据集

基于用户进行过滤还是基于物品进行过滤

2. 计算用户相似度：欧几里得距离、Pearson 相关度

3. 计算两个人的相似度，本来是推荐平均评分较高的作品，考虑到两个人的爱好相似程度，对评分根据相似度进行加权平均

from math import sqrt

# Sample ratings used throughout the walkthrough:
# reviewer name -> {movie title -> score}.
# NOTE(review): 'tomastang' under 'dennychen' reads like a reviewer name, not
# a movie title, and no other reviewer has that key — confirm it is intended.
critics = {
    'dennychen': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'tomastang': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'alexye': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michaelzhou': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'josephtcheng': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'antyonywang': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'jackfan': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}

# Show one movie's score for two different reviewers.
for reviewer in ('dennychen', 'alexye'):
    print(critics[reviewer]['Lady in the Water'])

# a ['Lady in the Water', 'Snakes on a Plane', 'Superman Returns', 'You, Me and Dupree', 'The Night Listener']

# sum_of_squares 3.5

import pandas as pd

from math import sqrt

# Sample ratings used throughout the walkthrough:
# reviewer name -> {movie title -> score}.
# NOTE(review): 'tomastang' under 'dennychen' reads like a reviewer name, not
# a movie title, and no other reviewer has that key — confirm it is intended.
critics = {
    'dennychen': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'tomastang': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'alexye': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michaelzhou': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'josephtcheng': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'antyonywang': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'jackfan': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}

# 欧几里得距离评价：评价两者之间的相似度，值越接近 1，相似度越高

def sim_distance(prefs, person1, person2):
    """Return a Euclidean-distance-based similarity score for two people.

    Only items that both people have rated take part in the comparison.

    Args:
        prefs: Mapping of person -> {item -> numeric rating}.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        ``1 / (1 + d)``, where ``d`` is the Euclidean distance between the
        two people's ratings over their shared items; 1 means the shared
        ratings are identical. Returns 0 when they share no rated items.

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``prefs``.
    """
    # Collect the shared items once, in person1's rating order.
    shared = [item for item in prefs[person1] if item in prefs[person2]]
    if not shared:
        return 0

    # Diagnostic output retained from the walkthrough.
    print('a', shared)

    sum_of_squares = sum(
        [pow(prefs[person1][item] - prefs[person2][item], 2) for item in shared]
    )
    print('sum_of_squares', sum_of_squares)

    return 1 / (1 + sqrt(sum_of_squares))

# Demo: print the Euclidean similarity for two reviewer pairs. Each call also
# emits sim_distance's own diagnostic print lines before the score.
print(sim_distance(critics, 'dennychen', 'Michaelzhou'))

print(sim_distance(critics, 'dennychen', 'alexye'))

# NOTE(review): in this listing sim_pearson is only defined further down, so
# running the file top-to-bottom as one script would raise NameError here.
# The return value is also discarded.
sim_pearson(critics, 'dennychen', 'alexye')

import pandas as pd

from math import sqrt

# Sample ratings used throughout the walkthrough:
# reviewer name -> {movie title -> score}.
# NOTE(review): 'tomastang' under 'dennychen' reads like a reviewer name, not
# a movie title, and no other reviewer has that key — confirm it is intended.
critics = {
    'dennychen': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'tomastang': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'alexye': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michaelzhou': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'josephtcheng': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'antyonywang': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'jackfan': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}

# 欧几里得距离评价：评价两者之间的相似度，值越接近 1，相似度越高

def sim_distance(prefs, person1, person2):
    """Return a Euclidean-distance-based similarity score for two people.

    Only items that both people have rated take part in the comparison.

    Args:
        prefs: Mapping of person -> {item -> numeric rating}.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        ``1 / (1 + d)``, where ``d`` is the Euclidean distance between the
        two people's ratings over their shared items; 1 means the shared
        ratings are identical. Returns 0 when they share no rated items.

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``prefs``.
    """
    # Collect the shared items once, in person1's rating order.
    shared = [item for item in prefs[person1] if item in prefs[person2]]
    if not shared:
        return 0

    # Diagnostic output retained from the walkthrough.
    print('a', shared)

    sum_of_squares = sum(
        [pow(prefs[person1][item] - prefs[person2][item], 2) for item in shared]
    )
    print('sum_of_squares', sum_of_squares)

    return 1 / (1 + sqrt(sum_of_squares))

# 皮尔逊相关度评价

def sim_pearson(prefs, person1, person2):
    """Return the Pearson correlation of two people's shared ratings.

    Args:
        prefs: Mapping of person -> {item -> numeric rating}.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        The Pearson correlation coefficient computed over the items both
        people have rated. Returns 0 when they share no items, or when
        either person's shared ratings have no variance (the coefficient
        is undefined in that case).

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``prefs``.
    """
    # Items rated by both people, in person1's rating order.
    shared = [item for item in prefs[person1] if item in prefs[person2]]
    n = len(shared)

    # Nothing in common is no evidence of similarity; returning 1 here would
    # rank complete strangers as perfect matches.
    if n == 0:
        return 0

    ratings1 = [prefs[person1][item] for item in shared]
    ratings2 = [prefs[person2][item] for item in shared]

    # Sums of the ratings.
    sum1 = sum(ratings1)
    sum2 = sum(ratings2)

    # Sums of the squared ratings.
    sum1_square = sum([pow(r, 2) for r in ratings1])
    sum2_square = sum([pow(r, 2) for r in ratings2])

    # Sum of the pairwise products.
    sum_products = sum([r1 * r2 for r1, r2 in zip(ratings1, ratings2)])

    # Unnormalised variances of each person's shared ratings.
    var1 = sum1_square - pow(sum1, 2) / n
    var2 = sum2_square - pow(sum2, 2) / n

    # Zero variance leaves the coefficient undefined. Floating-point rounding
    # can also push a zero variance slightly negative, which would make
    # sqrt() raise ValueError, so treat both cases as "no correlation".
    if var1 <= 0 or var2 <= 0:
        return 0

    den = sqrt(var1 * var2)
    if den == 0:
        return 0

    return (sum_products - (sum1 * sum2 / n)) / den

def topMatches(prefs, person, n = 5, simlarity = sim_pearson):
    """Return the people in ``prefs`` most similar to ``person``.

    Args:
        prefs: Mapping of person -> {item -> numeric rating}.
        person: Key of the person to find matches for.
        n: Maximum number of matches to return.
        simlarity: Callable ``(prefs, person, other) -> score`` used to score
            each candidate. The misspelled name is kept so existing keyword
            callers continue to work.

    Returns:
        A list of ``(score, other_person)`` tuples, highest score first,
        truncated to at most ``n`` entries.
    """
    scores = [
        (simlarity(prefs, person, other), other)
        for other in prefs
        if other != person
    ]

    # Ascending sort on (score, name); printed in that order as a diagnostic.
    scores.sort()
    print('scores:', scores)

    # Put the best matches first.
    scores.reverse()

    # List slicing clamps out-of-range bounds, so n needs no explicit check.
    return scores[0:n]

# 利用其他所有人的加权平均给用户推荐

def get_recommendations(prefs, person, similarity=sim_pearson):
    """Recommend items for ``person`` using similarity-weighted ratings.

    Every other person's ratings are weighted by how similar that person is
    to ``person``; each unseen item's score is the weighted average of the
    ratings it received.

    Args:
        prefs: Mapping of person -> {item -> numeric rating}.
        person: Key of the person to produce recommendations for.
        similarity: Callable ``(prefs, person, other) -> score`` used to
            weight each other person's ratings.

    Returns:
        A list of ``(predicted_score, item)`` tuples, highest score first,
        covering items that ``person`` has not rated (or has rated 0).
    """
    # item -> sum of (rating * similarity) over the contributing people
    totals = {}
    # item -> sum of the similarities of the people who rated it
    sim_sums = {}

    for other in prefs:
        # Do not compare the person with themselves.
        if other == person:
            continue

        sim = similarity(prefs, person, other)

        # Ignore people whose similarity is zero or negative.
        if sim <= 0:
            continue

        for item, rating in prefs[other].items():
            # Only score items the person has not rated; a rating of 0 is
            # treated the same as "not rated".
            if item not in prefs[person] or prefs[person][item] == 0:
                totals.setdefault(item, 0)
                totals[item] += rating * sim
                sim_sums.setdefault(item, 0)
                sim_sums[item] += sim

    # Normalise each weighted total by the total similarity behind it.
    rankings = [(total / sim_sums[item], item) for item, total in totals.items()]

    # Highest predicted score first.
    rankings.sort()
    rankings.reverse()
    return rankings

# Demo calls. The return values are discarded, so the only visible output
# comes from the print statements inside the called functions.
sim_distance(critics, 'dennychen', 'Michaelzhou')

# NOTE(review): 'Lisa Rose' and 'Gene Seymour' are not keys of the critics
# dict shown in this listing; this call would need updated names to run.
# sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')

# Rank the three reviewers most similar to 'dennychen' (default metric).
topMatches(critics, 'dennychen', n = 3)

# get_recommendations(critics, 'Toby')

# get_recommendations(critics,'Toby', similarity=sim_distance)

发表于: 2018-03-01 17:00:57
原文链接：http://kuaibao.qq.com/s/20180301A0WNUW00?refer=cp_1026
腾讯「腾讯云开发者社区」是腾讯内容开放平台帐号（企鹅号）传播渠道之一，根据《腾讯内容开放平台服务协议》转载发布内容。
如有侵权，请联系 cloudcommunity@tencent.com 删除。

扫码

添加站长进交流群

领取专属 10元无门槛券

私享最新 技术干货

机器学习—推荐系统

相关快讯

扫码

社区

活动

资源

关于

腾讯云开发者

热门产品

热门推荐

更多推荐