数据挖掘R_Python_ML(3): 用R实现一个简易推荐系统

uid,m1, m2, m3, m4, m5, m6, m7, m8, m9, m10

1,5,3,0,4,0,0,1,2,4,4

2,3,1,2,0,0,2,0,0,1,2

3,4,0,1,2,1,0,1,2,4,0

4,0,2,3,0,0,4,2,1,1,2

5,1,3,4,1,2,0,0,0,0,2

6,2,0,0,0,2,3,5,1,4,0

7,4,1,1,0,2,0,1,0,0,3

8,1,2,0,3,0,0,2,1,0,3

9,3,1,0,3,0,4,0,0,1,0

10,2,1,3,1,0,1,0,0,2,1

> rownum = nrow(mydata);rownum

[1] 10

> colnum = ncol(mydata);colnum

[1] 11

> install.packages("lsa")

> library(lsa)

#' Cosine similarity between two users over the items both have rated
#'
#' A rating of 0 is treated as "not rated". Only items that both users rated
#' (rating > 0) enter the cosine. When the two users share too few rated
#' items, the similarity is defined to be 0.
#'
#' @param ui,uj Row indices of the two users in `data`.
#' @param data Ratings table whose first column is the user id and whose
#'   remaining columns are item ratings. Defaults to the global `mydata`,
#'   which keeps existing `cossimilar(i, j)` calls working.
#' @param min_overlap Minimum fraction of all items that both users must have
#'   rated; below this the similarity is 0. The default of 0.3 is a manually
#'   chosen threshold and can be tuned for the data at hand.
#' @return A single number: 0 when the overlap is insufficient, otherwise the
#'   cosine of the two users' ratings on their common items.
cossimilar <- function(ui, uj, data = mydata, min_overlap = 0.3) {
  # Every column except the first (the user id) is an item.
  item_cols <- seq_len(ncol(data))[-1]
  ratings_i <- as.numeric(unlist(data[ui, item_cols], use.names = FALSE))
  ratings_j <- as.numeric(unlist(data[uj, item_cols], use.names = FALSE))

  # which() drops NA comparisons, so a missing rating counts as "not rated".
  common <- which(ratings_i > 0 & ratings_j > 0)
  n_common <- length(common)
  if (n_common == 0 || n_common < length(item_cols) * min_overlap) {
    return(0)
  }

  # cosine() comes from the lsa package and returns a 1 x 1 matrix for two
  # vectors; flatten it so both branches return a plain number.
  as.numeric(cosine(ratings_i[common], ratings_j[common]))
}

注意：此处还有一个小技巧：如果两人共同评价过的电影少于总电影数的30%，则认为两人毫无共同点，余弦相似度记为0。这个30%是我们人为设定的阈值，可以根据具体数据进行调整。

# Pairwise user-user similarity matrix.
# Only the upper triangle (row < column) is filled; the diagonal and the
# lower triangle stay NA, so the similarity of users a and b must be read as
# cosdata[min(a, b), max(a, b)].
cosdata <- matrix(nrow = rownum, ncol = rownum)

# seq_len() yields an empty sequence when there are fewer than two users,
# whereas 1:(rownum - 1) would count down and index out of range.
for (i in seq_len(max(rownum - 1, 0))) {
  for (j in (i + 1):rownum) {
    cosdata[i, j] <- cossimilar(i, j)
  }
}

cosdata

# Predicted ratings: one row per user, one column per item (column j - 1 of
# rdata corresponds to column j of mydata, because mydata's first column is
# the user id).
#
# A prediction is made only for items the user has not rated (rating 0):
# the similarity-weighted sum of the other users' ratings for that item,
# divided by the number of other users who actually rated it. Entries for
# items the user already rated stay NA.
rdata <- matrix(nrow = rownum, ncol = colnum - 1)

for (i in seq_len(rownum)) {
  others <- setdiff(seq_len(rownum), i)
  # cosdata holds only the upper triangle, so each pair is looked up as
  # (smaller index, larger index).
  sim <- cosdata[cbind(pmin(i, others), pmax(i, others))]

  for (j in seq_len(colnum - 1) + 1) {
    if (mydata[i, j] == 0) {
      ratings <- mydata[others, j]
      n_rated <- sum(ratings > 0)
      # With no rater the original formula is 0 / 0 (NaN); record NA instead
      # so "no prediction possible" is explicit.
      rdata[i, j - 1] <- if (n_rated > 0) sum(sim * ratings) / n_rated else NA
    }
  }
}

rdata

# Print, for every user, the items whose predicted rating is above 2.
# Column j of rdata is item j; entries without a prediction (NA or NaN) are
# skipped.
for (user in seq_len(rownum)) {
  cat("Recommendation to User", user, " products: ")
  predicted <- rdata[user, ]
  # which() ignores NA/NaN comparisons, so no separate is.na() test is needed.
  for (item in which(predicted > 2)) {
    cat(item, ", ")
  }
  cat("\n")
}

Recommendation to User 1 products : 3 , 6 ,

Recommendation to User 2 products : 4 ,

Recommendation to User 3 products : 6 ,

Recommendation to User 4 products : 1 , 4 ,

Recommendation to User 5 products :

Recommendation to User 6 products :

Recommendation to User 7 products :

Recommendation to User 8 products : 6 , 9 ,

Recommendation to User 9 products :

Recommendation to User 10 products :

0 条评论

相关文章

2.4K90

16040

16890

29260

34530

33540

51760

强化学习（Reinforcement Learning）应用于量化投资系列专题（一）——在交易中的应用

1.1K100

【新智元导读】一项新的研究旨在使用生成对抗网络（GAN） 来加快密码破解的速度。斯蒂文斯理工学院的研究人员用类似“AlphaGo”的方法，利用超过 4300 万...

34960

基于电子海图的水面无人艇全局路径规划

该论文已经在ICMIR2017会议上发表，附上springer的文献地址 Research and Implementation of Global Pat...

32250