大家好,又见面了,我是你们的朋友全栈君。
对于PageRank算法,维基百科和网上很多大牛的博客已经讲得很详细了,这里附上一个自己写的PageRank算法C++实现版本:
/*
* Author: YANG Xiangyu
* The Chinese university of Hong Kong
*/
#include<cstdio>
#include<iostream>
#include<fstream>
#include<cstdlib>
#include<string>
#include<algorithm>
using namespace std;
#define MAX 1000000
struct edge //define edge
{
int u;
int v;
}edge[5200000];
int rednum[MAX]={0}; //to mark a point that if it has been visited, and record a new number
int orinum[MAX]={0}; //to record the original number of the new recorded number
int d[MAX]={0}; //to mark the out degree of the point
double ra[MAX]={0}; //to mark the current rank value of the point
double rb[MAX]={0}; //to mark the updated rank value of the point
int cmp(const int &a, const int &b)
{
if(ra[rednum[a]]>ra[rednum[b]])return 1;
return 0;
}
void pagerank()
{
ifstream fin("D:\\web-Google.txt");//If TA want to test my code, please take the text 'web-google.txt' to the D disk.
ofstream fout("D:\\output.txt");
memset(edge,0,sizeof(edge));
string s;
for(int i=0;i<4;++i)
{getline(fin,s);cout<<s<<endl;}//Read the first four lines of the input file
int ncnt=0;
int ecnt=0;
int cnt=0;
double eps=0.1;
double flag;
int i;
for(i=0;fin>>edge[i].u>>edge[i].v;++i)//input the two point of each edge
{
if(!rednum[edge[i].u]) //judge the point whether it has been visited
{
rednum[edge[i].u]=ncnt;//redefine the number of current point
orinum[ncnt++]=edge[i].u;//record the original number of current point
}
if(!rednum[edge[i].v]) //judge the point whether it has been visited
{
rednum[edge[i].v]=ncnt;//redefine the number of current point
orinum[ncnt++]=edge[i].v;//record the original number of current point
}
d[rednum[edge[i].u]]++;
}
ecnt=i;
printf("%d %d\n",ncnt,ecnt);
for(i=0;i<ncnt;++i)
ra[i]=(double)1/ncnt;
while(eps>0.0000001)//set ε=10^(-7), control the number of iterations
{
printf("%d %.7lf\n",cnt,eps);
eps=0;
cnt++;
for(int i=0;i<ecnt;++i)
rb[rednum[edge[i].v]]+=ra[rednum[edge[i].u]]/d[rednum[edge[i].u]]; //first step to initialize the rank value
for(int i=0;i<ncnt;++i)
{
rb[i]=rb[i]*0.8+(0.2*1/ncnt); //add the random jumping coefficient β, and set β=0.8
eps+=ra[i]>rb[i]?(ra[i]-rb[i]):(rb[i]-ra[i]);//compute the Difference between the old rank value and new rank value, and update the ε
ra[i]=rb[i];
rb[i]=0;
}
}
sort(orinum,orinum+ncnt,cmp);//sort the array according to the score
for(int i=0;i<100;++i)
fout<<orinum[i]<<' '<<ra[rednum[orinum[i]]]<<endl;
}
int main()
{
pagerank();
return 0;
}
发布者:全栈程序员栈长,转载请注明出处:https://javaforall.cn/126142.html原文链接:https://javaforall.cn