I tried to use the aws.alexa package to retrieve traffic information for a list of URLs, but whenever I call the url_info() function the following error pops up:
url_info("http://www.google.com")
>Request ID: 1e7cdXXXXXXXXXXXa8-c0700a4eXXXX
>Response Status: Success
>Error in urlinfo_payload[[2]][[1]][[2]] : subscript out of bounds
Whenever I try another function, the error is different:
traffic <- traffic_history("http://www.google.com")
>Error in as.Date.default(date, "%Y%m%d") :
>do not know how to convert 'date' to class “Date”
browse_categories(path="Top/Arts")
Thanks.
Posted on 2017-05-01 03:59:13
I am the maintainer of the package. Thomas Leeper pointed me to this question.
First, my apologies. traffic_history has a bug. It should have been caught in testing, but somehow the unit tests were not run. The latest development version fixes the issue, and it should be up on CRAN soon:
devtools::install_github("cloudyr/aws.alexa")
library(aws.alexa)
set_secret_key("XXXXX", "XXXXX")
traffic <- traffic_history("http://www.google.com")
head(traffic)
>        date page_views_per_million page_views_per_user rank reach_per_million
>1 2017-03-29                  87520                8.68    1            439900
>2 2017-03-30                  87640                8.54    1            443300
>3 2017-03-31                  83900                8.23    1            431900
I cannot replicate the problem for url_info. If it still gives an error, could you try again and get in touch?
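For reference, here is a minimal sketch (not from the original answer) of how one might retest url_info against the development build and capture the error message for an issue report; the credentials and test URL are placeholders:
devtools::install_github("cloudyr/aws.alexa")
library(aws.alexa)
set_secret_key("XXXXX", "XXXXX")   # placeholder credentials

# Catch the error instead of letting it stop the session, so the message
# can be copied into a bug report.
res <- tryCatch(
  url_info("http://www.google.com"),
  error = function(e) {
    message("url_info still fails: ", conditionMessage(e))
    NULL
  }
)
str(res)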
Posted on 2017-05-03 15:53:57
Sorry for the late reply. I am now posting the script with which I got the information by using only a part of the url_info function. I know it is not very clear. If I just use the function itself, the error is still there.
library(rvest)
library(aws.alexa)
library(aws.signature)
library(httr)
library(XML)
require(plyr)
library(dplyr)
library(ggplot2)
#first run: setSecretKey, trafficHistory, alexa_GET
url <- read_html("http://www.lombardiapress.it/lombardiapress/portale/index.php?s=3#")
newsP <- url %>%
html_nodes(xpath='//*[(@id = "divBergamo") or (@id = "divBrescia") or (@id = "divComo") or (@id = "divCremona") or (@id = "divLecco")
or (@id = "divLodi") or (@id = "divMantova") or (@id = "divMilano") or (@id = "divMonza") or (@id = "divPavia") or (@id = "divSondrio")
or (@id = "divVarese")]//a')
df <- bind_rows(lapply(xml_attrs(newsP), function(x) data.frame(as.list(x), stringsAsFactors=FALSE)))
head(df)
df <- df[!duplicated(df$href),]
df <- df[!is.na(df$href),]
set_secret_key(key="XXXXXXXXXXXZP4FQ", secret="XXXXXXXXXXXXXXXXXXXXXXXXX+dlpdqip")
avg <- data.frame(pageviewsPerMillion = numeric(), pageViewsPerUser = numeric(), Rank = numeric(),
reachPerMillion =numeric(), stringsAsFactors=FALSE)
url <- character(nrow(df))  # reuse `url` as a plain character vector of hrefs (it held the parsed page above)
for(i in 1:nrow(df)){
query <- list(Action = "TrafficHistory", Url = df$href[i], ResponseGroup="History", range=15, start="20170201")
traffic_payload <- alexa_GET(query)
res_list <- lapply(lapply(traffic_payload[[2]][[1]], "[[", 4)[[1]], unlist) # Had to change the indexing here because it wasn't pointing to the right node (I think this was the issue)
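# If those indices don't line up with your own response, str(traffic_payload, max.level = 4)
# is a quick way to see where the history records actually sit in the parsed payload.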
db <- rbind.fill(lapply(res_list,function(y){as.data.frame(t(y),stringsAsFactors=FALSE)}))
db <- db[,-1]
#db <- as.data.frame(t(data.frame(res_list,stringsAsFactors=FALSE)))
db <- lapply(db, as.numeric)
url[i] <- df$href[i]
avg[i,] <- data.frame(pageviewsPerMillion = mean(db$PageViews.PerMillion, na.rm = TRUE), pageViewsPerUser = mean(db$PageViews.PerUser, na.rm = TRUE), Rank = mean(db$Rank, na.rm = TRUE),
reachPerMillion = mean(db$Reach.PerMillion, na.rm = TRUE))
}
avg$url <- unlist(url)
avg <- avg[!is.na(avg$pageviewsPerMillion),]
setwd("~/R projects/Alexa rank")
write.csv2(avg, "Ranking_Testate_Lombardia.csv")
ggplot() + geom_col(data=avg, aes(reorder(url, pageviewsPerMillion), pageviewsPerMillion)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 20), legend.position = "none")
ggplot() + geom_col(data=avg, aes(reorder(url, pageViewsPerUser), pageViewsPerUser)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 20), legend.position = "none")
ggplot() + geom_col(data=avg, aes(reorder(url, 1/Rank), 1/Rank)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 20), legend.position = "none")
ggplot() + geom_col(data=avg, aes(reorder(url, reachPerMillion), reachPerMillion)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 20), legend.position = "none")
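As a side note not covered in the original answer, here is a small sketch of how the per-URL request in the loop above could be wrapped in tryCatch, so that one URL the API rejects does not abort the whole run; fetch_history is a hypothetical helper and the query settings simply mirror the script:
library(aws.alexa)

# Hypothetical helper: returns NULL instead of raising an error when a URL fails.
fetch_history <- function(target_url) {
  query <- list(Action = "TrafficHistory", Url = target_url,
                ResponseGroup = "History", range = 15, start = "20170201")
  tryCatch(
    alexa_GET(query),
    error = function(e) {
      message("Skipping ", target_url, ": ", conditionMessage(e))
      NULL
    }
  )
}

# Inside the loop this would replace the direct alexa_GET call:
#   traffic_payload <- fetch_history(df$href[i])
#   if (is.null(traffic_payload)) next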
https://stackoverflow.com/questions/43678315