感谢曾老师,又是新一期的学徒作业~
作业灵感来源如下:
生物信息学权威期刊Bioinformatics发文量最大的是谁 (qq.com)
第一作者发表在Bioinformatics杂志上论文数top10 (qq.com)
作业内容:
同样的统计,更换一下杂志,好~
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# PubMed can only download fewer than 10,000 records at once, so the
# journal is fetched in two year ranges.
journal <- "Bioinformatics (Oxford, England)"
D1 <- fetch_years(journal, 1998, 2014, api_key)
D2 <- fetch_years(journal, 2015, 2023, api_key)

# Convert each download to a data frame and stack them.
m <- rbind(pmApi2df(D1), pmApi2df(D2))

# Bug fix: the original filtered `df` without ever creating it, so `df`
# resolved to the base function stats::df. Start from the merged table.
df <- m
df <- df[df$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)
sort(table(fa))
代码目的:
总结《Bioinformatics》杂志自创刊以来所有文章的一作,并对作者按照发文数量排序。
同样的条件,我们看一下其他顶刊究竟是谁发文最多吧
嘻嘻嘻,顺便还统计了期刊的发文量
先从CNS开始~
1974年创刊,创刊49年
截至检索时,共2万多篇文章
排名作者,代码如下~
#"Cell"[Journal]
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# PubMed can only download fewer than 10,000 records at once, so the
# journal is fetched in three year ranges.
journal <- "Cell"
D1 <- fetch_years(journal, 1974, 1996, api_key)
D2 <- fetch_years(journal, 1997, 2015, api_key)
D3 <- fetch_years(journal, 2016, 2023, api_key)

# Convert each download to a data frame and stack them.
m <- do.call(rbind, lapply(list(D1, D2, D3), pmApi2df))
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)
save(fa, file = "Cell.Rdata")
# load(file = "Cell.Rdata")

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
# fa
# HUNTER, T BONETTA, LAURA
# 76 15 11
# LAWRENCE, P A LEWIN, B SPIEGELMAN, B M
# 10 10 10
# WEINTRAUB, H ORCI, L KOZAK, M
# 10 9 8
# VALE, R D CLEVELAND, D W FUCHS, E
# 8 7 7
# HENIKOFF, S KMIEC, E B LAZARIDES, E
# 7 7 7
运行结果
检索前3, 看大佬从事的领域
image.png
image.png
image.png
1880年创刊,共计143年
到目前,有18万文章
代码如下:
略长..
#"Science (New York, N.Y.)"[Journal]
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# PubMed can only download fewer than 10,000 records at once, so the
# journal is fetched in 21 year ranges. The name D16 is intentionally
# unused (it was a duplicate of the 1997-2001 range in the original).
journal <- "Science (New York, N.Y.)"
D1 <- fetch_years(journal, 1880, 1893, api_key)
D2 <- fetch_years(journal, 1894, 1911, api_key)
D3 <- fetch_years(journal, 1912, 1927, api_key)
save(D1, D2, D3, file = "Science_1_3.Rdata")

D4 <- fetch_years(journal, 1928, 1939, api_key)
D5 <- fetch_years(journal, 1940, 1950, api_key)
D6 <- fetch_years(journal, 1951, 1961, api_key)
D7 <- fetch_years(journal, 1962, 1966, api_key)
D8 <- fetch_years(journal, 1967, 1971, api_key)
D9 <- fetch_years(journal, 1972, 1976, api_key)
D10 <- fetch_years(journal, 1977, 1980, api_key)
D11 <- fetch_years(journal, 1981, 1984, api_key)
D12 <- fetch_years(journal, 1985, 1988, api_key)
D13 <- fetch_years(journal, 1989, 1992, api_key)
D14 <- fetch_years(journal, 1993, 1996, api_key)
D15 <- fetch_years(journal, 1997, 2001, api_key)
D17 <- fetch_years(journal, 2002, 2005, api_key)
D18 <- fetch_years(journal, 2006, 2009, api_key)
D19 <- fetch_years(journal, 2010, 2013, api_key)
D20 <- fetch_years(journal, 2014, 2017, api_key)
D21 <- fetch_years(journal, 2018, 2021, api_key)
D22 <- fetch_years(journal, 2022, 2023, api_key)
# Bug fix: the original loaded "Science_4_22.Rdata" without ever writing
# it; save the second batch here so that checkpoint actually exists.
save(
  D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
  D17, D18, D19, D20, D21, D22,
  file = "Science_4_22.Rdata"
)

# To resume from the checkpoints instead of downloading again:
# load(file = "Science_1_3.Rdata")
# load(file = "Science_4_22.Rdata")
save(
  D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
  D11, D12, D13, D14, D15, D17, D18,
  D19, D20, D21, D22,
  file = "Science_all.Rdata"
)

# Convert each download to a data frame and stack them.
chunks <- list(
  D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
  D11, D12, D13, D14, D15, D17, D18,
  D19, D20, D21, D22
)
m <- do.call(rbind, lapply(chunks, pmApi2df))
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)
save(fa, file = "Science_.Rdata")
# load(file = "Science_.Rdata")

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
# fa
# WALSH, J HOLDEN, C KERR, R A ABELSON, P H MARSHALL, E CARTER, L J SMITH, R J
# 18482 829 662 614 544 532 431 390
# NORMAN, C WALDROP, M M ROBINSON, A L DICKSON, D WADE, N GREENBERG, D S MAUGH, T H
# 389 377 307 271 268 262 241
829篇?重名了?发文时间集中在1962-1989,可能不是重名
找了篇文章看,可能是做科普或者做动物分类的
还有一些社科评论相关的文章
一页3篇Science
寻思WALSH, J 可能是Science期刊的科普作者或者记者
那岂不是发Science就是他的本职工作??
Nature实际创刊于1869年;这里从PubMed检索的起始年份1945年算起,78年了
近13万文章
代码:
#"Nature"[Journal]
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# PubMed can only download fewer than 10,000 records at once, so the
# journal is fetched in 16 year ranges. D16 (1989 only) keeps its original
# out-of-sequence name so the saved object names stay the same.
journal <- "Nature"
D1 <- fetch_years(journal, 1945, 1957, api_key)
D2 <- fetch_years(journal, 1958, 1963, api_key)
D3 <- fetch_years(journal, 1964, 1968, api_key)
# Bug fix: this range used to start at 1967 and overlapped D3, so records
# from 1967-1968 were downloaded and counted twice.
D4 <- fetch_years(journal, 1969, 1971, api_key)
D5 <- fetch_years(journal, 1972, 1980, api_key)
D6 <- fetch_years(journal, 1981, 1988, api_key)
D16 <- fetch_years(journal, 1989, 1989, api_key)
D7 <- fetch_years(journal, 1990, 1996, api_key)
D8 <- fetch_years(journal, 1997, 2001, api_key)
D9 <- fetch_years(journal, 2002, 2005, api_key)
D10 <- fetch_years(journal, 2006, 2009, api_key)
D11 <- fetch_years(journal, 2010, 2013, api_key)
D12 <- fetch_years(journal, 2014, 2016, api_key)
D13 <- fetch_years(journal, 2017, 2019, api_key)
D14 <- fetch_years(journal, 2020, 2022, api_key)
D15 <- fetch_years(journal, 2023, 2023, api_key)
save(
  D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
  D11, D12, D13, D14, D15, D16,
  file = "Nature.Rdata"
)

# Convert each download to a data frame and stack them.
chunks <- list(
  D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
  D11, D12, D13, D14, D15, D16
)
m <- do.call(rbind, lapply(chunks, pmApi2df))
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)
save(fa, file = "Nature_fa.Rdata")
# load(file = "Nature_fa.Rdata")

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
# fa
# SMAGLIK, PAUL GEWIN, VIRGINIA EISENSTEIN, MICHAEL POWELL, KENDALL SAVAGE, NEIL
# 2735 229 100 79 78 62
# WOOLSTON, CHRIS SCHIERMEIER, QUIRIN LEDFORD, HEIDI ABBOTT, ALISON CYRANOSKI, DAVID DOLGIN, ELIE
# 59 57 51 47 46 44
# PERKEL, JEFFREY M DANCE, AMBER WITZE, ALEXANDRA
前15位
出于谨慎,搜一下第一名SMAGLIK, PAUL
展示他的2篇报道
1-单细胞测序相关
2-神经科学相关
....估计是记者在追踪报道
发现一位以发Nature为本职工作的
那岂不是...
原来发顶刊的诀窍是... 发不了就加入。
Nature新的子刊~ 2019年,4年
一共60篇文章
##"Nature machine intelligence"[Journal]
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# A single request covers the whole journal (the old "eight parts" comment
# was a copy-paste leftover).
D1 <- fetch_years("Nature machine intelligence", 2019, 2023, api_key)
m <- pmApi2df(D1)
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
结果:
2021年创刊,也是Nature新子刊,2年
37篇文章,目前还没有影响因子
####"Nature Computational Science"[Journal]
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# A single request covers the whole journal (the old "eight parts" comment
# was a copy-paste leftover). The year range is kept as in the original.
D1 <- fetch_years("Nature Computational Science", 2019, 2023, api_key)
m <- pmApi2df(D1)
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
# fa
# CHIRIGATI, FERNANDO RASTOGI, ANANYA
# 2 2 1
# AVANTS, BRIAN B BRYAN, J SHEPARD CHAMBERLAND, MAXIME
# 1 1 1
# GALA, ROHAN GAMAZON, ERIC R GUAN, YUANFANG
# 1 1 1
# HASEEB, MUHAMMAD HOFFECKER, IAN T HOU, HAO
# 1 1 1
# JIN, CHONG JING, XIAOYANG JOSEPH, JERELLE A
# 1 1 1
结果:
看了一眼pdf文件,并不是一般意义上的科研文章。
第二名也是类似情况
pdf内容
就不继续看了。
转
2010年创刊,13年
共计5万多篇文章
代码:
##"Nature communications"[Journal]
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# PubMed can only download fewer than 10,000 records at once, so the
# journal is fetched in eight year ranges.
journal <- "Nature communications"
D1 <- fetch_years(journal, 2010, 2015, api_key)
D2 <- fetch_years(journal, 2016, 2017, api_key)
D3 <- fetch_years(journal, 2018, 2018, api_key)
D4 <- fetch_years(journal, 2019, 2019, api_key)
D5 <- fetch_years(journal, 2020, 2020, api_key)
D6 <- fetch_years(journal, 2021, 2021, api_key)
D7 <- fetch_years(journal, 2022, 2022, api_key)
D8 <- fetch_years(journal, 2023, 2023, api_key)

# Bug fix: the raw-download cache was loaded but its save was commented
# out, so a fresh run failed at load(). Write the cache here instead.
save(D1, D2, D3, D4, D5, D6, D7, D8, file = "Nature communications.Rdata")
# To resume from earlier downloads instead of fetching again:
# load(file = "Nature communications.Rdata")
# load(file = "Nature communications_D4.Rdata")

# Convert each download to a data frame and stack them.
m <- do.call(
  rbind,
  lapply(list(D1, D2, D3, D4, D5, D6, D7, D8), pmApi2df)
)
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)
# Bug fix: `fa` used to be written to "Nature communications.Rdata", which
# overwrote the raw D1..D8 cache; it now gets its own file.
save(fa, file = "Nature communications_fa.Rdata")
# load(file = "Nature communications_fa.Rdata")

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
# fa
# YANG, YANG LIU, YANG ZHANG, LEI LIU, WEI WANG, YING ZHANG, WEI WANG, WEI WANG, YANG ZHANG, TAO LI, JUN WANG, QI ZHANG, YI LI, JIE LI, YAN
# 20 17 15 14 14 14 13 13 13 12 12 12 11 11
# LIU, YI
#11
嗯?好像全是国人诶。
诶嘿嘿,那不挺好~
顺着线索扒一扒,看看这个牛气的国人是什么领域的。
扒~
以下举例了3位YANG, YANG:
YANG, YANG 1号
YANG, YANG 2号
YANG, YANG 3号
第二名也存在重名
创立于2004年,19年
5千多篇~
代码:
##"Nature methods"[Journal]
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# A single request covers the whole journal.
D1 <- fetch_years("Nature methods", 2004, 2023, api_key)
m <- pmApi2df(D1)
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
emmm,这数量级大概是记者吧,简单搜索一位
好的就是记者...
创刊于1992,31年
一共近9000篇
##"Nature genetics"[Journal]
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# A single request covers the whole journal.
D1 <- fetch_years("Nature genetics", 1992, 2023, api_key)
# Optional raw-download cache; it uses its own file name so it cannot be
# overwritten by the `fa` save below.
# save(D1, file = "Nature_genetics_raw.Rdata")
m <- pmApi2df(D1)
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)
save(fa, file = "Nature_genetics.Rdata")
# load(file = "Nature_genetics.Rdata")

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
# fa
# ALAM, ORNOB DANOVI, SAFIA
# 135 9 9
# FLETCHER, MICHAEL FAIAL, TIAGO LI, WEI
# 9 8 8
# VOGAN, KYLE GUDMUNDSSON, JULIUS MCCARROLL, STEVEN A
# 8 7 6
# BARRETT, JEFFREY C CONRAD, DONALD F GUDBJARTSSON, DANIEL F
# 5 5 5
# SULEM, PATRICK TURNBULL, CLARE XU, X
# 5 5 5
结果:
这个数量级正常了
emmm,好吧,看着不像科研论文
下一位
创刊于1996年,27年
1万多篇
##Nature biotechnology
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# PubMed can only download fewer than 10,000 records at once, so the
# journal is fetched in two year ranges.
journal <- "Nature biotechnology"
D1 <- fetch_years(journal, 1996, 2010, api_key)
D2 <- fetch_years(journal, 2011, 2023, api_key)

# Convert each download to a data frame and stack them.
m <- rbind(pmApi2df(D1), pmApi2df(D2))
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)
save(fa, file = "Nature_biotechnology s.Rdata")
# load(file = "Nature_biotechnology s.Rdata")

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
# fa
# FRANCISCO, MICHAEL JACOBS, TOM HUGGETT, BRADY DEWITT, N DEFRANCESCO, LAURA
# 774 50 48 47 43 41
# PERSIDIS, A DOVE, A LAWRENCE, STACY SHERIDAN, CORMAC FREDERICKSON, R HODGSON, J
# 37 32 31 31 30 29
# GARBER, KEN HOYLE, R RATNER, MARK
# 20 20 17
查了第一名是记者。
好,下一位
1995年创立,28年
1万多篇文献
##"Nature medicine"[Journal]
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# PubMed can only download fewer than 10,000 records at once, so the
# journal is fetched in two year ranges.
journal <- "Nature medicine"
D1 <- fetch_years(journal, 1995, 2014, api_key)
D2 <- fetch_years(journal, 2015, 2023, api_key)

# Convert each download to a data frame and stack them.
m <- rbind(pmApi2df(D1), pmApi2df(D2))
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)
save(fa, file = "Nature medicine.Rdata")
# `fa` is already in memory; reloading is only needed in a later session.
# load(file = "Nature medicine.Rdata")

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
# fa
# STOWER, HANNAH O'LEARY, KAREN
# 195 164 12
# BIRMINGHAM, K CARVALHO, THIAGO CHAKRADHAR, SHRADDHA
# 11 10 10
# GOLDSTEIN, JOSEPH L NOVAK, K WILLYARD, CASSANDRA
# 5 5 5
# BACH, F H BALLMAIER, M BOSCH, X
# 4 4 4
# GRAINGER, D J KEENER, AMANDA B READY, T
# 4 4 4
第一名,这个数量级已经没有兴趣查了
创刊1998,25年
1万7的文章数
# Bug fix: this line read `llibrary(pubmedR)`, which fails with
# "could not find function".
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# PubMed can only download fewer than 10,000 records at once, so the
# journal is fetched in two year ranges.
journal <- "Bioinformatics (Oxford, England)"
D1 <- fetch_years(journal, 1998, 2014, api_key)
D2 <- fetch_years(journal, 2015, 2023, api_key)

# Convert each download to a data frame and stack them.
m <- rbind(pmApi2df(D1), pmApi2df(D2))

# Changed from the quoted original: `df` is created from `m` before it is
# filtered.
df <- m
df <- df[df$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)

# Changed from the quoted original: only the top 15 are shown.
head(sort(table(fa), decreasing = TRUE), 15)
看见大佬了~ 核对了一下检索统计结果,没什么问题 看看第一名~
依旧不是科研论文
哇,真大佬
2000年创刊,23年
1万多的文章
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# PubMed can only download fewer than 10,000 records at once, so the
# journal is fetched in two year ranges (kept as in the original).
journal <- "BMC bioinformatics"
D1 <- fetch_years(journal, 1998, 2014, api_key)
D2 <- fetch_years(journal, 2015, 2023, api_key)

# Convert each download to a data frame and stack them.
m <- rbind(pmApi2df(D1), pmApi2df(D2))
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
结果:
是学术论文,那真是大牛哇
1974年创刊,49年
近5万篇文献
##"Nucleic acids research"[Journal]
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# PubMed can only download fewer than 10,000 records at once, so the
# journal is fetched in six year ranges.
journal <- "Nucleic acids research"
D1 <- fetch_years(journal, 1974, 1987, api_key)
D2 <- fetch_years(journal, 1988, 1993, api_key)
D3 <- fetch_years(journal, 1994, 2004, api_key)
D4 <- fetch_years(journal, 2005, 2012, api_key)
D5 <- fetch_years(journal, 2013, 2018, api_key)
D6 <- fetch_years(journal, 2019, 2023, api_key)

# Convert each download to a data frame and stack them.
m <- do.call(rbind, lapply(list(D1, D2, D3, D4, D5, D6), pmApi2df))
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
查作者
1988年有峰值
是科研文章,但是是同一类型
2000年创刊,23年了
5千多文献
#"Genome biology"[Journal]
# Run this snippet in a fresh R session; it no longer wipes the workspace
# with rm(list = ls()).
library(pubmedR)
api_key <- NULL

# Download every PubMed record of `journal` published from year `from` to
# year `to` (inclusive): build the query, ask for the number of hits, then
# request that many records.
fetch_years <- function(journal, from, to, api_key = NULL) {
  query <- sprintf(
    paste0(
      '("%s"[Journal]) AND (("%d"[Date - Publication] : ',
      '"%d"[Date - Publication]))'
    ),
    journal, from, to
  )
  res <- pmQueryTotalCount(query = query, api_key = api_key)
  # The original note says PubMed serves fewer than 10,000 records per
  # request, so flag year ranges that reach that size.
  if (as.numeric(res$total_count) >= 10000) {
    warning(
      "Query matches ", res$total_count, " records (", journal, ", ",
      from, "-", to, "); split it into narrower year ranges.",
      call. = FALSE
    )
  }
  pmApiRequest(query = query, limit = res$total_count, api_key = api_key)
}

# PubMed can only download fewer than 10,000 records at once, so the
# journal is fetched in two year ranges (kept as in the original).
journal <- "Genome biology"
D1 <- fetch_years(journal, 1998, 2014, api_key)
D2 <- fetch_years(journal, 2015, 2023, api_key)

# Convert each download to a data frame and stack them.
m <- rbind(pmApi2df(D1), pmApi2df(D2))
df <- m[m$DT == "JOURNAL ARTICLE", ]

# Keep only the text before the first ";" of AF (the first listed author).
fa <- sub(";.*$", "", df$AF)

# Top 15 first authors by number of records.
head(sort(table(fa), decreasing = TRUE), 15)
查作者
从标题看,是记者