%clear
%reset -f
# In[*]
# Load the Python libraries
from bioservices.kegg import KEGG
# One shared KEGG service client; the following cells reuse it via the name `s`.
s = KEGG()
# In[*]
# Download the KEGG record for pathway hsa04660 and echo it exactly as returned.
pathway_record = s.get("hsa04660")
print(pathway_record)
# In[*]
# Fetch the hsa04660 entry and let the client parse the returned text into a
# keyed structure, so that a single section can be looked up by its name.
data = s.get("hsa04660")
dict_data = s.parse(data)
print(dict_data['GENE'])
# Author's note (translated): this outputs the KEGG pathway information,
# including the genes in the pathway, the relations between the genes, the
# links, and so on.
# The 'GENE' entry is the table that holds the gene names (the author calls it
# a data frame -- TODO confirm the actual type) and can be extracted on its own.
# In[*]
# parse_kgml_pathway() is given only the pathway id, so no separate
# s.get("hsa04660", "kgml") request is issued first: its result would be
# overwritten by the parsed structure without ever being used.
res = s.parse_kgml_pathway("hsa04660")
# Bare expressions: they only show a value when evaluated interactively.
res['relations']
res['relations'][0]
res['entries']
# In[*]
# NOTE: the star import is kept on purpose; it supplies hist/xlabel/ylabel/
# title/grid below, and code outside this cell may rely on other pylab names.
from pylab import *
# Extract all relations from all pathways of the selected organism.
from bioservices.kegg import KEGG
import collections  # for python 2.7.0 and above
s = KEGG()
s.organism = "hsa"
# Retrieves more than 260 pathways (one parse call per pathway id), so this
# step takes time.
results = [s.parse_kgml_pathway(x) for x in s.pathwayIds]
relations = [x['relations'] for x in results]
# Histogram (20 bins) of how many relations each pathway contains.
hist([len(r) for r in relations], 20)
xlabel('number of relations')
# Raw string: same two characters as before (backslash + '#') without relying
# on an invalid escape sequence.
ylabel(r'\#')
title("number of relations per pathways")
grid(True)
# From all pathways and all relations, collect the type of each relation
# (its 'name') into one flat list; a nested comprehension avoids depending on
# a `flatten` helper being pulled in by the star import.
data = [x['name'] for rel in relations for x in rel]
counter = collections.Counter(data)
print(counter)
# Output recorded from an earlier run:
# Counter({'activation': 6593, 'compound': 6183, 'phosphorylation': 1587,
#          'expression': 1574, 'inhibition': 1530, 'binding/association': 1342,
#          'indirect effect': 975, 'missing interaction': 227,
#          'dephosphorylation': 145, 'dissociation': 99, 'ubiquitination': 73,
#          'repression': 36, 'state change': 34, 'glycosylation': 11,
#          'methylation': 2})