记录通过 UniProt ID 获取网页内容并进行匹配的方法一:将获取到的网页内容作为字符串,再利用 Python 的字符串匹配来判断页面中是否包含目标字段。另外也可以通过解析 HTML 标签来实现:如果页面中没有对应的标签,就说明没有这个字段,直接返回 False。
import requests
import bs4
import xlrd
import xlwt
from xlutils
import copy
import time
def get_ID(file):
    """Read the first column of ``Sheet1`` from an Excel workbook.

    The first row (index 0) is skipped; every remaining row contributes the
    value of its first cell, in sheet order.

    Args:
        file: Path of the workbook, passed straight to ``xlrd.open_workbook``.

    Returns:
        A list with the column-0 cell values of rows 1..nrows-1. The list is
        empty when the sheet has no rows after the first one.
    """
    workbook = xlrd.open_workbook(file)
    print("sheets:" + str(workbook.sheet_names()))
    table = workbook.sheet_by_name('Sheet1')
    rows = table.nrows
    # Debug preview of the first data cell. Guarded so that a sheet holding
    # only the first row does not raise IndexError before we can return [].
    if rows > 1:
        print(table.cell_value(1, 0))
    all_content = [table.cell_value(i, 0) for i in range(1, rows)]
    print(all_content)
    return all_content
def get_result(ID, timeout=30):
    """Fetch the UniProt page for ``ID`` and report whether it mentions "Pathway".

    The check is a plain, case-sensitive substring test on the response body;
    the HTTP status code is not inspected.

    Args:
        ID: Entry identifier appended to the UniProt base URL (must be a str).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole batch forever.

    Returns:
        True if the text "Pathway" occurs anywhere in the response body,
        otherwise False.

    Raises:
        requests.exceptions.RequestException: On connection errors or when the
            timeout expires.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0"}
    host = "https://www.uniprot.org/uniprot/"
    url = host + ID
    res = requests.get(url, headers=headers, timeout=timeout)
    result = "Pathway" in res.text
    print(result)
    return result
def write_result(file, result, i_num):
    """Store ``result`` in column 1 of row ``i_num`` and save the workbook.

    The workbook is re-read from ``file``, turned into a writable copy, the
    single cell on the first sheet is written, and the copy is saved back to
    the same path. The function then sleeps for three seconds before returning.

    Args:
        file: Path of the workbook to update in place.
        result: Value to write into the cell.
        i_num: Zero-based row index of the target cell.
    """
    target_col = 1
    source_book = xlrd.open_workbook(file, formatting_info=True)
    # NOTE(review): `copy` is presumably xlutils.copy (the import line is
    # garbled in this view); get_sheet()/save() below rely on a writable copy.
    writable_book = copy.copy(source_book)
    writable_book.get_sheet(0).write(i_num, target_col, result)
    writable_book.save(file)
    # Pause after every write; the caller invokes this once per fetched ID.
    time.sleep(3)
# Driver: read the IDs from the workbook, query each one, and write the
# True/False result back into column 1 of the same workbook.
file = './test.xls'
content = get_ID(file)
# get_ID returns the cells of sheet rows 1..n (row 0 is skipped), so count
# from 1: each result then lands on the row of its own ID instead of one row
# above it (which also overwrote row 0).
for i_num, ID in enumerate(content, start=1):
    print(ID)
    result = get_result(ID)
    write_result(file, result, i_num)
表格数据格式如下:
ID
A0FDW2
A0FDW3
转载请注明原文地址:https://ipadbbs.8miu.com/read-1747.html