一、效果与功能介绍
搜索效果图
1)搜索功能区
在输入框中输入关键词,按“Enter”键或点击“确定”即可实现默认搜索,你也可以点击“最近1年”搜索近1年的文章,
搜索结果显示在“结果显示区”,你可以通过“上一页”或“下一页”将搜索结果翻页,
点击“帮助”,会显示一些搜索的小技巧,比如关键词与字段的搭配等。
2)摘要显示区
每次搜索完成后,会自动显示当页第一篇文章的摘要信息,你可以通过“下一篇”或“上一篇”实现翻篇操作,
当你对该篇文章感兴趣或想引用的时候,点击“打开原文”,会帮你自动打开浏览器,然后你根据自己需要操作即可。
3)翻译显示区
当你阅读文章摘要时,遇到不懂的单词,直接点翻译,即可将摘要翻译成中文,方便阅读。
二、实现原理
三、使用方法
1)代码
# encoding: utf-8
from tkinter import *
from tkinter.ttk import *
import requests
import re
import webbrowser
import hashlib
import json
import http.client
import urllib
import random
from lxml.html import etree
# 第二部分 功能实现
# 定义一个函数,根据page获取页面文章标题和id,并显示标题信息
def getpagemessage(page):
    """Fetch one PubMed result page, list its articles and cache titles/ids.

    Downloads result page ``page`` for the current global query (``item``)
    and date-filter suffix (``year``), appends one numbered line per article
    to ``text1`` and stores the lines in ``titledict[page]`` and the article
    ids in ``iddict[page]``.

    :param page: int, result page number used in the request URL
    :raises IndexError: if the page has no ``log_displayeduids`` meta tag
        (nothing is cached in that case)
    """
    global item, year, titledict, iddict
    from html import unescape

    url = 'https://pubmed.ncbi.nlm.nih.gov/?term={}&page={}{}'.format(item, page, year)
    # A timeout keeps the GUI from hanging forever on a stalled connection.
    r = requests.get(url, timeout=30)
    page_html = r.text

    # Parse everything before touching the widgets/caches so a malformed page
    # cannot leave titledict and iddict out of step with each other.
    id_matches = re.findall(r'<meta name="log_displayeduids" content="(.*?)" />', page_html)
    idlist = id_matches[0].split(',')
    # \d+ rather than \d{8}: article ids are not always exactly 8 digits long.
    titles = re.findall(r'data-article-id="\d+">\s*(.*?)\s*</a>', page_html)
    journals = re.findall(r'short-journal-citation">(.*?)</span>', page_html)

    tag_re = re.compile(r'<[^>]+>')  # strips inline HTML tags from titles
    article_text = []
    for index, raw_title in enumerate(titles):
        title = unescape(tag_re.sub('', raw_title))
        # Tolerate a missing journal citation instead of raising IndexError.
        journal = unescape(journals[index]) if index < len(journals) else ''
        line = '{}、{}({})\n'.format(index + 1, title, journal)
        text1.insert('end', line)
        text1.see(END)
        text1.update()
        article_text.append(line)

    titledict[page] = article_text
    iddict[page] = idlist
# 定义一个函数,根据num获取文章摘要并显示
def getabstract(num):
    """Fetch, display and cache the title and abstract of one article.

    Looks up the article id at index ``num`` of ``iddict[page]``, downloads
    its PubMed page, shows "<title>.\\n<abstract paragraphs>" in ``text2`` and
    stores the same text in ``absdict`` under the article id.

    :param num: int, index of the article within ``iddict[page]``
    """
    global iddict, page, absdict
    from html import unescape

    pmid = iddict[page][num]
    url = 'https://pubmed.ncbi.nlm.nih.gov/{}/'.format(pmid)
    # A timeout keeps the GUI from hanging forever on a stalled connection.
    r = requests.get(url, timeout=30)
    tree = etree.HTML(r.text)

    # string() concatenates every text node of the paragraph, so text around
    # inline markup (<i>, <sub>, section labels) is kept rather than only the
    # trailing fragment; split/join collapses newlines and runs of spaces.
    paragraphs = []
    for node in tree.xpath('//*[@id="abstract"]/div[1]/p'):
        paragraph = ' '.join(node.xpath('string()').split())
        if paragraph:
            paragraphs.append(paragraph)
    abstract = ''.join(paragraph + '\n' for paragraph in paragraphs)

    # The title is taken from the raw HTML, so strip tags and decode every
    # HTML entity (apostrophes, ampersands, quotes, ...) in it.
    title_matches = re.findall(r'<title>(.*?) - PubMed</title>', r.text)
    if title_matches:
        title = unescape(re.sub(r'<[^>]+>', '', title_matches[0])) + '.'
    else:
        title = ''

    content = title + '\n' + abstract
    text2.delete(1.0, 'end')  # clear the abstract box
    text2.insert('end', content)
    text2.see(END)
    text2.update()
    absdict[pmid] = content
# 定义一个函数,根据年份搜索,并显示搜索结果
def search_by_year(year):
    """Start a new search and show its first page and first abstract.

    Resets all paging state, reads the query from ``entry1``, requests the
    first result page, derives the page count from the reported result count
    and then delegates to ``getpagemessage`` and ``getabstract``.

    :param year: str, URL suffix appended to the search request; the callers
        in this file also store the same value in the global ``year``, which
        is what ``getpagemessage`` reads
    """
    global item, page, titledict, iddict, absdict, num, max_page, counts
    # Reset the state of any previous search. max_page/counts are reset too so
    # the paging buttons cannot act on stale values after a failed search.
    item = entry1.get().replace(' ', '+')
    page = 1
    num = 0
    max_page = 1
    counts = '0'
    titledict = {}
    iddict = {}
    absdict = {}

    text1.delete(1.0, 'end')  # clear the result box
    text1.insert('end', '正在搜索,请稍后...\n')
    text1.see(END)
    text1.update()

    url_start = 'https://pubmed.ncbi.nlm.nih.gov/?term={}&page={}{}'.format(item, page, year)
    try:
        r = requests.get(url_start, timeout=30)
        found = re.findall(r'<span class="value">(.*?)</span>', r.text)
        counts = found[0].replace(',', '')
        # Ceiling division: 10 results per page, so 20 results are 2 pages
        # (the previous "// 10 + 1" reported 3).
        max_page = max(1, (int(counts) + 9) // 10)
        text1.delete(1.0, 'end')  # clear the result box
        text1.insert('end', '本次搜索共找到{}结果:第{}页,共{}页\n'.format(counts, str(page), str(max_page)))
        getpagemessage(page)
        getabstract(num)
    except requests.RequestException:
        # Network problems are reported as such instead of as "no results".
        text1.insert(END, '网络请求失败,请检查网络连接后重试\n')
        text1.see(END)
        text1.update()
    except Exception:
        # UI boundary: any parsing failure is presented as "nothing found"
        # rather than surfacing a traceback from the Tk callback.
        text1.insert(END, '没有找到您想要的结果\n')
        text1.see(END)
        text1.update()
# 默认搜索
def searchall():
    """Run a search with an empty date-filter suffix (the default search)."""
    global year
    no_filter = ''
    year = no_filter
    search_by_year(no_filter)
# 绑定enter键
def searchall_enter(self):
    """Key-binding adapter: run the default search when Return is pressed.

    :param self: the event object passed by the ``<Return>`` binding; unused
    """
    searchall()
# 最近1年
def search1year():
    """Search with the ``ds1.y_1`` filter suffix ("last 1 year" button)."""
    global year
    date_filter = '&filter=ds1.y_1'
    year = date_filter
    search_by_year(date_filter)
# 最近5年
def search5year():
    """Search with the ``ds1.y_5`` filter suffix ("last 5 years" button)."""
    global year
    date_filter = '&filter=ds1.y_5'
    year = date_filter
    search_by_year(date_filter)
# 最近10年
def search10year():
    """Search with the ``ds1.y_10`` filter suffix ("last 10 years" button)."""
    global year
    date_filter = '&filter=ds1.y_10'
    year = date_filter
    search_by_year(date_filter)
# 翻页功能
def nextpage():
    """Move to the next result page and show its first article.

    Does nothing when already on the last page or when no search has been
    run yet. A page that was fully cached earlier is redrawn from
    ``titledict``/``iddict``; otherwise it is fetched with
    ``getpagemessage``. The first abstract comes from ``absdict`` when
    cached and from ``getabstract`` otherwise.
    """
    global page, num
    try:
        if page >= max_page:
            return
    except NameError:
        # The paging state only exists after the first search.
        return

    page += 1
    num = 0
    text1.delete(1.0, 'end')  # clear the result box
    text1.insert('end', '本次搜索共找到{}结果:第{}页,共{}页\n'.format(counts, str(page), str(max_page)))

    # Require both caches: an earlier fetch that failed partway may have
    # stored the titles without the ids.
    if page in titledict and page in iddict:
        for line in titledict[page]:
            text1.insert('end', line)
        text1.see(END)
        text1.update()
        pmid = iddict[page][num]
        if pmid in absdict:
            text2.delete(1.0, 'end')  # clear the abstract box
            text2.insert('end', absdict[pmid])
            text2.see(END)
            text2.update()
        else:
            # The abstract was never cached (e.g. its download failed).
            getabstract(num)
    else:
        getpagemessage(page)
        getabstract(num)
def lastpage():
    """Go back one result page and redraw it from the caches.

    Does nothing on the first page. Titles come from ``titledict`` and the
    first article's abstract from ``absdict``; nothing is downloaded.
    """
    global item, year, page, max_page, num, titledict, iddict, absdict, counts
    if page <= 1:
        return

    page -= 1
    num = 0

    # Redraw the title list of the page we moved to.
    text1.delete(1.0, 'end')
    header = '本次搜索共找到{}结果:第{}页,共{}页\n'.format(counts, str(page), str(max_page))
    text1.insert('end', header)
    for line in titledict[page]:
        text1.insert('end', line)
    text1.see(END)
    text1.update()

    # Show the cached abstract of the page's first article.
    pmid = iddict[page][num]
    cached_abstract = absdict[pmid]
    text2.delete(1.0, 'end')
    text2.insert('end', cached_abstract)
    text2.see(END)
    text2.update()
# 翻篇功能
def nextarticle():
    """Show the next article's abstract, moving to the next page at the end.

    Within a page the abstract is taken from ``absdict`` when cached and
    fetched with ``getabstract`` otherwise. On the last article of a page,
    ``nextpage`` is called if another page exists.
    """
    global num, iddict, absdict, page, max_page
    at_page_end = not (num < len(iddict[page]) - 1)
    if at_page_end:
        if page < max_page:
            nextpage()
        return

    num += 1
    pmid = iddict[page][num]
    if pmid not in absdict:
        getabstract(num)
        return

    cached_abstract = absdict[pmid]
    text2.delete(1.0, 'end')  # clear the abstract box
    text2.insert('end', cached_abstract)
    text2.see(END)
    text2.update()
def lastarticle():
    """Show the previous article's abstract, moving back a page at the start.

    On the first article of a page (other than page 1) the previous page's
    title list is redrawn from ``titledict`` and its last article is
    selected. The abstract comes from ``absdict`` when cached and from
    ``getabstract`` otherwise. Does nothing on the first article of page 1.
    """
    global num, page
    if num > 0:
        num -= 1
    elif page > 1:
        page -= 1
        # Select the real last article of that page instead of assuming the
        # page holds exactly 10 ids (index 9).
        num = len(iddict[page]) - 1
        text1.delete(1.0, 'end')  # clear the result box
        text1.insert('end', '本次搜索共找到{}结果:第{}页,共{}页\n'.format(counts, str(page), str(max_page)))
        for line in titledict[page]:
            text1.insert('end', line)
        text1.see(END)
        text1.update()
    else:
        return

    pmid = iddict[page][num]
    if pmid in absdict:
        text2.delete(1.0, 'end')  # clear the abstract box
        text2.insert('end', absdict[pmid])
        text2.see(END)
        text2.update()
    else:
        getabstract(num)
# 翻译
def translate(text):
    """Translate ``text`` into Chinese via the Baidu Translate HTTP API.

    :param text: str, source text (source language is auto-detected)
    :return: str, the translated segments, each followed by a newline;
        ``''`` for empty or whitespace-only input; on any failure a string
        describing the error (never an exception object)
    """
    # Imported explicitly: a bare ``import urllib`` does not guarantee that
    # the ``urllib.parse`` submodule is loaded.
    from urllib.parse import quote

    # Nothing to translate: skip the network round trip.
    if not text or not text.strip():
        return ''

    appid = '你的appid'  # fill in your appid
    secretKey = '你的密钥'  # fill in your secret key
    fromLang = 'auto'  # source language
    toLang = 'zh'  # target language
    salt = random.randint(32768, 65536)
    # Request signature: MD5 over appid + text + salt + secret key.
    sign = hashlib.md5((appid + text + str(salt) + secretKey).encode()).hexdigest()
    myurl = ('/api/trans/vip/translate'
             + '?appid=' + appid
             + '&q=' + quote(text)
             + '&from=' + fromLang
             + '&to=' + toLang
             + '&salt=' + str(salt)
             + '&sign=' + sign)

    httpClient = None
    try:
        httpClient = http.client.HTTPConnection('api.fanyi.baidu.com', timeout=15)
        httpClient.request('GET', myurl)
        response = httpClient.getresponse()
        result = json.loads(response.read().decode("utf-8"))
        if 'trans_result' not in result:
            # Report the service's own error fields when present instead of
            # a bare KeyError on 'trans_result'.
            return 'error_code: {}, error_msg: {}'.format(
                result.get('error_code'), result.get('error_msg'))
        return ''.join(segment['dst'] + '\n' for segment in result['trans_result'])
    except Exception as e:
        # UI boundary: the caller inserts the return value into a text box,
        # so hand back a message string rather than the exception object.
        return str(e)
    finally:
        if httpClient:
            httpClient.close()
def translation():
    """Translate the contents of ``text2`` and show the result in ``text3``."""
    translated = translate(text2.get(1.0, 'end'))
    text3.delete(1.0, 'end')  # clear the translation box
    text3.insert('end', translated)
    text3.see(END)
    text3.update()
# 打开原文
def openarticle():
    """Open the currently selected article's PubMed page in the web browser."""
    global page, num, iddict
    pmid = iddict[page][num]
    webbrowser.open('https://pubmed.ncbi.nlm.nih.gov/{}/'.format(pmid))
# 清除
def deletetext():
    """Clear the translation box (``text3``)."""
    translation_box = text3
    translation_box.delete(1.0, 'end')
    translation_box.see(END)
    translation_box.update()
# 帮助
def HelpDocumentation():
    """Replace the contents of ``text1`` with the search-syntax help text.

    Two errors in the help text are corrected: the title field is labelled
    "TI" (it was "T1", while its own example uses [TI]) and the parenthesised
    example reads "vitaminc OR zinc" (the spaces around OR had been lost).
    """
    documentation = '''PubMed检索规则与语法:
1)布尔运算符AND,OR,NOT必须大写,如:vitaminc OR zinc;
2)PubMed从左至右进行布尔运算,你可以通过加括符改变运算顺序,如: commoncold AND (vitaminc OR zinc);
3)指定文献记录字段名称检索,如: dna[MH] AND crick[AU] AND 1993[DP],下面列出常用字段名称缩写:
DP——出版日期采用YYYY/MM/DD[DP]格式,如1998/03/06[DP]。输入日期范围则用冒号连接,如1996:1998[DP],1998/01:1998/04[DP]
AD——第一作者机构名称、地址、资助号如LM05545/LM/NLM[AD]
AU——作者姓名如o'brienj[AU]
TA——期刊名称包括期刊名全称、简称、ISSN。jbiolchem[TA]或0021-9258[TA]
LA——文献出版语言 Chinese[LA]
MH-Mesh 主题词 neoplasms[MH]或neoplasms/dt[MH]
PT——出版类型 review[PT]
TW——自由词AA001794[TW]
TI——文献标题内自由词leiomyosarcoma[TI]
有趣的搜索:
1)通过时间和期刊字段的组合,定期查看某期刊的文章动态,如science[TA] AND 2020/05:2020/06[DP]
2)通过关键词和期刊字段的组合,查看顶级期刊上关于xxx的研究进展,如miRNA[MH] AND (science[TA] OR nature[TA] OR cell[TA])
'''
    text1.delete(1.0, 'end')  # clear the result box
    text1.insert('end', documentation)
    text1.see(END)
    text1.update()
# 第一部分 窗口布局
# 创建面板
# Main window.
root = Tk()
root.title("Pubmed搜索个人专享版")
root.geometry('1000x540')
root.minsize(width=600, height=400)

# Layout regions (two columns, three rows).
fm1 = Frame(height=250, width=300)  # search-result list (holds text1)
fm2 = Frame(height=350, width=300)  # abstract display (holds text2)
fm3 = Frame(height=50, width=300)  # abstract navigation buttons
fm4 = Frame(height=250, width=300)  # search controls
fm5 = Frame(height=350, width=300)  # translation display (holds text3)
fm6 = Frame(height=50, width=300)  # translation buttons


def _attach_scrolled_text(frame):
    """Create a Text with a linked vertical Scrollbar in ``frame``.

    :return: tuple ``(scrollbar, text)``
    """
    scrollbar = Scrollbar(frame)
    text = Text(frame, font=('微软雅黑', 12))
    scrollbar.pack(side=RIGHT, fill=Y, anchor='center')
    text.pack(side=LEFT, fill=Y)
    scrollbar.config(command=text.yview)
    # yscrollcommand must be the scrollbar's ``set`` method; passing the
    # widget itself leaves the scrollbar out of sync with the text view.
    text.config(yscrollcommand=scrollbar.set)
    return scrollbar, text


# Scrollable text boxes.
scr1, text1 = _attach_scrolled_text(fm1)
scr2, text2 = _attach_scrolled_text(fm2)
scr3, text3 = _attach_scrolled_text(fm5)

# Abstract navigation buttons.
b1 = Button(fm3, text='上一篇', command=lastarticle)
b2 = Button(fm3, text='下一篇', command=nextarticle)
b3 = Button(fm3, text='打开原文', command=openarticle)
b1.pack(side=LEFT, anchor='w')
b2.pack(side=LEFT, anchor='center')
b3.pack(side=LEFT, anchor='e')

# Search controls.
l1 = Label(fm4, text='Pubmed 搜索', font=('微软雅黑', 16))
entry1 = Entry(fm4, width=25)
b4 = Button(fm4, text='确定', command=searchall)
b5 = Button(fm4, text='最近1年', command=search1year)
b6 = Button(fm4, text='最近5年', command=search5year)
b7 = Button(fm4, text='最近10年', command=search10year)
b8 = Button(fm4, text='上一页', command=lastpage)
b9 = Button(fm4, text='下一页', command=nextpage)
b10 = Button(fm4, text='帮助', command=HelpDocumentation)
l2 = Label(fm4, text='_________________________________')
l1.grid(row=0, columnspan=3)
entry1.grid(row=1, column=0, columnspan=2)
entry1.bind("<Return>", searchall_enter)  # Return key runs the default search
b4.grid(row=1, column=2)
b5.grid(row=2, column=0)
b6.grid(row=2, column=1)
b7.grid(row=2, column=2)
b8.grid(row=3, column=0)
b9.grid(row=3, column=1)
b10.grid(row=3, column=2)
l2.grid(row=6, columnspan=3)

# Translation buttons.
b11 = Button(fm6, text='翻译', command=translation)
b11.pack(side=RIGHT, anchor='center')
b12 = Button(fm6, text='清除', command=deletetext)
b12.pack(side=LEFT, anchor='center')

# Place the regions on the root grid; the two text rows take almost all of
# the extra vertical space, the button row almost none.
root.rowconfigure(0, weight=70)
root.rowconfigure(1, weight=70)
root.rowconfigure(2, weight=1)
root.columnconfigure(0, weight=1)
root.columnconfigure(1, weight=1)
fm1.grid(row=0, column=0)
fm2.grid(row=1, column=0)
fm3.grid(row=2, column=0)
fm4.grid(row=0, column=1)
fm5.grid(row=1, column=1)
fm6.grid(row=2, column=1)

# Enter the Tk event loop.
root.mainloop()
2)使用说明
1、你需要在百度翻译开放平台申请一个通用翻译的appid,通用翻译选择标准版就好(免费),然后将你的appid和密钥填入代码中即可;
2、使用pyinstaller包打包成exe文件就可以直接使用了。