import requests
import re
def getHTMLText(url):
    """Fetch *url* with browser-like headers and return the body as text.

    Args:
        url: Address to request with HTTP GET.

    Returns:
        The response text, decoded with the encoding detected from the
        content (``apparent_encoding``), or ``""`` when the request fails
        (connection error, timeout, or a non-2xx status).
    """
    # NOTE(review): this cookie looks like a logged-in account session that
    # was pasted into the source; it will expire and should probably live in
    # configuration rather than in code -- confirm before reuse.
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
        "cookie": "miid=1428930817865580362; cna=EarZFfUm1S0CARsR+220O8hH; t=8bc94e7bc688eb7af5533f1976650fde; _m_h5_tk=65dbeb4e38f534aacf4025c8d4e81bce_1586794235712; _m_h5_tk_enc=e96d92ee16e958b4890caa9fc2fa6db4; thw=cn; cookie2=1554e5bbbfe6457cf1c1c9aa63c058df; v=0; _tb_token_=5a85e0188653; _samesite_flag_=true; sgcookie=EpId/VCz+PjBPFKeidqdS; unb=2683761081; uc3=lg2=WqG3DMC9VAQiUQ==&id2=UU6p+QEJ8tSc4g==&vt3=F8dBxdGLa3BXsASlX+w=&nk2=BcLP06d1nZPt5PbdCo24Cnoi; csg=1e8e7f0a; lgc=freezing2856803123; cookie17=UU6p+QEJ8tSc4g==; dnk=freezing2856803123; skt=6a084e57cf10b6e6; existShop=MTU4NzE5NDg1OA==; uc4=id4=0@U2xkY0WHChRFrR6VhQm75gIGMATD&nk4=0@B044YAqLRKUazEZ7eWhSvUymCOjtR/kE1PO2nJ8=; tracknick=freezing2856803123; _cc_=U+GCWk/7og==; _l_g_=Ug==; sg=317; _nk_=freezing2856803123; cookie1=B0BXi+rAh+CsG+9LmOzVV9j8dAB5xdFbcF+mnvpYvzA=; tfstk=chgGBuae-cr6eLnsN1asMerwb79daT74EquI8V-uS4f_xE3z_sIoYL5pOSEkdp1..; mt=ci=97_1; enc=0gxF3t55dTUIEQOzUSrgF7p2gdf9xdcdC6xm317h5dXRn7D21KYrLJkRJFp6vcy6l7Z2CrAPewgEdMBB0j7yHg==; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; hng=CN|zh-CN|CNY|156; uc1=cookie16=UtASsssmPlP/f1IHDsDaPRu+Pw==&cookie21=U+GCWk/7p4mBoUyS4plD&cookie15=URm48syIIVrSKA==&existShop=false&pas=0&cookie14=UoTUPc3lioQ/3A==; JSESSIONID=B3B7C7381542916C591F2634FDE31A52; l=eBSbgB4VqimFn0mBBOfwdA7-hk7OSBdYYu8NeR-MiT5PON1p5CxAWZXZX0L9C3GVhsZXR3Szm2rQBeYBqS24n5U62j-la_kmn; isg=BGJi2OxgSCf6jlezYGKTe0FGvejEs2bNw3JHu6z7jlWAfwL5lEO23eh9r7uD9N5l",
    }
    try:
        r = requests.get(url, timeout=30, headers=headers)
        # Turn 4xx/5xx responses into an exception so they also yield "".
        r.raise_for_status()
        # Prefer the encoding guessed from the body over the header value.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: request failures are reported as an empty page.
        # Unlike a bare ``except``, this lets KeyboardInterrupt and genuine
        # programming errors propagate.
        return ""
def parsePage(ilt, html):
    """Extract ``[price, title]`` pairs from *html* and append them to *ilt*.

    Prices are taken from ``"view_price":"..."`` fields and titles from
    ``"raw_title":"..."`` fields; the n-th price is paired with the n-th
    title. If one kind of field occurs more often than the other, the
    surplus entries are ignored.

    Args:
        ilt: List that receives one ``[price, title]`` entry (both strings)
            per matched pair; it is mutated in place.
        html: Page text to scan. An empty or ``None`` page adds nothing.

    Returns:
        None.
    """
    import json  # local import: only this function needs it

    if not html:
        return

    # Capture groups pull out the values directly. The earlier approach of
    # ``split(":")`` + ``eval`` truncated titles containing ":" (aborting the
    # whole page) and executed text that came straight from the network.
    prices = re.findall(r'"view_price":"([\d.]*)"', html)
    # The title group keeps its surrounding quotes and tolerates backslash
    # escapes (e.g. \" or \uXXXX) so it can be decoded as a JSON string.
    quoted_titles = re.findall(r'"raw_title":("(?:[^"\\\n]|\\.)*")', html)

    for price, quoted in zip(prices, quoted_titles):
        try:
            title = json.loads(quoted)
        except ValueError:
            # Not valid JSON string syntax: keep the raw text between quotes.
            title = quoted[1:-1]
        ilt.append([price, title])
def printGoodsList(ilt):
    """Print the goods in *ilt* as a numbered, tab-separated table.

    A header row is printed first, then one row per entry with a 1-based
    running number, the entry's first element (price) and its second
    element (title).

    Args:
        ilt: Iterable of indexable items where ``item[0]`` is the price and
            ``item[1]`` is the title.
    """
    row_format = "{0:^4}\t{1:<8}\t{2:{3}<16}"
    # chr(12288) is U+3000 (ideographic space), used as the fill character
    # for the title column.
    fill = chr(12288)
    print(row_format.format("序号", "价格", "商品名称", fill))
    for number, item in enumerate(ilt, start=1):
        print(row_format.format(number, item[0], item[1], fill))
def main(goods='书包', depth=2):
    """Search Taobao for *goods* and print the prices and titles found.

    Each page is downloaded with ``getHTMLText``, parsed with ``parsePage``
    and the accumulated results are printed with ``printGoodsList``.

    Args:
        goods: Search keyword sent as the ``q`` query parameter.
        depth: Number of result pages to request. Page ``i`` is requested
            with ``s=44*i`` (presumably 44 items per page -- confirm against
            the site).
    """
    from urllib.parse import quote  # local import: only needed here

    # Percent-encode the keyword so characters such as "&" or "#" cannot
    # break the query string.
    start_url = 'https://s.taobao.com/search?q=' + quote(goods)
    infoList = []
    for i in range(depth):
        try:
            url = start_url + '&s=' + str(44 * i)
            html = getHTMLText(url)
            parsePage(infoList, html)
        except Exception:
            # Best effort: a page that fails is skipped, but Ctrl-C
            # (KeyboardInterrupt) is no longer swallowed.
            continue
    printGoodsList(infoList)
# Script entry point: run the scraper when this file is executed.
main()
# Sample output: (not included in this copy)
# When reposting, please credit the original source: https://ipadbbs.8miu.com/read-7126.html