# Note: this version has a slightly tidied-up structure, so it reads more comfortably.
# -*- coding: utf-8 -*-
"""Download images found through a vmgirls.com keyword search.

The script searches the site for a keyword, visits every link found on the
result page and saves each ``.jpg`` address referenced there into a
sub-directory named after the link's title.

@author: tanderick
"""
import os
import re
import time
import urllib.parse

import requests

# Seconds to wait for the server before giving up; without a timeout
# ``requests`` can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Characters that are not allowed in Windows directory names (a superset of
# what POSIX forbids).
_INVALID_DIR_CHARS = re.compile(r'[\\/:*?"<>|]')


def _safe_dirname(name):
    """Return *name* with characters that are illegal in directory names replaced."""
    cleaned = _INVALID_DIR_CHARS.sub('_', name).strip(' .')
    # Never return an empty component: it would collapse onto the parent dir.
    return cleaned or '_'


def search(keyword, headers, filepath):
    """Search the site for *keyword* and collect the links on the result page.

    Also makes sure the download root *filepath* exists.

    Args:
        keyword: Search term; it is percent-encoded before being sent.
        headers: HTTP headers passed to ``requests.get``.
        filepath: Root download directory, created (with parents) if missing.

    Returns:
        A list of ``(href, title)`` tuples, one for every
        ``<a href="..." title="..." `` match in the result page.

    Raises:
        requests.RequestException: If the search request fails or the server
            answers with an HTTP error status.
    """
    kw = urllib.parse.quote(keyword)
    response = requests.get(
        'https://www.vmgirls.com/?s=' + kw,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    m_urls = re.findall('<a href="(.*?)" title="(.*?)" ', response.text)
    os.makedirs(filepath, exist_ok=True)
    return m_urls


def download(urls, filepath1, headers=None):
    """Download every image in *urls* into the directory *filepath1*.

    Args:
        urls: URL fragments as produced by ``get_url``: the text between the
            leading ``http`` and the trailing ``jpg`` of each image address.
        filepath1: Existing directory that receives the files.
        headers: Optional HTTP headers passed to ``requests.get``.

    Returns:
        None. Images that cannot be fetched are reported and skipped.
    """
    for fragment in urls:
        # Re-attach the parts the capturing regex stripped off.
        url = urllib.parse.unquote('http' + fragment + 'jpg')
        filename = url.split('/')[-1]
        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as err:
            # Skip this image instead of saving an error page as a .jpg or
            # aborting the remaining downloads.
            print(url + ' download failed: ' + str(err))
            continue
        with open(os.path.join(filepath1, filename), 'wb') as f:
            f.write(response.content)
        print(url + '下载完成')
        time.sleep(1)  # pause between successful downloads


def get_url(m_urls, filepath, headers):
    """Visit each result page, find its image addresses and download them.

    Args:
        m_urls: Iterable of ``(page_url, title)`` pairs, as returned by
            ``search``.
        filepath: Root download directory; the per-page directory name is
            appended to it directly, so it should end with a path separator.
        headers: HTTP headers passed to ``requests.get``.

    Returns:
        A ``(urls, filepath1)`` tuple holding the URL fragments and target
        directory of the last page that was processed, or ``([], None)`` when
        no page was processed.
    """
    urls, filepath1 = [], None
    for m_url, name in m_urls:
        try:
            response = requests.get(m_url, headers=headers, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as err:
            # The link regex in ``search`` is loose and may capture hrefs that
            # are not fetchable pages; skip those rather than abort the run.
            print(m_url + ' fetch failed: ' + str(err))
            continue
        # Capture only the middle of each address; ``download`` restores the
        # ``http`` prefix and ``jpg`` suffix.
        urls = re.findall('http(.*?)jpg', response.text)
        filepath1 = filepath + _safe_dirname(name) + '/'
        os.makedirs(filepath1, exist_ok=True)
        download(urls, filepath1, headers)
    return urls, filepath1


# Main program
if __name__ == '__main__':
    keyword = '校园'  # search term
    filepath = str(r'C:/ml/data/image/vm/')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0'
    }
    m_urls = search(keyword, headers, filepath)
    get_url(m_urls, filepath, headers)