Python 2 下载网页图片

    技术2025-05-27  17

    #! /usr/bin/python
    # Script setup: imports plus a root logger that writes to a per-day file.
    import urllib
    import os
    import os.path
    import logging
    import datetime

    # File log. The file name carries today's date (YYYY-MM-DD).
    # NOTE: /root/logs is not created here and must already exist.
    log_file = '/root/logs/sys_%s.log' % datetime.datetime.now().strftime('%Y-%m-%d')
    log_level = logging.INFO
    log_format = '%(asctime)s[%(levelname)s]: %(message)s'
    logging.basicConfig(filename=log_file, level=log_level, format=log_format)
    # Root logger, shared by every function in this script.
    log = logging.getLogger()

    def url_open(url):
        """Fetch *url* and return the raw response body.

        Uses the Python 2 ``urllib.urlopen`` API and logs ``'html..'`` after
        a successful read.

        Args:
            url: Address to download.

        Returns:
            The undecoded bytes read from the response.
        """
        response = urllib.urlopen(url)
        try:
            html = response.read()
        finally:
            # Always release the connection, even when read() fails.
            response.close()
        log.info('html..')
        return html

    def find_imgs(url):
        """Return the addresses of the first ``.jpg`` images found on a page.

        The page is fetched with ``url_open`` and decoded as GB2312, then
        scanned for ``img src=`` occurrences. An occurrence counts only when
        ``.jpg`` appears within 40 characters of it. Scanning stops once six
        addresses have been collected.

        Args:
            url: Address of the page to scan.

        Returns:
            A list of image URLs resolved against ``http://www.people.com.cn``.
        """
        try:
            from urlparse import urljoin  # Python 2
        except ImportError:
            from urllib.parse import urljoin  # Python 3

        site = 'http://www.people.com.cn'
        html = url_open(url).decode('GB2312')
        img_addrs = []
        log.info('find_imgs..')
        a = html.find('img src=')
        total = 0
        while a != -1:
            print('w...')
            # Look for the extension only close to the attribute start.
            b = html.find('.jpg', a, a + 40)
            if b != -1:
                # a + 9 skips 'img src=' plus one more character (assumed to
                # be the opening quote); b + 4 keeps the '.jpg' suffix.
                imgurl = html[a + 9:b + 4]
                # urljoin leaves absolute URLs untouched and resolves relative
                # ones; plain concatenation corrupted anything that was not a
                # root-relative path.
                img_addrs.append(urljoin(site, imgurl))
                print(imgurl)
                total = total + 1
            else:
                # No .jpg nearby: resume the search just past this attribute.
                b = a + 9
            a = html.find('img src=', b)
            if total > 5:
                break
        return img_addrs

    def save_imgs(folder, img_addrs):
        """Download every address in *img_addrs* into the working directory.

        Each file is named after the last ``/``-separated segment of its URL
        and written relative to the current working directory.

        Args:
            folder: Not used by this function.
            img_addrs: Iterable of image URLs to download.
        """
        for each in img_addrs:
            filename = each.split('/')[-1]
            # Fetch before opening the file so a failed download does not
            # leave an empty file behind.
            img = url_open(each)
            with open(filename, 'wb') as f:
                f.write(img)

    def download_mm(folder = 'xx', pages = 5):
        """Download images from the people.com.cn front page into /root/<folder>.

        Creates ``/root/<folder>`` when it is missing, changes into it, collects
        image addresses with ``find_imgs`` and stores them with ``save_imgs``.

        Args:
            folder: Name of the directory under ``/root`` to store images in.
            pages: Not used by this function.
        """
        # Check, create and enter the same directory. The original tested the
        # hard-coded '/root/xx' and then changed into '/root/' + folder.
        target = '/root/' + folder
        if not os.path.exists(target):
            os.mkdir(target)
        os.chdir(target)

        log.info('init..')
        url = 'http://www.people.com.cn/'
        img_addrs = find_imgs(url)
        if img_addrs:
            log.info('imgs=%s', img_addrs[0])
        else:
            # Indexing [0] on an empty result raised IndexError.
            log.warning('no images found at %s', url)
        save_imgs(folder, img_addrs)

    # Script entry point: start the download only when run directly.
    if __name__ == '__main__':
        download_mm()

    Processed: 0.011, SQL: 9