#! /usr/bin/python
import datetime
import logging
import os
import os.path
import urllib
# File logging: records at INFO and above go to a date-stamped file under
# /root/logs. The date is evaluated once, when this module is loaded.
log_level = logging.INFO
log_format = '%(asctime)s[%(levelname)s]: %(message)s'
log_file = '/root/logs/sys_%s.log' % datetime.datetime.now().strftime('%Y-%m-%d')
logging.basicConfig(filename=log_file, level=log_level, format=log_format)
log = logging.getLogger()
def url_open(url):
    """Fetch *url* and return the raw, undecoded response body.

    Args:
        url: Address to fetch.

    Returns:
        The response payload exactly as read from the connection (``str`` on
        Python 2, ``bytes`` on Python 3). Callers decode it themselves.

    Note:
        On Python 3 ``urllib.request.urlopen`` raises ``HTTPError`` for
        4xx/5xx responses, whereas the Python 2 ``urllib.urlopen`` returns
        the error page body instead.
    """
    # ``urllib.urlopen`` only exists on Python 2; resolve the right function
    # locally so the module works under either interpreter.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    response = urlopen(url)
    try:
        html = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()
    log.info('html..')
    return html
def find_imgs(url):
    """Scan the page at *url* and return the addresses of its first .jpg images.

    The page is downloaded with ``url_open`` and decoded as GB2312. The text
    is then searched for ``img src=`` markers; a marker counts as an image
    when ``.jpg`` occurs within the 40 characters that follow it. Scanning
    stops once six addresses have been collected.

    Args:
        url: Page to scan. Also used as the base for resolving the ``src``
            values into absolute URLs.

    Returns:
        A list of at most six absolute image URLs, in page order.

    Raises:
        UnicodeDecodeError: If the page body is not valid GB2312.
    """
    try:
        from urllib.parse import urljoin  # Python 3
    except ImportError:
        from urlparse import urljoin  # Python 2

    html = url_open(url).decode('GB2312')
    img_addrs = []
    log.info('find_imgs..')

    marker = 'img src='
    start = html.find(marker)
    while start != -1:
        print('w...')
        end = html.find('.jpg', start, start + 40)
        if end != -1:
            # +9 skips the marker plus the opening quote; +4 keeps '.jpg'.
            imgurl = html[start + 9:end + 4]
            # Resolve against the page URL so absolute, root-relative and
            # document-relative src values all yield a usable address
            # (plain concatenation with a fixed host mangled the first and
            # last of those).
            img_addrs.append(urljoin(url, imgurl))
            print(imgurl)
            if len(img_addrs) > 5:
                break
        else:
            # No .jpg nearby: resume the search just past this marker.
            end = start + 9
        start = html.find(marker, end)
    return img_addrs
def save_imgs(folder, img_addrs):
    """Download each URL in *img_addrs* into the current working directory.

    Every file is named after the last ``/``-separated segment of its URL.

    Args:
        folder: Not used by this function; files are written relative to the
            current working directory, which the caller is expected to have
            set. Kept so the existing call signature stays valid.
        img_addrs: Iterable of image URLs to download.
    """
    for addr in img_addrs:
        filename = addr.split('/')[-1]
        # Fetch before opening the file: a failed download must neither
        # leave an empty file behind nor truncate one from an earlier run.
        img = url_open(addr)
        with open(filename, 'wb') as f:
            f.write(img)
def download_mm(folder='xx', pages=5):
    """Save the images found on the people.com.cn home page under /root.

    Creates ``/root/<folder>`` if needed, changes the process working
    directory into it, collects image addresses with ``find_imgs`` and
    downloads them with ``save_imgs``.

    Args:
        folder: Name of the directory under ``/root`` to save into.
        pages: Currently unused; kept so existing callers keep working.
    """
    # Build the path once so the existence check, mkdir and chdir all agree
    # (the check used to be hard-coded to '/root/xx' regardless of *folder*).
    target = '/root/' + folder
    if not os.path.exists(target):
        os.mkdir(target)
    os.chdir(target)
    log.info('init..')

    url = 'http://www.people.com.cn/'
    img_addrs = find_imgs(url)
    if not img_addrs:
        # Nothing matched: avoid indexing an empty list below.
        log.warning('no images found at %s', url)
        return
    log.info('imgs=' + img_addrs[0])
    save_imgs(folder, img_addrs)
# Script entry point: run the download with its default arguments.
if __name__ == '__main__':
    download_mm()