以前我总是想着自己写代码,对那些只知道复制别人代码来用的人嗤之以鼻;后来有几次查到现成的代码,直接复制粘贴拿来用,握草,那感觉:
"""Crawl the Sina Finance 7x24 live feed and store new items in MySQL.

Pages of the feed are requested one after another; every item whose id is not
yet present in the ``sina_data`` table is inserted.
"""
import datetime
import json
import random
import re
import time

import pymssql
import pymysql
import requests
from requests.adapters import HTTPAdapter

REFERER_URL = "http://finance.sina.com.cn/7x24/?tag=0"

# NOTE(review): hard-coded cookie, presumably captured from a browser session;
# confirm whether the API needs it and refresh it if requests start failing.
COOKIE = (
    "UOR=www.baidu.com,tech.sina.com.cn,; "
    "SINAGLOBAL=114.84.181.236_1579684610.152568; "
    "UM_distinctid=16fcc8a8b704c8-0a1d2def9ca4c6-33365a06-15f900-16fcc8a8b718f1; "
    "lxlrttp=1578733570; "
    "gr_user_id=2736e487-ee25-4d52-a1eb-c232ac3d58d6; "
    "grwng_uid=d762fe92-912b-4ea8-9a24-127a43143ebf; "
    "__gads=ID=d79f786106eb99a1:T=1582016329:S=ALNI_MZoErH_0nNZiM3D4E36pqMrbHHOZA; "
    "Apache=114.84.181.236_1582267433.457262; "
    "ULV=1582626620968:6:4:1:114.84.181.236_1582267433.457262:1582164462661; "
    "ZHIBO-SINA-COM-CN=; "
    "SUB=_2AkMpBPEzf8NxqwJRmfoWz2_ga4R2zQzEieKfWADoJRMyHRl-yD92qm05tRB6AoTf3EaJ7Bg2UU4l1CDZXUBCzEuJv3mP; "
    "SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WhqhhGsPWdPjar0R99pFT8s"
)

# The headers never change between pages, so they are built once.
HEADERS = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    "Cookie": COOKIE,
    "Host": "zhibo.sina.com.cn",
    "Referer": REFERER_URL,
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36",
}

# ``%s`` is replaced by the 1-based page number.
FEED_URL_TEMPLATE = (
    "http://zhibo.sina.com.cn/api/zhibo/feed?callback=jQuery0&page=%s"
    "&page_size=20&zhibo_id=152&tag_id=0&dire=f&dpc=1&pagesize=20&_=0"
)

# The response is JSON wrapped in a fixed-width JSONP envelope; these are the
# number of characters to strip from each end.
# NOTE(review): presumably `try{jQuery0(` ... `);}catch(e){};` because the
# request uses callback=jQuery0 -- confirm against a live response.
_JSONP_PREFIX_LEN = 12
_JSONP_SUFFIX_LEN = 14


def conn():
    """Open the MySQL connection used to store feed items.

    The connection parameters are blank in this file and must be filled in
    before the script can run.

    Returns:
        The connection object returned by ``pymysql.connect``.
    """
    connect = pymysql.connect(host='', user='', password='', database='', charset='utf8')
    # pymysql.connect raises on failure, so reaching this line means success.
    print("连接成功!")
    return connect


def get_json_data(base_url, headers, max_attempts=5, retry_delay=5):
    """Fetch one feed page and return its list of items.

    Args:
        base_url: Full URL of the feed page to request.
        headers: HTTP headers to send with the request.
        max_attempts: How many times to try before giving up.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        The value found at ``result.data.feed.list`` in the decoded response,
        or ``None`` when every attempt failed.
    """
    with requests.Session() as session:
        # Transport-level retries only take effect for requests sent through
        # the session the adapters are mounted on.
        session.mount('http://', HTTPAdapter(max_retries=3))
        session.mount('https://', HTTPAdapter(max_retries=3))

        for attempt in range(1, max_attempts + 1):
            print(time.strftime('%Y-%m-%d %H:%M:%S'))
            try:
                response = session.get(base_url, timeout=5, headers=headers)
                body = response.text[_JSONP_PREFIX_LEN:-_JSONP_SUFFIX_LEN]
                # json.loads instead of eval: the body is remote input, and a
                # JSON parser also decodes JSON string escapes correctly.
                payload = json.loads(body)
                return payload['result']['data']['feed']['list']
            except (requests.RequestException, ValueError, KeyError, TypeError) as e:
                print('get_json_str未收录错误类型,请检查网络通断,错误位置:', e)
                if attempt < max_attempts:
                    time.sleep(retry_delay)
    return None


def _store_item(connection, item):
    """Insert one feed item into ``sina_data`` unless its id is already there."""
    item_id = item['id']
    create_time = item['create_time']
    rich_text = item['rich_text']
    created_at = datetime.datetime.strptime(create_time, "%Y-%m-%d %H:%M:%S")

    # The context manager closes the cursor on every path, not only after a
    # successful insert.
    with connection.cursor() as cursor:
        # Ask the database about this one id instead of loading every id in
        # the table for each item.
        cursor.execute("SELECT 1 FROM sina_data WHERE id = %s LIMIT 1", (item_id,))
        if cursor.fetchone() is not None:
            return

        print(item_id, create_time, rich_text)
        try:
            sql = "insert into sina_data(id,create_time,rich_text) values(%s,%s,%s)"
            cursor.execute(sql, (item_id, created_at, rich_text))
            connection.commit()
        except pymysql.MySQLError as e:
            # A failed insert must not stop the remaining items of the page.
            print(e)


def main():
    """Walk through the feed pages and store every item not yet in the table."""
    connection = conn()
    page = 0
    try:
        while True:
            page += 1
            print(page)
            data = get_json_data(FEED_URL_TEMPLATE % page, HEADERS)
            if data is None:
                # Fetching failed even after retries (already reported by
                # get_json_data); skip this page and carry on with the next.
                continue
            if not data:
                # Presumably an empty page means the feed is exhausted; stop
                # rather than request ever-higher page numbers forever.
                break
            try:
                for item in data:
                    _store_item(connection, item)
            except Exception as e:
                # Top-level boundary: one malformed page must not end the crawl.
                print(e)
            # Random pause between pages to avoid hammering the server.
            time.sleep(random.randint(1, 3))
    finally:
        connection.close()


if __name__ == "__main__":
    main()