使用批量插入的手段向 MySQL 一次性批量插入 50000 条数据（大概耗时 6.5 秒）。将千万级别的数据分解为 n 个 5 万条的批次，分 n 次插入。
```python path = r"D:\21917\桌面\UserBehavior.csv" import csv import time # 装饰器,计算插入50000条数据需要的时间 def timer(func): def decor(*args): start_time = time.time() func(*args) end_time = time.time() d_time = end_time - start_time print("the running time is : ", d_time) return decor def generator(): with open(path,'r') as f: reader = csv.reader(f) for row in reader: yield row import pymysql # 表名UserBehavior dropTableSql = ''' DROP TABLE IF EXISTS UserBehavior ''' # 字段UserID int,ItemID int,CategoryID int,BehaviorType varchar,Timestamp timestamp createTableSql = ''' CREATE TABLE UserBehavior(userID int, itemID int, categoryID int, behaviorType varchar(20), timeStamp varchar(20)) ''' insertTableSql = ''' insert into `UserBehavior`(userID,itemID,categoryID,behaviorType,timeStamp) values(%s,%s,%s,%s,%s) ''' MAXINSERTSIZE = 50000 TOTAL = 0 class DB: def __init__(self): self.conn = pymysql.Connect(host="127.0.0.1", port=3306, user="root", password="prty", database="Test", charset="utf8") self.cursor = self.conn.cursor() def initTable(self): self.cursor.execute(dropTableSql) self.cursor.execute(createTableSql) @timer def insertData(self): global MAXINSERTSIZE,TOTAL self.initTable() data = generator() tmp = [] count = 0 while 1: try: count += 1 ele = next(data) tmp.append(ele) except StopIteration: break if count == MAXINSERTSIZE: self.cursor.executemany(insertTableSql,tmp) TOTAL += 1 print("第{}*50000条记录插入成功".format(TOTAL)) count = 0 tmp = [] self.conn.commit() def __del__(self): self.cursor.close() self.conn.close() if __name__=='__main__': Db = DB() Db.insertData()代码是这样但是还有mysql的ini文件要设置,关键在于executemany命令 关于批量插入是需要进行mysql的配置的 这里有类似的配置
配置不要出错。如果配置出错，执行 net start mysql 会出现 "mysql 服务正在启动 / mysql 服务无法启动"。如果出现这种情况，可以使用 mysqld --console 查看错误信息并进行处理。