当我插入10万条数据时,发现 MySqlCommand 竟然比 MySqlBulkLoader 还要快一半的时间,MySqlBulkLoader 不应该是批量插入最快的么?
我的表结构:
-- Target table for the insert timing comparison: an auto-increment integer
-- primary key followed by five nullable varchar(4000) text columns.
CREATE TABLE `batchinsert` (
-- Surrogate key; generated by the server, so loaders supply only Name1..Name5.
`id` int(11) NOT NULL AUTO_INCREMENT,
`Name1` varchar(4000) DEFAULT NULL,
`Name2` varchar(4000) DEFAULT NULL,
`Name3` varchar(4000) DEFAULT NULL,
`Name4` varchar(4000) DEFAULT NULL,
`Name5` varchar(4000) DEFAULT NULL,
PRIMARY KEY (`id`)
-- NOTE(review): AUTO_INCREMENT=1304537 is presumably the counter value captured
-- when this DDL was exported from an existing table - confirm before reuse.
) ENGINE=InnoDB AUTO_INCREMENT=1304537 DEFAULT CHARSET=utf8;
使用MySqlBulkLoader的代码:(执行时间大概在40秒左右)
/// <summary>
/// Benchmark driver: builds an in-memory table of 100,000 rows, each holding the same
/// sample text in the five columns Name1..Name5, and loads it with <see cref="BulkInsert"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the original summary contained only "46405", presumably the elapsed
/// milliseconds of one run - confirm.
/// </remarks>
public static void BatchInsert7()
{
    string connStr = "server=127.0.0.1;uid=root;pwd=root;database=test;CharSet=utf8;Allow User Variables=True;";
    string text = "计划》指出,人工智能正引发可产生链式反应的科学突破、催生一批颠覆性技术,引领新一轮科技革命和产业变革。根据《计划》,到2030年,高校要成为建设世界主要人工智能创新中心的核心力量和引领新一代人工智能发展的人才高地,为我国跻身创新型国家前列提供科技支撑和人才保障";

    using (DataTable dt = new DataTable())
    {
        dt.TableName = "BatchInsert";
        dt.Columns.AddRange(new DataColumn[]
        {
            new DataColumn("Name1", typeof(string)),
            new DataColumn("Name2", typeof(string)),
            new DataColumn("Name3", typeof(string)),
            new DataColumn("Name4", typeof(string)),
            new DataColumn("Name5", typeof(string))
        });

        for (int i = 0; i < 100000; i++)
        {
            DataRow dr = dt.NewRow();
            dr["Name1"] = text;
            dr["Name2"] = text;
            dr["Name3"] = text;
            dr["Name4"] = text;
            dr["Name5"] = text;
            dt.Rows.Add(dr);
        }

        BulkInsert(connStr, dt);
    }
}

/// <summary>
/// Converts a DataTable to CSV text: one line per row, fields separated by a half-width
/// comma, every line terminated by "\r\n". No header line is written.
/// </summary>
/// <param name="table">The table whose rows are serialized.</param>
/// <returns>The CSV text; an empty string when the table has no rows.</returns>
private static string DataTableToCsv(DataTable table)
{
    // A string field must be wrapped in double quotes when it contains the separator,
    // a quote, or a line break; otherwise the reader would split or misparse it.
    char[] charsRequiringQuotes = { ',', '"', '\r', '\n' };

    StringBuilder sb = new StringBuilder();
    foreach (DataRow row in table.Rows)
    {
        for (int i = 0; i < table.Columns.Count; i++)
        {
            if (i != 0)
            {
                // An empty column still gets its separator so field positions stay aligned.
                sb.Append(',');
            }

            string value = row[i].ToString();
            if (table.Columns[i].DataType == typeof(string) && value.IndexOfAny(charsRequiringQuotes) >= 0)
            {
                // Embedded quotes are escaped by doubling them.
                sb.Append('"').Append(value.Replace("\"", "\"\"")).Append('"');
            }
            else
            {
                sb.Append(value);
            }
        }

        // Written explicitly rather than via AppendLine(): BulkInsert tells the loader the
        // line terminator is "\r\n", while Environment.NewLine varies by platform.
        sb.Append("\r\n");
    }

    return sb.ToString();
}

/// <summary>
/// Bulk-loads a DataTable into the MySQL table named by <c>table.TableName</c>: the rows
/// are written to "Temp.csv" in the current directory and loaded with MySqlBulkLoader.
/// The elapsed time of opening the connection plus the load is printed to the console.
/// </summary>
/// <param name="connectionString">MySQL connection string.</param>
/// <param name="table">
/// Source data. Its TableName selects the target table, and its column names select the
/// target columns, so they must match the database column names.
/// </param>
/// <returns>The row count reported by the loader; 0 when the table has no rows.</returns>
/// <exception cref="ArgumentException">The table has no TableName.</exception>
public static int BulkInsert(string connectionString, DataTable table)
{
    if (string.IsNullOrEmpty(table.TableName))
    {
        throw new ArgumentException("请给DataTable的TableName属性附上表名称");
    }

    if (table.Rows.Count == 0)
    {
        return 0;
    }

    string tmpPath = Path.Combine(Directory.GetCurrentDirectory(), "Temp.csv");
    try
    {
        File.WriteAllText(tmpPath, DataTableToCsv(table));

        using (MySqlConnection conn = new MySqlConnection(connectionString))
        {
            Stopwatch stopwatch = Stopwatch.StartNew();
            conn.Open();

            MySqlBulkLoader bulk = new MySqlBulkLoader(conn)
            {
                FieldTerminator = ",",
                FieldQuotationCharacter = '"',
                EscapeCharacter = '"',
                LineTerminator = "\r\n",
                FileName = tmpPath,
                NumberOfLinesToSkip = 0,
                TableName = table.TableName,
            };

            // Name the target columns explicitly. Without a column list the file's fields
            // are assigned to the table's columns in declaration order, which would put the
            // first CSV field into a leading auto-increment key column.
            foreach (DataColumn column in table.Columns)
            {
                bulk.Columns.Add(column.ColumnName);
            }

            int insertCount = bulk.Load();
            stopwatch.Stop();
            Console.WriteLine("耗时:{0}", stopwatch.ElapsedMilliseconds);
            return insertCount;
        }
    }
    finally
    {
        // Remove the temporary file on failure as well; File.Delete does not throw when
        // the file was never created.
        File.Delete(tmpPath);
    }
}
直接使用MySqlCommand每1000条提交一次事务代码如下:(执行时间大概是30秒左右)
/// <summary>
/// Benchmark driver: inserts 100,000 rows into BatchInsert as 100 multi-row INSERT
/// statements of 1,000 rows each, committing one transaction per statement, and prints
/// the elapsed milliseconds to the console.
/// </summary>
/// <remarks>
/// Failures are reported to the console rather than rethrown; batches committed before
/// the failure stay in the table.
/// </remarks>
public static void BatchInsert5()
{
    const int batchCount = 100;
    const int rowsPerBatch = 1000;

    string connStr = "server=127.0.0.1;uid=root;pwd=root;database=test;CharSet=utf8;Allow User Variables=True;";
    string text = "计划》指出,人工智能正引发可产生链式反应的科学突破、催生一批颠覆性技术,引领新一轮科技革命和产业变革。根据《计划》,到2030年,高校要成为建设世界主要人工智能创新中心的核心力量和引领新一代人工智能发展的人才高地,为我国跻身创新型国家前列提供科技支撑和人才保障";

    using (MySqlConnection mConnection = new MySqlConnection(connStr))
    {
        Stopwatch stopwatch = Stopwatch.StartNew();
        mConnection.Open();
        try
        {
            // Every batch sends the same statement, so its text is built once. The value is
            // bound through the @text parameter instead of being spliced into the SQL, so a
            // quote or backslash in the text cannot break the statement.
            StringBuilder sCommand =
                new StringBuilder("INSERT INTO BatchInsert (Name1,Name2,Name3,Name4,Name5) VALUES ");
            for (int j = 0; j < rowsPerBatch; j++)
            {
                if (j != 0)
                {
                    sCommand.Append(',');
                }

                sCommand.Append("(@text,@text,@text,@text,@text)");
            }

            sCommand.Append(';');
            string batchSql = sCommand.ToString();

            for (int i = 0; i < batchCount; i++)
            {
                // One transaction per batch. Disposing a transaction that was never
                // committed rolls it back, so a failing batch leaves no partial rows and
                // an already-committed transaction is never rolled back by mistake.
                using (MySqlTransaction ts = mConnection.BeginTransaction())
                using (MySqlCommand myCmd = new MySqlCommand(batchSql, mConnection, ts))
                {
                    myCmd.CommandType = CommandType.Text;
                    myCmd.Parameters.AddWithValue("@text", text);
                    myCmd.ExecuteNonQuery();
                    ts.Commit();
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine("异常:{0}", ex);
        }

        stopwatch.Stop();
        Console.WriteLine("耗时:{0}", stopwatch.ElapsedMilliseconds);
    }
}
这是哪里有问题么?还是说 MySqlBulkLoader 批量插入时不一定是最快的?
参考:https://bbs.csdn.net/topics/392072986
看了,我是空的表插入的,数据库也差不多是新建的,难道说是我的磁盘有问题?也不应该啊,起码他的插入速度应该比上一个插入速度快一点啊
好吧,估计确实是我自己机器的问题。在数据库中直接执行: LOAD DATA LOCAL INFILE 'g:\Temp.csv' INTO TABLE BatchInsert FIELDS TERMINATED BY ',' ENCLOSED BY '"' ESCAPED BY '"' LINES TERMINATED BY '\r\n' (Name1,Name2,Name3,Name4,Name5)
也要30多秒,难道是我数据量太大,数据大小180M.
4.2W条,0.8s(MySqlBulkLoader ),你的机器磁盘可能真的有瓶颈
我想问下,最终结果你说得很模糊啊,究竟哪个效率更高?
BatchInsert 应该是最快的,你可以自己试一下
@GYY_顽石: oo 谢谢