关于MySQL中insert ignore,insert on duplicate和replace into，你可能没想过区别

2020 年 2 月 13 日
笔记

读完需要10分钟

速读仅需5分钟

在数据流转中或者日常的数据操作中，势必会有数据写入的过程，如果把一些数据写入一张数据库表中，如果写入量有100万，而重复的数据有90万，那么如何让这10%的数据能够更高更高效的写入。

在MySQL方向提供了Insert ignore into,insert into on duplicate,replace into这几种写入的方式，看起来好像都差不多，但是实际上在一些场景下的差异还比较大，如果使用不当，恰恰是性能的瓶颈。

整体上我分为两个大的部分，会分别测试这三种数据写入场景。

第一部分基于id,name的数据列，其中id为主键，自增

第二部分基于id,xid,name的数据列，其中id为主键，自增，xid为唯一性索引

至于为什么要这么分，我们可以先看结果再做讨论。

基于id,name的数据列，其中id为主键，自增

为了三种测试场景的基准对等，数据初始化会按照如下的三种方式来进行。

数据初始化

create table test_data(id int primary key auto_increment,name varchar(30)) engine=innodb;
insert into test_data values(1,'aa'),(2,'bb'),(3,'cc');
show create table test_dataG Table: test_dataCreate Table: CREATE TABLE `test_data` ( `id` int(11) NOT NULL AUTO_INCREMENT, `name` varchar(30) DEFAULT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8

insert ignore

insert ignore into test_data values(1,'aa');Query OK, 0 rows affected, 1 warning (0.00 sec)>>show warnings;+———+——+—————————————+\| Level \| Code \| Message \|+———+——+—————————————+\| Warning \| 1062 \| Duplicate entry '1' for key 'PRIMARY' \|+———+——+—————————————+1 row in set (0.00 sec)
show create table test_dataG Table: test_dataCreate Table: CREATE TABLE `test_data` ( `id` int(11) NOT NULL AUTO_INCREMENT, `name` varchar(30) DEFAULT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8
insert ignore into test_data values(1,'aaa');Query OK, 0 rows affected, 1 warning (0.01 sec)>>show warnings;+———+——+—————————————+\| Level \| Code \| Message \|+———+——+—————————————+\| Warning \| 1062 \| Duplicate entry '1' for key 'PRIMARY' \|+———+——+—————————————+1 row in set (0.00 sec)
insert ignore into test_data values(4,'cc');Query OK, 1 row affected (0.01 sec)
select * from test_data;+—-+——+\| id \| name \|+—-+——+\| 1 \| aa \|\| 2 \| bb \|\| 3 \| cc \|\| 4 \| cc \|+—-+——+4 rows in set (0.00 sec)

replace into场景

>>replace into test_data values(1,'aa');Query OK, 1 row affected (0.01 sec)
show create table test_dataG Table: test_dataCreate Table: CREATE TABLE `test_data` ( `id` int(11) NOT NULL AUTO_INCREMENT, `name` varchar(30) DEFAULT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8
replace into test_data values(1,'aaa');Query OK, 2 rows affected (0.00 sec)
replace into test_data values(4,'cc');Query OK, 1 row affected (0.00 sec)
select *from test_data;+—-+——+\| id \| name \|+—-+——+\| 1 \| aaa \|\| 2 \| bb \|\| 3 \| cc \|\| 4 \| cc \|+—-+——+4 rows in set (0.00 sec)

insert into on duplicate场景

insert into test_data values(1,'aa') on duplicate key update id=id;Query OK, 0 rows affected (0.00 sec)insert into test_data values(1,'aa') on duplicate key update id=id, name=name;Query OK, 0 rows affected (0.00 sec)
show create table test_dataG Table: test_dataCreate Table: CREATE TABLE `test_data` ( `id` int(11) NOT NULL AUTO_INCREMENT, `name` varchar(30) DEFAULT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8
insert into test_data values(1,'aaa') on duplicate key update id=id;Query OK, 0 rows affected (0.00 sec)insert into test_data values(1,'aaa') on duplicate key update id=id,name=name;Query OK, 0 rows affected (0.00 sec)
insert into test_data values(4,'cc') on duplicate key update id=id;Query OK, 1 row affected (0.01 sec)insert into test_data values(4,'ccc') on duplicate key update id=id, name=name;Query OK, 0 rows affected (0.00 sec)
select * from test_data;+—-+——+\| id \| name \|+—-+——+\| 1 \| aa \|\| 2 \| bb \|\| 3 \| cc \|\| 4 \| cc \|+—-+——+4 rows in set (0.00 sec)

小结：这三种场景的结果从自增列的处理方式来看是完全对等的，但是对于重复数据的处理方式还是存在差异。

相比而言，replace into和insert into on duplicate存在本质的区别，replace into是覆盖写，即删除原来的，写入新的。不光是主键列，其他列也会保持一致

insert into on duplicate则可以根据自己的需求来定制重复数据的处理策略，不会主动改变数据。

insert ignore into 在这种场景下最为通用，而且对于数据的侵入性最小。

所以如果要保证源端的数据基于主键完全一致，不管非主键列的数据是否一致，都需要完全覆盖，选择replace into是一种好的方法。

否则采用insert into on duplcate或者insert ignore into

基于id,xid,name的数据列，其中id为主键，自增,xid为唯一性索引

为了三种测试场景的基准对等，数据初始化会按照如下的三种方式来进行。

数据初始化

create table test_data(id int primary key auto_increment,xid int unique key,name varchar(30)) engine=innodb;
insert into test_data(xid,name) values(1,'aa'),(2,'bb'),(3,'cc');Query OK, 3 rows affected (0.01 sec)Records: 3 Duplicates: 0 Warnings: 0
select *from test_data;+—-+——+——+\| id \| xid \| name \|+—-+——+——+\| 1 \| 1 \| aa \|\| 2 \| 2 \| bb \|\| 3 \| 3 \| cc \|+—-+——+——+3 rows in set (0.00 sec)

insert ignore into

insert ignore into test_data(xid,name) values(1,'aa');Query OK, 0 rows affected, 1 warning
CREATE TABLE `test_data` ( `id` int(11) NOT NULL AUTO_INCREMENT, `xid` int(11) DEFAULT NULL, `name` varchar(30) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `xid` (`xid`)) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8
insert ignore into test_data(xid,name) values(1,'aaa');Query OK, 0 rows affected, 1 warning (0.01 sec)mysql–root@localhost:test 18:58:13>>show warnings;+———+——+———————————–+\| Level \| Code \| Message \|+———+——+———————————–+\| Warning \| 1062 \| Duplicate entry '1' for key 'xid' \|+———+——+———————————–+
insert ignore into test_data(xid,name) values(4,'dd');Query OK, 1 row affected (0.00 sec)
Create Table: CREATE TABLE `test_data` ( `id` int(11) NOT NULL AUTO_INCREMENT, `xid` int(11) DEFAULT NULL, `name` varchar(30) DEFAULT NULL, PRIMARY KEY (`id`), UNIQUE KEY `xid` (`xid`)) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=utf8
>select * from test_data;+—-+——+——+\| id \| xid \| name \|+—-+——+——+\| 1 \| 1 \| aa \|\| 2 \| 2 \| bb \|\| 3 \| 3 \| cc \|\| 6 \| 4 \| dd \|+—-+——+——+4 rows in set (0.00 sec)

replace into

replace into test_data(xid,name) values(1,'aa');Query OK, 2 rows affected (0.00 sec)
+—-+——+——+\| id \| xid \| name \|+—-+——+——+\| 2 \| 2 \| bb \|\| 3 \| 3 \| cc \|\| 4 \| 1 \| aa \|+—-+——+——+3 rows in set (0.00 sec)
replace into test_data(xid,name) values(1,'aaa');Query OK, 2 rows affected (0.01 sec)
select *from test_data;+—-+——+——+\| id \| xid \| name \|+—-+——+——+\| 2 \| 2 \| bb \|\| 3 \| 3 \| cc \|\| 5 \| 1 \| aaa \|+—-+——+——+
replace into test_data(xid,name) values(4,'cc');Query OK, 1 row affected (0.00 sec)
select *from test_data;+—-+——+——+\| id \| xid \| name \|+—-+——+——+\| 2 \| 2 \| bb \|\| 3 \| 3 \| cc \|\| 5 \| 1 \| aaa \|\| 6 \| 4 \| dd \|+—-+——+——+4 rows in set (0.00 sec)

insert into on duplicate

insert into test_data(xid,name) values(1,'aa') on duplicate key update xid=xid;Query OK, 0 rows affected (0.00 sec)insert into test_data(xid,name) values(1,'aa') on duplicate key update xid=xid, name=name;Query OK, 0 rows affected (0.01 sec)
+—-+——+——+\| id \| xid \| name \|+—-+——+——+\| 1 \| 1 \| aa \|\| 2 \| 2 \| bb \|\| 3 \| 3 \| cc \|+—-+——+——+3 rows in set (0.00 sec)
insert into test_data(xid,name) values(1,'aaa') on duplicate key update xid=xid;Query OK, 0 rows affected (0.01 sec)insert into test_data(xid,name) values(1,'aaa') on duplicate key update xid=xid,name=name;Query OK, 0 rows affected (0.00 sec)
insert into test_data(xid,name) values(4,'cc') on duplicate key update xid=xid;Query OK, 1 row affected (0.01 sec)insert into test_data(xid,name) values(4,'ccc') on duplicate key update xid=xid, name=name;Query OK, 0 rows affected (0.00 sec)
select * from test_data;+—-+——+——+\| id \| xid \| name \|+—-+——+——+\| 1 \| 1 \| aa \|\| 2 \| 2 \| bb \|\| 3 \| 3 \| cc \|\| 8 \| 4 \| cc \|+—-+——+——+4 rows in set (0.00 sec)

小结：在这个场景里面，可以看到三种场景的变化真是很大，而且区别也很明显。

insert ignore into如果不指定自增列，尽管没有写入数据，但是自增列依然会自增

replace into如果不指定自增列，会看到数据重新写入的效果已经非常明显，而且自增列始终会自动维护。

insert into on duplicate对于重复数据依然会消耗自增列值，实现相对更加灵活。

关于MySQL中insert ignore,insert on duplicate和replace into，你可能没想过区别

VirMach 便宜 VPS

QNews

关于MySQL中insert ignore,insert on duplicate和replace into，你可能没想过区别

分享此文：

Related Posts

七年北漂落幕

项目经理思维导图——8 如何激励项目团队成员前进，防止成员抱团怼项目经理的情况发生？ ​

CSS-T | Mysql Client 任意文件读取攻击链拓展

IDEA主题美化

VirMach 便宜 VPS

QNews

热门文章

热门搜寻

项目经理思维导图——8 如何激励项目团队成员前进，防止成员抱团怼项目经理的情况发生？