表中重复数据的删除方法总结
注意:
1、要看重复的两行数据完全一样,还是除主键外都不一样
2、要求是保留大值还是小值
3、对于表中数据是存在两条重复的数据,还是多余两条重复的数据
4、除了使用 in 和 max(min) 的组合的方法外,还可以使用 join
5、此类型的 SQL 题目见:
1、重复的数据完全一样
-- 建测试表
CREATE TABLE websites (
id int(11) ,
name char(20) ,
url varchar(255),
alexa int(11),
country char(10)
);
-- 插入数据,每条数据都重复,但仅重复两次
INSERT INTO websites VALUES
('1', 'Google', '//www.google.cm/', '1', 'USA'),
('2', 'taobao', '//www.taobao.com/', '13', 'CN'),
('3', 'runoob', '//www.runoob.com/', '4689', 'CN'),
('4', 'weibo', '//weibo.com/', '20', 'CN'),
('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
('1', 'Google', '//www.google.cm/', '1', 'USA'),
('2', 'taobao', '//www.taobao.com/', '13', 'CN'),
('3', 'runoob', '//www.runoob.com/', '4689', 'CN'),
('4', 'weibo', '//weibo.com/', '20', 'CN'),
('5', 'Facebook', '//www.facebook.com/', '3', 'USA');
-- 插入数据后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id | name | url | alexa | country |
+------+----------+---------------------------+-------+---------+
| 1 | Google | //www.google.cm/ | 1 | USA |
| 2 | taobao | //www.taobao.com/ | 13 | CN |
| 3 | runoob | //www.runoob.com/ | 4689 | CN |
| 4 | weibo | //weibo.com/ | 20 | CN |
| 5 | Facebook | //www.facebook.com/ | 3 | USA |
| 1 | Google | //www.google.cm/ | 1 | USA |
| 2 | taobao | //www.taobao.com/ | 13 | CN |
| 3 | runoob | //www.runoob.com/ | 4689 | CN |
| 4 | weibo | //weibo.com/ | 20 | CN |
| 5 | Facebook | //www.facebook.com/ | 3 | USA |
+------+----------+---------------------------+-------+---------+
10 rows in set (0.00 sec)
-- 添加一个自增的主键列
mysql> alter table websites add column uid int primary key auto_increment;
-- 添加后查看
mysql> desc websites;
+---------+--------------+------+-----+---------+----------------+
| Field | Type | Null | Key | Default | Extra |
+---------+--------------+------+-----+---------+----------------+
| id | int | YES | | NULL | |
| name | char(20) | YES | | NULL | |
| url | varchar(255) | YES | | NULL | |
| alexa | int | YES | | NULL | |
| country | char(10) | YES | | NULL | |
| uid | int | NO | PRI | NULL | auto_increment |
+---------+--------------+------+-----+---------+----------------+
-- 删除uid大的那条
delete from websites
where uid in (
select uid from
(select max(uid) uid,count(id) c_id
from websites
group by id
having c_id>1 -- 防止删除不重复的数据
order by c_id desc) t
);
-- 删除后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+-----+
| id | name | url | alexa | country | uid |
+------+----------+---------------------------+-------+---------+-----+
| 1 | Google | //www.google.cm/ | 1 | USA | 1 |
| 2 | taobao | //www.taobao.com/ | 13 | CN | 2 |
| 3 | runoob | //www.runoob.com/ | 4689 | CN | 3 |
| 4 | weibo | //weibo.com/ | 20 | CN | 4 |
| 5 | Facebook | //www.facebook.com/ | 3 | USA | 5 |
+------+----------+---------------------------+-------+---------+-----+
-- 删除掉新增的主键ID
mysql> alter table websites DROP column uid;
-- 删除后查看
mysql> desc websites;
+---------+--------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+---------+--------------+------+-----+---------+-------+
| id | int | YES | | NULL | |
| name | char(20) | YES | | NULL | |
| url | varchar(255) | YES | | NULL | |
| alexa | int | YES | | NULL | |
| country | char(10) | YES | | NULL | |
+---------+--------------+------+-----+---------+-------+
2、重复的数据除主键外都不一样
-- 建测试表
CREATE TABLE websites (
id int(11) ,
name char(20) ,
url varchar(255),
alexa int(11),
country char(10)
);
-- 插入数据,除了`weibo`和`Facebook`外,
-- 每条数据都重复,但仅重复两次,主键都唯一
INSERT INTO websites VALUES
('1', 'Google', '//www.google.cm/', '1', 'USA'),
('2', 'taobao', '//www.taobao.com/', '13', 'CN'),
('3', 'runoob', '//www.runoob.com/', '4689', 'CN'),
('4', 'weibo', '//weibo.com/', '20', 'CN'),
('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
('6', 'Google', '//www.google.cm/', '1', 'USA'),
('7', 'taobao', '//www.taobao.com/', '13', 'CN'),
('8', 'runoob', '//www.runoob.com/', '4689', 'CN');
-- 插入数据后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id | name | url | alexa | country |
+------+----------+---------------------------+-------+---------+
| 1 | Google | //www.google.cm/ | 1 | USA |
| 2 | taobao | //www.taobao.com/ | 13 | CN |
| 3 | runoob | //www.runoob.com/ | 4689 | CN |
| 4 | weibo | //weibo.com/ | 20 | CN |
| 5 | Facebook | //www.facebook.com/ | 3 | USA |
| 6 | Google | //www.google.cm/ | 1 | USA |
| 7 | taobao | //www.taobao.com/ | 13 | CN |
| 8 | runoob | //www.runoob.com/ | 4689 | CN |
+------+----------+---------------------------+-------+---------+
-- 删除id大的那条
delete from websites
where id in (
select id from (
select name,url,alexa,country,max(id) id,count(*) c
from websites
group by name,url,alexa,country
having c>1
) t
);
-- 删除后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id | name | url | alexa | country |
+------+----------+---------------------------+-------+---------+
| 1 | Google | //www.google.cm/ | 1 | USA |
| 2 | taobao | //www.taobao.com/ | 13 | CN |
| 3 | runoob | //www.runoob.com/ | 4689 | CN |
| 4 | weibo | //weibo.com/ | 20 | CN |
| 5 | Facebook | //www.facebook.com/ | 3 | USA |
+------+----------+---------------------------+-------+---------+
3、重复的数据多于2条
以 除主键外都不一样
为例
-- 清空数据
mysql> truncate table websites;
-- 插入数据,`Google`项的数据重复了三次。主键都唯一
INSERT INTO websites VALUES
('1', 'Google', '//www.google.cm/', '1', 'USA'),
('2', 'taobao', '//www.taobao.com/', '13', 'CN'),
('3', 'runoob', '//www.runoob.com/', '4689', 'CN'),
('4', 'weibo', '//weibo.com/', '20', 'CN'),
('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
('6', 'Google', '//www.google.cm/', '1', 'USA'),
('7', 'Google', '//www.google.cm/', '1', 'USA');
-- 插入后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id | name | url | alexa | country |
+------+----------+---------------------------+-------+---------+
| 1 | Google | //www.google.cm/ | 1 | USA |
| 2 | taobao | //www.taobao.com/ | 13 | CN |
| 3 | runoob | //www.runoob.com/ | 4689 | CN |
| 4 | weibo | //weibo.com/ | 20 | CN |
| 5 | Facebook | //www.facebook.com/ | 3 | USA |
| 6 | Google | //www.google.cm/ | 1 | USA |
| 7 | Google | //www.google.cm/ | 1 | USA |
+------+----------+---------------------------+-------+---------+
-- 使用上述方法删除id大的那条
delete from websites
where id in (
select id from (
select name,url,alexa,country,max(id) id,count(*) c
from websites
group by name,url,alexa,country
having c>1
) t
);
-- 删除后查看
-- 还有条重复数据,第6行
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id | name | url | alexa | country |
+------+----------+---------------------------+-------+---------+
| 1 | Google | //www.google.cm/ | 1 | USA |
| 2 | taobao | //www.taobao.com/ | 13 | CN |
| 3 | runoob | //www.runoob.com/ | 4689 | CN |
| 4 | weibo | //weibo.com/ | 20 | CN |
| 5 | Facebook | //www.facebook.com/ | 3 | USA |
| 6 | Google | //www.google.cm/ | 1 | USA |
+------+----------+---------------------------+-------+---------+
-- 重新调整表中数据
-- 清空数据
mysql> truncate table websites;
-- 插入数据,`Google`项的数据重复了三次。主键都唯一
INSERT INTO websites VALUES
('1', 'Google', '//www.google.cm/', '1', 'USA'),
('2', 'taobao', '//www.taobao.com/', '13', 'CN'),
('3', 'runoob', '//www.runoob.com/', '4689', 'CN'),
('4', 'weibo', '//weibo.com/', '20', 'CN'),
('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
('6', 'Google', '//www.google.cm/', '1', 'USA'),
('7', 'Google', '//www.google.cm/', '1', 'USA');
-- 调整后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id | name | url | alexa | country |
+------+----------+---------------------------+-------+---------+
| 1 | Google | //www.google.cm/ | 1 | USA |
| 2 | taobao | //www.taobao.com/ | 13 | CN |
| 3 | runoob | //www.runoob.com/ | 4689 | CN |
| 4 | weibo | //weibo.com/ | 20 | CN |
| 5 | Facebook | //www.facebook.com/ | 3 | USA |
| 6 | Google | //www.google.cm/ | 1 | USA |
| 7 | Google | //www.google.cm/ | 1 | USA |
+------+----------+---------------------------+-------+---------+
-- 使用如下方法删除
delete from websites
where id not in (
select id from (
select name,url,alexa,country,min(id) id
from websites
group by name,url,alexa,country) t
);
-- 删除后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id | name | url | alexa | country |
+------+----------+---------------------------+-------+---------+
| 1 | Google | //www.google.cm/ | 1 | USA |
| 2 | taobao | //www.taobao.com/ | 13 | CN |
| 3 | runoob | //www.runoob.com/ | 4689 | CN |
| 4 | weibo | //weibo.com/ | 20 | CN |
| 5 | Facebook | //www.facebook.com/ | 3 | USA |
+------+----------+---------------------------+-------+---------+
-- 重新调整表中数据
-- 使用join的方法
delete w1 from websites w1
join websites w2 on w1.name=w2.name and w1.url=w2.url and w1.alexa=w2.alexa and w1.country=w2.country
where w1.id>w2.id;
-- 删除后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id | name | url | alexa | country |
+------+----------+---------------------------+-------+---------+
| 1 | Google | //www.google.cm/ | 1 | USA |
| 2 | taobao | //www.taobao.com/ | 13 | CN |
| 3 | runoob | //www.runoob.com/ | 4689 | CN |
| 4 | weibo | //weibo.com/ | 20 | CN |
| 5 | Facebook | //www.facebook.com/ | 3 | USA |
+------+----------+---------------------------+-------+---------+