表中重复数据的删除方法总结

注意:

1、要看重复的两行数据完全一样,还是除主键外都不一样

2、要求是保留大值还是小值

3、对于表中数据是存在两条重复的数据,还是多余两条重复的数据

4、除了使用 in 和 max(min) 的组合的方法外,还可以使用 join

5、此类型的 SQL 题目见:

//github.com/ZGG2016/sql-practice-leetcode/blob/master/196.%20%E5%88%A0%E9%99%A4%E9%87%8D%E5%A4%8D%E7%9A%84%E7%94%B5%E5%AD%90%E9%82%AE%E7%AE%B1.md

1、重复的数据完全一样

-- 建测试表
CREATE TABLE websites (
    id int(11) ,
    name char(20) ,
    url varchar(255),
    alexa int(11),
    country char(10)
);

-- 插入数据,每条数据都重复,但仅重复两次
INSERT INTO websites VALUES 
    ('1', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('2', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('3', 'runoob', '//www.runoob.com/', '4689', 'CN'), 
    ('4', 'weibo', '//weibo.com/', '20', 'CN'), 
    ('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
    ('1', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('2', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('3', 'runoob', '//www.runoob.com/', '4689', 'CN'), 
    ('4', 'weibo', '//weibo.com/', '20', 'CN'), 
    ('5', 'Facebook', '//www.facebook.com/', '3', 'USA');

-- 插入数据后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
+------+----------+---------------------------+-------+---------+
10 rows in set (0.00 sec)

-- 添加一个自增的主键列
mysql> alter table websites add column uid int primary key auto_increment;

-- 添加后查看
mysql> desc websites;
+---------+--------------+------+-----+---------+----------------+
| Field   | Type         | Null | Key | Default | Extra          |
+---------+--------------+------+-----+---------+----------------+
| id      | int          | YES  |     | NULL    |                |
| name    | char(20)     | YES  |     | NULL    |                |
| url     | varchar(255) | YES  |     | NULL    |                |
| alexa   | int          | YES  |     | NULL    |                |
| country | char(10)     | YES  |     | NULL    |                |
| uid     | int          | NO   | PRI | NULL    | auto_increment |
+---------+--------------+------+-----+---------+----------------+

-- 删除uid大的那条
delete from websites 
    where uid in (
        select uid from 
            (select max(uid) uid,count(id) c_id 
                from websites 
                group by id 
                having c_id>1  -- 防止删除不重复的数据
                order by c_id desc) t
    );

-- 删除后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+-----+
| id   | name     | url                       | alexa | country | uid |
+------+----------+---------------------------+-------+---------+-----+
|    1 | Google   | //www.google.cm/    |     1 | USA     |   1 |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |   2 |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |   3 |
|    4 | weibo    | //weibo.com/         |    20 | CN      |   4 |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |   5 |
+------+----------+---------------------------+-------+---------+-----+

-- 删除掉新增的主键ID
mysql> alter table websites DROP column uid;

-- 删除后查看
mysql> desc websites;
+---------+--------------+------+-----+---------+-------+
| Field   | Type         | Null | Key | Default | Extra |
+---------+--------------+------+-----+---------+-------+
| id      | int          | YES  |     | NULL    |       |
| name    | char(20)     | YES  |     | NULL    |       |
| url     | varchar(255) | YES  |     | NULL    |       |
| alexa   | int          | YES  |     | NULL    |       |
| country | char(10)     | YES  |     | NULL    |       |
+---------+--------------+------+-----+---------+-------+

2、重复的数据除主键外都不一样

-- 建测试表
CREATE TABLE websites (
    id int(11) ,
    name char(20) ,
    url varchar(255),
    alexa int(11),
    country char(10)
);

-- 插入数据,除了`weibo`和`Facebook`外,
-- 每条数据都重复,但仅重复两次,主键都唯一
INSERT INTO websites VALUES 
    ('1', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('2', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('3', 'runoob', '//www.runoob.com/', '4689', 'CN'), 
    ('4', 'weibo', '//weibo.com/', '20', 'CN'), 
    ('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
    ('6', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('7', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('8', 'runoob', '//www.runoob.com/', '4689', 'CN');

-- 插入数据后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
|    6 | Google   | //www.google.cm/    |     1 | USA     |
|    7 | taobao   | //www.taobao.com/   |    13 | CN      |
|    8 | runoob   | //www.runoob.com/    |  4689 | CN      |
+------+----------+---------------------------+-------+---------+

-- 删除id大的那条
delete from websites
    where id in (
        select id from (
            select name,url,alexa,country,max(id) id,count(*) c
                from websites 
                group by name,url,alexa,country
                having c>1
                ) t
     );

-- 删除后查看
mysql> select * from websites; 
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
+------+----------+---------------------------+-------+---------+

3、重复的数据多于2条

除主键外都不一样 为例

-- 清空数据
mysql> truncate table websites;

-- 插入数据,`Google`项的数据重复了三次。主键都唯一
INSERT INTO websites VALUES 
    ('1', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('2', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('3', 'runoob', '//www.runoob.com/', '4689', 'CN'), 
    ('4', 'weibo', '//weibo.com/', '20', 'CN'), 
    ('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
    ('6', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('7', 'Google', '//www.google.cm/', '1', 'USA');

-- 插入后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
|    6 | Google   | //www.google.cm/    |     1 | USA     |
|    7 | Google   | //www.google.cm/    |     1 | USA     |
+------+----------+---------------------------+-------+---------+


-- 使用上述方法删除id大的那条
delete from websites
    where id in (
        select id from (
            select name,url,alexa,country,max(id) id,count(*) c
                from websites 
                group by name,url,alexa,country
                having c>1
                ) t
     );

-- 删除后查看
-- 还有条重复数据,第6行
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
|    6 | Google   | //www.google.cm/    |     1 | USA     |
+------+----------+---------------------------+-------+---------+

-- 重新调整表中数据
-- 清空数据
mysql> truncate table websites;

-- 插入数据,`Google`项的数据重复了三次。主键都唯一
INSERT INTO websites VALUES 
    ('1', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('2', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('3', 'runoob', '//www.runoob.com/', '4689', 'CN'), 
    ('4', 'weibo', '//weibo.com/', '20', 'CN'), 
    ('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
    ('6', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('7', 'Google', '//www.google.cm/', '1', 'USA');

-- 调整后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
|    6 | Google   | //www.google.cm/    |     1 | USA     |
|    7 | Google   | //www.google.cm/    |     1 | USA     |
+------+----------+---------------------------+-------+---------+

-- 使用如下方法删除
delete from websites 
    where id not in (
        select id from (
            select name,url,alexa,country,min(id) id 
            from websites 
            group by name,url,alexa,country) t
    );


-- 删除后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
+------+----------+---------------------------+-------+---------+

-- 重新调整表中数据
-- 使用join的方法
delete w1 from websites w1
	join websites w2 on w1.name=w2.name and w1.url=w2.url and w1.alexa=w2.alexa and w1.country=w2.country
	where w1.id>w2.id;

-- 删除后查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
+------+----------+---------------------------+-------+---------+
Tags: