表中重複數據的刪除方法總結

注意:

1、要看重複的兩行數據完全一樣,還是除主鍵外都不一樣

2、要求是保留大值還是小值

3、對於表中數據是存在兩條重複的數據,還是多餘兩條重複的數據

4、除了使用 in 和 max(min) 的組合的方法外,還可以使用 join

5、此類型的 SQL 題目見:

//github.com/ZGG2016/sql-practice-leetcode/blob/master/196.%20%E5%88%A0%E9%99%A4%E9%87%8D%E5%A4%8D%E7%9A%84%E7%94%B5%E5%AD%90%E9%82%AE%E7%AE%B1.md

1、重複的數據完全一樣

-- 建測試表
CREATE TABLE websites (
    id int(11) ,
    name char(20) ,
    url varchar(255),
    alexa int(11),
    country char(10)
);

-- 插入數據,每條數據都重複,但僅重複兩次
INSERT INTO websites VALUES 
    ('1', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('2', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('3', 'runoob', '//www.runoob.com/', '4689', 'CN'), 
    ('4', 'weibo', '//weibo.com/', '20', 'CN'), 
    ('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
    ('1', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('2', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('3', 'runoob', '//www.runoob.com/', '4689', 'CN'), 
    ('4', 'weibo', '//weibo.com/', '20', 'CN'), 
    ('5', 'Facebook', '//www.facebook.com/', '3', 'USA');

-- 插入數據後查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
+------+----------+---------------------------+-------+---------+
10 rows in set (0.00 sec)

-- 添加一個自增的主鍵列
mysql> alter table websites add column uid int primary key auto_increment;

-- 添加後查看
mysql> desc websites;
+---------+--------------+------+-----+---------+----------------+
| Field   | Type         | Null | Key | Default | Extra          |
+---------+--------------+------+-----+---------+----------------+
| id      | int          | YES  |     | NULL    |                |
| name    | char(20)     | YES  |     | NULL    |                |
| url     | varchar(255) | YES  |     | NULL    |                |
| alexa   | int          | YES  |     | NULL    |                |
| country | char(10)     | YES  |     | NULL    |                |
| uid     | int          | NO   | PRI | NULL    | auto_increment |
+---------+--------------+------+-----+---------+----------------+

-- 刪除uid大的那條
delete from websites 
    where uid in (
        select uid from 
            (select max(uid) uid,count(id) c_id 
                from websites 
                group by id 
                having c_id>1  -- 防止刪除不重複的數據
                order by c_id desc) t
    );

-- 刪除後查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+-----+
| id   | name     | url                       | alexa | country | uid |
+------+----------+---------------------------+-------+---------+-----+
|    1 | Google   | //www.google.cm/    |     1 | USA     |   1 |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |   2 |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |   3 |
|    4 | weibo    | //weibo.com/         |    20 | CN      |   4 |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |   5 |
+------+----------+---------------------------+-------+---------+-----+

-- 刪除掉新增的主鍵ID
mysql> alter table websites DROP column uid;

-- 刪除後查看
mysql> desc websites;
+---------+--------------+------+-----+---------+-------+
| Field   | Type         | Null | Key | Default | Extra |
+---------+--------------+------+-----+---------+-------+
| id      | int          | YES  |     | NULL    |       |
| name    | char(20)     | YES  |     | NULL    |       |
| url     | varchar(255) | YES  |     | NULL    |       |
| alexa   | int          | YES  |     | NULL    |       |
| country | char(10)     | YES  |     | NULL    |       |
+---------+--------------+------+-----+---------+-------+

2、重複的數據除主鍵外都不一樣

-- 建測試表
CREATE TABLE websites (
    id int(11) ,
    name char(20) ,
    url varchar(255),
    alexa int(11),
    country char(10)
);

-- 插入數據,除了`weibo`和`Facebook`外,
-- 每條數據都重複,但僅重複兩次,主鍵都唯一
INSERT INTO websites VALUES 
    ('1', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('2', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('3', 'runoob', '//www.runoob.com/', '4689', 'CN'), 
    ('4', 'weibo', '//weibo.com/', '20', 'CN'), 
    ('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
    ('6', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('7', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('8', 'runoob', '//www.runoob.com/', '4689', 'CN');

-- 插入數據後查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
|    6 | Google   | //www.google.cm/    |     1 | USA     |
|    7 | taobao   | //www.taobao.com/   |    13 | CN      |
|    8 | runoob   | //www.runoob.com/    |  4689 | CN      |
+------+----------+---------------------------+-------+---------+

-- 刪除id大的那條
delete from websites
    where id in (
        select id from (
            select name,url,alexa,country,max(id) id,count(*) c
                from websites 
                group by name,url,alexa,country
                having c>1
                ) t
     );

-- 刪除後查看
mysql> select * from websites; 
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
+------+----------+---------------------------+-------+---------+

3、重複的數據多於2條

除主鍵外都不一樣 為例

-- 清空數據
mysql> truncate table websites;

-- 插入數據,`Google`項的數據重複了三次。主鍵都唯一
INSERT INTO websites VALUES 
    ('1', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('2', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('3', 'runoob', '//www.runoob.com/', '4689', 'CN'), 
    ('4', 'weibo', '//weibo.com/', '20', 'CN'), 
    ('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
    ('6', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('7', 'Google', '//www.google.cm/', '1', 'USA');

-- 插入後查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
|    6 | Google   | //www.google.cm/    |     1 | USA     |
|    7 | Google   | //www.google.cm/    |     1 | USA     |
+------+----------+---------------------------+-------+---------+


-- 使用上述方法刪除id大的那條
delete from websites
    where id in (
        select id from (
            select name,url,alexa,country,max(id) id,count(*) c
                from websites 
                group by name,url,alexa,country
                having c>1
                ) t
     );

-- 刪除後查看
-- 還有條重複數據,第6行
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
|    6 | Google   | //www.google.cm/    |     1 | USA     |
+------+----------+---------------------------+-------+---------+

-- 重新調整表中數據
-- 清空數據
mysql> truncate table websites;

-- 插入數據,`Google`項的數據重複了三次。主鍵都唯一
INSERT INTO websites VALUES 
    ('1', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('2', 'taobao', '//www.taobao.com/', '13', 'CN'), 
    ('3', 'runoob', '//www.runoob.com/', '4689', 'CN'), 
    ('4', 'weibo', '//weibo.com/', '20', 'CN'), 
    ('5', 'Facebook', '//www.facebook.com/', '3', 'USA'),
    ('6', 'Google', '//www.google.cm/', '1', 'USA'), 
    ('7', 'Google', '//www.google.cm/', '1', 'USA');

-- 調整後查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
|    6 | Google   | //www.google.cm/    |     1 | USA     |
|    7 | Google   | //www.google.cm/    |     1 | USA     |
+------+----------+---------------------------+-------+---------+

-- 使用如下方法刪除
delete from websites 
    where id not in (
        select id from (
            select name,url,alexa,country,min(id) id 
            from websites 
            group by name,url,alexa,country) t
    );


-- 刪除後查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
+------+----------+---------------------------+-------+---------+

-- 重新調整表中數據
-- 使用join的方法
delete w1 from websites w1
	join websites w2 on w1.name=w2.name and w1.url=w2.url and w1.alexa=w2.alexa and w1.country=w2.country
	where w1.id>w2.id;

-- 刪除後查看
mysql> select * from websites;
+------+----------+---------------------------+-------+---------+
| id   | name     | url                       | alexa | country |
+------+----------+---------------------------+-------+---------+
|    1 | Google   | //www.google.cm/    |     1 | USA     |
|    2 | taobao   | //www.taobao.com/   |    13 | CN      |
|    3 | runoob   | //www.runoob.com/    |  4689 | CN      |
|    4 | weibo    | //weibo.com/         |    20 | CN      |
|    5 | Facebook | //www.facebook.com/ |     3 | USA     |
+------+----------+---------------------------+-------+---------+
Tags: