对于MySQL表,如果自增ID不是主键时,是否可以用来做增量查询?
需要按照自增ID字段进行增量查询,有些表的自增ID是主键,而有些表的自增只是普通索引,有些采用MyISAM,有些采用InnoDB。
如果采用粗暴的“SELECT * FROM table WHERE f_id>M ORDER BY f_id LIMIT N”,功能上没有任何问题,但当表的记录数很大时(比如1000万条),“ORDER BY f_id”会极影响查询效率。为此,需要弄清楚“SELECT * FROM table WHERE f_id>M LIMIT N”的可行性,即增量查询时,不指定“ORDER BY f_id”。
研究基于的MySQL(注:5.6.7之前最大分区数限制为1024,从5.6.7开始调整为8192,另外5.6版本分区表不支持HANDLER):
MySQL [test]> select version();
+-----------+
| version() |
+-----------+
| 5.7.18 |
+-----------+
1 row in set (0.01 sec)
建表SQL语句:
DROP TABLE IF EXISTS `tableA1`;
CREATE TABLE `tableA1` (
`id` BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY,
`af` INT NOT NULL,
`bf` INT NOT NULL,
`cf` INT NOT NULL,
INDEX `idx_af` (`af`),
INDEX `idx_bf` (`bf`)
)ENGINE=InnoDB;
依顺序执行下列插入操作:
INSERT INTO tableA1 (af,bf,cf) VALUES (1,2,1);
INSERT INTO tableA1 (af,bf,cf) VALUES (2,1,2);
INSERT INTO tableA1 (id,af,bf,cf) VALUES (11,12,11,11);
INSERT INTO tableA1 (id,af,bf,cf) VALUES (12,11,12,12);
INSERT INTO tableA1 (af,bf,cf) VALUES (13,16,13);
INSERT INTO tableA1 (id,af,bf,cf) VALUES (3,3,3,3);
INSERT INTO tableA1 (af,bf,cf) VALUES (14,17,14);
INSERT INTO tableA1 (id,af,bf,cf) VALUES (5,15,5,5);
查看结果:
// 按自增ID有序(自增ID为主键)
MySQL [test]> SELECT * FROM tableA1;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 2 | 1 |
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 5 | 15 | 5 | 5 |
| 11 | 12 | 11 | 11 |
| 12 | 11 | 12 | 12 |
| 13 | 13 | 16 | 13 |
| 14 | 14 | 17 | 14 |
+----+----+----+----+
8 rows in set (0.00 sec)
// 按自增ID有序(自增ID为主键)
MySQL [test]> SELECT * FROM tableA1 WHERE id>=1 LIMIT 10;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 2 | 1 |
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 5 | 15 | 5 | 5 |
| 11 | 12 | 11 | 11 |
| 12 | 11 | 12 | 12 |
| 13 | 13 | 16 | 13 |
| 14 | 14 | 17 | 14 |
+----+----+----+----+
8 rows in set (0.00 sec)
// 按自增ID有序(自增ID为主键)
MySQL [test]> SELECT * FROM tableA1 WHERE id>=2 LIMIT 10;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 5 | 15 | 5 | 5 |
| 11 | 12 | 11 | 11 |
| 12 | 11 | 12 | 12 |
| 13 | 13 | 16 | 13 |
| 14 | 14 | 17 | 14 |
+----+----+----+----+
7 rows in set (0.00 sec)
// 按自增ID有序(自增ID为主键)
MySQL [test]> SELECT * FROM tableA1 WHERE id>=2 LIMIT 7;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 5 | 15 | 5 | 5 |
| 11 | 12 | 11 | 11 |
| 12 | 11 | 12 | 12 |
| 13 | 13 | 16 | 13 |
| 14 | 14 | 17 | 14 |
+----+----+----+----+
7 rows in set (0.00 sec)
可以看到,当自增ID为主键时,自增ID乱序插入,查询结果也是按自增ID有序(实测有序插入一样有序),因此可以放心依自增ID增量查询,而不必指定“ORDER BY f_id”。
DROP TABLE IF EXISTS `tableA2`;
CREATE TABLE `tableA2` (
`id` BIGINT NOT NULL AUTO_INCREMENT,
`af` INT NOT NULL,
`bf` INT NOT NULL,
`cf` INT NOT NULL,
UNIQUE INDEX `idx_af` (`af`),
INDEX `idx_id` (`id`),
INDEX `idx_bf` (`bf`)
)ENGINE=InnoDB;
依顺序执行下列插入操作:
INSERT INTO tableA2 (af,bf,cf) VALUES (1,2,1);
INSERT INTO tableA2 (af,bf,cf) VALUES (2,1,2);
INSERT INTO tableA2 (id,af,bf,cf) VALUES (11,12,11,11);
INSERT INTO tableA2 (id,af,bf,cf) VALUES (12,11,12,12);
INSERT INTO tableA2 (af,bf,cf) VALUES (13,16,13);
INSERT INTO tableA2 (id,af,bf,cf) VALUES (3,3,3,3);
INSERT INTO tableA2 (af,bf,cf) VALUES (14,17,14);
INSERT INTO tableA2 (id,af,bf,cf) VALUES (5,15,5,5);
查看结果:
// 总共8条记录
MySQL [test]> SELECT COUNT(1) FROM tableA2;
+----------+
| COUNT(1) |
+----------+
| 8 |
+----------+
1 row in set (0.00 sec)
// 按自增ID无序,但按唯一索引有序
MySQL [test]> SELECT * FROM tableA2;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 2 | 1 |
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 12 | 11 | 12 | 12 |
| 11 | 12 | 11 | 11 |
| 13 | 13 | 16 | 13 |
| 14 | 14 | 17 | 14 |
| 5 | 15 | 5 | 5 |
+----+----+----+----+
8 rows in set (0.00 sec)
// 按自增ID无序,但按唯一索引有序
MySQL [test]> SELECT * FROM tableA2 WHERE id>=1 LIMIT 10;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 2 | 1 |
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 12 | 11 | 12 | 12 |
| 11 | 12 | 11 | 11 |
| 13 | 13 | 16 | 13 |
| 14 | 14 | 17 | 14 |
| 5 | 15 | 5 | 5 |
+----+----+----+----+
8 rows in set (0.00 sec)
// 按自增ID无序,但按唯一索引有序
MySQL [test]> SELECT * FROM tableA2 WHERE id>=2 LIMIT 10;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 12 | 11 | 12 | 12 |
| 11 | 12 | 11 | 11 |
| 13 | 13 | 16 | 13 |
| 14 | 14 | 17 | 14 |
| 5 | 15 | 5 | 5 |
+----+----+----+----+
7 rows in set (0.00 sec)
// 按自增ID有序,但按唯一索引无序(LIMIT数小于表总记录数)
MySQL [test]> SELECT * FROM tableA2 WHERE id>=2 LIMIT 5;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 5 | 15 | 5 | 5 |
| 11 | 12 | 11 | 11 |
| 12 | 11 | 12 | 12 |
+----+----+----+----+
5 rows in set (0.00 sec)
// 按自增ID有序,但按唯一索引无序(LIMIT数小于表总记录数)
MySQL [test]> SELECT * FROM tableA2 WHERE id>=1 LIMIT 7;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 2 | 1 |
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 5 | 15 | 5 | 5 |
| 11 | 12 | 11 | 11 |
| 12 | 11 | 12 | 12 |
| 13 | 13 | 16 | 13 |
+----+----+----+----+
7 rows in set (0.00 sec)
// 更新一条记录
MySQL [test]> UPDATE tableA2 SET id=15 WHERE id=12;
Query OK, 1 row affected (0.00 sec)
Rows matched: 1 Changed: 1 Warnings: 0
// 按自增ID是无序的
MySQL [test]> SELECT * FROM tableA2 LIMIT 7;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 2 | 1 |
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 15 | 11 | 12 | 12 |
| 11 | 12 | 11 | 11 |
| 13 | 13 | 16 | 13 |
| 14 | 14 | 17 | 14 |
+----+----+----+----+
7 rows in set (0.00 sec)
// 按自增ID是有序的(LIMIT数小于表记录数)
// 按唯一自增ID无序
MySQL [test]> SELECT * FROM tableA2 WHERE id>=1 LIMIT 7;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 2 | 1 |
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 5 | 15 | 5 | 5 |
| 11 | 12 | 11 | 11 |
| 13 | 13 | 16 | 13 |
| 14 | 14 | 17 | 14 |
+----+----+----+----+
7 rows in set (0.00 sec)
// 按自增ID是无序的(LIMIT数等于或大于表记录数)
// 按唯一自增ID有序
MySQL [test]> SELECT * FROM tableA2 WHERE id>=1 LIMIT 8;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 2 | 1 |
| 2 | 2 | 1 | 2 |
| 3 | 3 | 3 | 3 |
| 15 | 11 | 12 | 12 |
| 11 | 12 | 11 | 11 |
| 13 | 13 | 16 | 13 |
| 14 | 14 | 17 | 14 |
| 5 | 15 | 5 | 5 |
+----+----+----+----+
8 rows in set (0.00 sec)
从测试可以看到,当LIMIT的数小于表的记录数时,结果是按自增ID有序返回。
InnoDB存储数据时,即按B+树结果存储,B+树的叶子结果保存完整的记录,表文件本身即为主索引(即主键),普通索引并不直接指向数据,而是指向了主索引。
如对于表tableA2(tableA1结果相同):
INSERT INTO tableA2 (id,af,bf,cf) VALUES (1,1,1,1);
INSERT INTO tableA2 (id,af,bf,cf) VALUES (2,5,1,1);
INSERT INTO tableA2 (id,af,bf,cf) VALUES (3,2,1,1);
INSERT INTO tableA2 (id,af,bf,cf) VALUES (4,8,1,1);
INSERT INTO tableA2 (id,af,bf,cf) VALUES (5,3,1,1);
INSERT INTO tableA2 (id,af,bf,cf) VALUES (6,4,1,1);
INSERT INTO tableA2 (id,af,bf,cf) VALUES (7,7,1,1);
MySQL [test]> SELECT * FROM tableA2 WHERE af>0 LIMIT 3;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 1 | 1 |
| 3 | 2 | 1 | 1 |
| 5 | 3 | 1 | 1 |
+----+----+----+----+
3 rows in set (0.00 sec)
MySQL [test]> SELECT * FROM tableA2 WHERE af>0 LIMIT 10;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 1 | 1 |
| 3 | 2 | 1 | 1 |
| 5 | 3 | 1 | 1 |
| 6 | 4 | 1 | 1 |
| 2 | 5 | 1 | 1 |
| 7 | 7 | 1 | 1 |
| 4 | 8 | 1 | 1 |
+----+----+----+----+
7 rows in set (0.00 sec)
建表SQL语句:
DROP TABLE IF EXISTS `tableB1`;
CREATE TABLE `tableB1` (
`id` BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY,
`af` INT NOT NULL,
`bf` INT NOT NULL,
`cf` INT NOT NULL,
INDEX `idx_id` (`id`),
INDEX `idx_bf` (`bf`)
)ENGINE=MyISAM;
依顺序执行下列插入操作:
INSERT INTO tableB1 (af,bf,cf) VALUES (1,2,1);
INSERT INTO tableB1 (af,bf,cf) VALUES (2,1,2);
INSERT INTO tableB1 (id,af,bf,cf) VALUES (11,12,11,11);
INSERT INTO tableB1 (id,af,bf,cf) VALUES (12,11,12,12);
INSERT INTO tableB1 (af,bf,cf) VALUES (13,16,13);
INSERT INTO tableB1 (id,af,bf,cf) VALUES (3,3,3,3);
INSERT INTO tableB1 (af,bf,cf) VALUES (14,17,14);
INSERT INTO tableB1 (id,af,bf,cf) VALUES (5,15,5,5);
查看结果:
// 乱序
MySQL [test]> SELECT * FROM tableB1;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 1 | 1 |
| 2 | 2 | 2 | 2 |
| 11 | 11 | 11 | 11 |
| 12 | 12 | 12 | 12 |
| 13 | 13 | 13 | 13 |
| 3 | 3 | 3 | 3 |
| 14 | 14 | 14 | 14 |
| 5 | 5 | 5 | 5 |
+----+----+----+----+
8 rows in set (0.00 sec)
// 乱序了
MySQL [test]> SELECT * FROM tableB1 WHERE id>1 LIMIT 10;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 2 | 2 | 2 | 2 |
| 11 | 11 | 11 | 11 |
| 12 | 12 | 12 | 12 |
| 13 | 13 | 13 | 13 |
| 3 | 3 | 3 | 3 |
| 14 | 14 | 14 | 14 |
| 5 | 5 | 5 | 5 |
+----+----+----+----+
7 rows in set (0.00 sec)
可以看到,结果并不是按自增ID有序,但是否意味着不能用来做增量查询了?继续看下面的操作:
MySQL [test]> SELECT COUNT(1) FROM tableB1;
+----------+
| COUNT(1) |
+----------+
| 8 |
+----------+
1 row in set (0.00 sec)
MySQL [test]> SELECT * FROM tableB1 WHERE id>0 LIMIT 3;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 1 | 1 |
| 2 | 2 | 2 | 2 |
| 3 | 3 | 3 | 3 |
+----+----+----+----+
3 rows in set (0.01 sec)
// 未乱序
MySQL [test]> SELECT * FROM tableB1 WHERE id>0 LIMIT 7;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 1 | 1 |
| 2 | 2 | 2 | 2 |
| 3 | 3 | 3 | 3 |
| 5 | 5 | 5 | 5 |
| 11 | 11 | 11 | 11 |
| 12 | 12 | 12 | 12 |
| 13 | 13 | 13 | 13 |
+----+----+----+----+
7 rows in set (0.00 sec)
// 乱序
MySQL [test]> SELECT * FROM tableB1 WHERE id>0 LIMIT 8;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 1 | 1 |
| 2 | 2 | 2 | 2 |
| 11 | 11 | 11 | 11 |
| 12 | 12 | 12 | 12 |
| 13 | 13 | 13 | 13 |
| 3 | 3 | 3 | 3 |
| 14 | 14 | 14 | 14 |
| 5 | 5 | 5 | 5 |
+----+----+----+----+
8 rows in set (0.00 sec)
MySQL [jay_data]> SELECT * FROM tableB1 WHERE id>6 LIMIT 3;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 11 | 11 | 11 | 11 |
| 12 | 12 | 12 | 12 |
| 13 | 13 | 13 | 13 |
+----+----+----+----+
3 rows in set (0.01 sec)
MySQL [jay_data]> SELECT * FROM tableB1 WHERE id>8 LIMIT 3;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 11 | 11 | 11 | 11 |
| 12 | 12 | 12 | 12 |
| 13 | 13 | 13 | 13 |
+----+----+----+----+
3 rows in set (0.00 sec)
这里发生了有趣的事,如果LIMIT指定的数小于表实际的记录数,则仍然是按ID有序,否则是ID是乱序的。但是实际遇到:即使LIMIT指定的数小于表实际的记录数,也会返回乱序的结果。
建表SQL语句:
DROP TABLE IF EXISTS `tableB2`;
CREATE TABLE `tableB2` (
`id` BIGINT NOT NULL AUTO_INCREMENT,
`af` INT NOT NULL,
`bf` INT NOT NULL,
`cf` INT NOT NULL,
UNIQUE INDEX `idx_af` (`af`),
INDEX `idx_id` (`id`),
INDEX `idx_bf` (`bf`)
)ENGINE=MyISAM;
依顺序执行下列插入操作:
INSERT INTO tableB2 (af,bf,cf) VALUES (1,2,1);
INSERT INTO tableB2 (af,bf,cf) VALUES (2,1,2);
INSERT INTO tableB2 (id,af,bf,cf) VALUES (11,12,11,11);
INSERT INTO tableB2 (id,af,bf,cf) VALUES (12,11,12,12);
INSERT INTO tableB2 (af,bf,cf) VALUES (13,16,13);
INSERT INTO tableB2 (id,af,bf,cf) VALUES (3,3,3,3);
INSERT INTO tableB2 (af,bf,cf) VALUES (14,17,14);
INSERT INTO tableB2 (id,af,bf,cf) VALUES (5,15,5,5);
查看结果:
// 乱序
MySQL [test]> SELECT * FROM tableB2;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 1 | 1 |
| 2 | 2 | 2 | 2 |
| 11 | 11 | 11 | 11 |
| 12 | 12 | 12 | 12 |
| 13 | 13 | 13 | 13 |
| 3 | 3 | 3 | 3 |
| 14 | 14 | 14 | 14 |
| 5 | 5 | 5 | 5 |
+----+----+----+----+
8 rows in set (0.00 sec)
自增ID不影响查询结果的顺序,继续看下面的操作:
MySQL [test]> SELECT COUNT(1) FROM tableB2;
+----------+
| COUNT(1) |
+----------+
| 8 |
+----------+
1 row in set (0.01 sec)
MySQL [test]> SELECT * FROM tableB2 WHERE id>0 LIMIT 3;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 1 | 1 |
| 2 | 2 | 2 | 2 |
| 3 | 3 | 3 | 3 |
+----+----+----+----+
3 rows in set (0.00 sec)
// 未乱序
MySQL [test]> SELECT * FROM tableB2 WHERE id>0 LIMIT 7;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 1 | 1 |
| 2 | 2 | 2 | 2 |
| 3 | 3 | 3 | 3 |
| 5 | 5 | 5 | 5 |
| 11 | 11 | 11 | 11 |
| 12 | 12 | 12 | 12 |
| 13 | 13 | 13 | 13 |
+----+----+----+----+
7 rows in set (0.00 sec)
// 乱序
MySQL [test]> SELECT * FROM tableB2 WHERE id>0 LIMIT 8;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 1 | 1 | 1 | 1 |
| 2 | 2 | 2 | 2 |
| 11 | 11 | 11 | 11 |
| 12 | 12 | 12 | 12 |
| 13 | 13 | 13 | 13 |
| 3 | 3 | 3 | 3 |
| 14 | 14 | 14 | 14 |
| 5 | 5 | 5 | 5 |
+----+----+----+----+
8 rows in set (0.00 sec)
MySQL [jay_data]> SELECT * FROM tableB2 WHERE id>6 LIMIT 3;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 11 | 11 | 11 | 11 |
| 12 | 12 | 12 | 12 |
| 13 | 13 | 13 | 13 |
+----+----+----+----+
3 rows in set (0.01 sec)
MySQL [jay_data]> SELECT * FROM tableB2 WHERE id>8 LIMIT 3;
+----+----+----+----+
| id | af | bf | cf |
+----+----+----+----+
| 11 | 11 | 11 | 11 |
| 12 | 12 | 12 | 12 |
| 13 | 13 | 13 | 13 |
+----+----+----+----+
3 rows in set (0.00 sec)
现象和自增ID为主键时完全相同。
MyISAM的索引也是B+树结构,但索引文件和数据文件分开存储在不同文件。如果LIMIT的值达到或超过表的总记录数,则查询直接扫描数据文件,因此如果不指定“ORDER BY f_id”,则返回结果和插入顺序一致。但如果LIMIT的值小于表的总记录数,则和InnoDB一样扫描索引,因此可以不指定“ORDER BY f_id”。MyISAM的主键(主索引)和普通索引没有本质区别,只是主键有唯一性约束,而普通索引可重复。
实际情况会更复杂,比如有修改有删除,这些都需要是一步测试,甚至可能和版本相关。即使是聚集索引,不指定“ORDER BY f_id”,也没法保证顺序。如果对数据没有严格的要求,可以考虑不指定“ORDER BY f_id”,但如果必须不多不少,则必须带上“ORDER BY f_id”,不管是InnoDB还是MyISAM,也不管自增ID是主键还是非主键。但是对于一张大表,加上“ORDER BY f_id”后的查询性能可能降低一个甚至更多数量级。谨记:MySQL没有默认顺序这个概念。
在使用“ORDER BY f_id”时,请指定f_id的上下限,这样能够保证较好的性能,比如:“WHERE f_id>=N AND f_id<=M”,否则如果只有上限或下限,性能可能会受到很大影响,建议用EXPLAIN了解详情。
如果实在不想用ORDER BY f_id”,还可以考虑如下方式(query可能返回空可能是扫描完了,也可能是该段是空隙无数据):
const int step = 1000;
while (true)
{
const std::string& sql =
format_string(
"SELECT f_id,f_a,f_b,f_c,f_d FROM table "
"WHERE f_id BETWEEN %u AND (%u+step)",
id, id);
mysql.query(sql);
id += step + 1;
}