(总结速查)
(lp=line pointer:页面中等宽数组,每一个指向页面底部的数据区域)
关键步骤总结(no vacuum场景):
堆栈:
ExecModifyTable
ExecProcNode // ExecProcNode = 0x783005 <ExecIndexScan>
| ExecIndexScan
| ExecScan
| ExecScanFetch
| IndexNext
| index_getnext_slot
| tid = index_getnext_tid // 3.1 总是拿到ip_posid = 130
| index_fetch_heap
| table_index_fetch_tuple(ItemPointer tid) // {ip_posid = 130}
| heapam_index_fetch_tuple
| heap_hot_search_buffer // 3.2 遍历HOT链找旧元组
| heap_page_prune_opt // 3.3 碎片整理
| heap_page_prune
| heap_page_prune_execute
| PageRepairFragmentation
| compactify_tuples
ExecUpdate
ExecUpdateAct
table_tuple_update
heapam_tuple_update
heap_update // 3.4 更新
照常先给出分析用例。
-- 测试表,单页可以放136条数据
drop table testbl;
create table testbl(i int primary key not null, id int not null, info varchar(200) not null);
alter table testbl set (autovacuum_enabled = false);
insert into testbl select generate_series(1,130), (random()*100)::integer, repeat('DUfw',(5)::integer);
select * from testbl limit 10;
i | id | info
----+----+----------------------
1 | 57 | DUfwDUfwDUfwDUfwDUfw
2 | 2 | DUfwDUfwDUfwDUfwDUfw
3 | 29 | DUfwDUfwDUfwDUfwDUfw
4 | 37 | DUfwDUfwDUfwDUfwDUfw
5 | 2 | DUfwDUfwDUfwDUfwDUfw
6 | 44 | DUfwDUfwDUfwDUfwDUfw
7 | 53 | DUfwDUfwDUfwDUfwDUfw
8 | 24 | DUfwDUfwDUfwDUfwDUfw
9 | 49 | DUfwDUfwDUfwDUfwDUfw
10 | 17 | DUfwDUfwDUfwDUfwDUfw
HEAP_ONLY_TUPLE | HEAP_HOT_UPDATED
标记。UPDATE | lp=130 | lp=131 | lp=132 |
---|---|---|---|
第一次 | 原tuple:HEAP_HOT_UPDATED | 新tuple:HEAP_ONLY_TUPLE | |
第二次 | 原tuple:HEAP_ONLY_TUPLE HEAP_HOT_UPDATED | 新tuple:HEAP_ONLY_TUPLE | |
第三次 | 新tuple:HEAP_ONLY_TUPLE | 原tuple:HEAP_ONLY_TUPLE HEAP_HOT_UPDATED | |
第四次 | 原tuple:HEAP_ONLY_TUPLE HEAP_HOT_UPDATED | 新tuple:HEAP_ONLY_TUPLE | |
第五次 | 新tuple:HEAP_ONLY_TUPLE | 原tuple:HEAP_ONLY_TUPLE HEAP_HOT_UPDATED |
数组区域状态(等宽更新,对应上表)
注意uppdate都是使用lp有效的位置,用之前先做碎片整理,把有效的向下移动,填充到删除的地方。然后再insert。
数组区域状态(不等宽更新)
注意第四次更新和第五次更新,新数据更宽了,可以明显看到碎片整理的过程:
SELECT lp, lp_off, t_xmin, t_xmax, t_field3, t_ctid, t_infomask2, t_infomask FROM heap_page_items(get_raw_page('testbl', 0));
lp | lp_off | t_xmin | t_xmax | t_field3 | t_ctid | t_infomask2 | t_infomask
-----+--------+--------+--------+----------+---------+-------------+------------
130 | 912 | 8169 | 0 | 0 | (0,130) | 3 | 2050
============
t_infomask
============
2050(0x802) = HEAP_XMAX_INVALID | HEAP_HASVARWIDTH
============
t_infomask2(11 bits for number of attributes)
============
number_of_ = 3
update testbl set info = 'DDDDDDDDDDDDDDDDDDD1' where i = 130;
SELECT lp, lp_off, t_xmin, t_xmax, t_field3, t_ctid, t_infomask2, t_infomask FROM heap_page_items(get_raw_page('testbl', 0));
lp | lp_off | t_xmin | t_xmax | t_field3 | t_ctid | t_infomask2 | t_infomask
-----+--------+--------+--------+----------+---------+-------------+------------
130 | 912 | 8169 | 8170 | 0 | (0,131) | 16387 | 258
131 | 856 | 8170 | 0 | 0 | (0,131) | 32771 | 10242
============
t_infomask
============
258(0x102) = HEAP_XMIN_COMMITTED | HEAP_HASVARWIDTH
10242(0x2802) = HEAP_UPDATED | HEAP_XMAX_INVALID | HEAP_HASVARWIDTH
============
t_infomask2
============
16387(0x4003) = HEAP_HOT_UPDATED | 3attributes
32771(0x8003) = HEAP_ONLY_TUPLE | 3attributes
update testbl set info = 'DDDDDDDDDDDDDDDDDDD2' where i = 130;
SELECT lp, lp_off, t_xmin, t_xmax, t_field3, t_ctid, t_infomask2, t_infomask FROM heap_page_items(get_raw_page('testbl', 0));
lp | lp_off | t_xmin | t_xmax | t_field3 | t_ctid | t_infomask2 | t_infomask
-----+--------+--------+--------+----------+---------+-------------+------------
130 | 131 | | | | | |
131 | 912 | 8170 | 8171 | 0 | (0,132) | 49155 | 8450
132 | 856 | 8171 | 0 | 0 | (0,132) | 32771 | 10242
============
t_infomask
============
8450(0x2102) = HEAP_UPDATED | HEAP_XMIN_COMMITTED | HEAP_HASVARWIDTH
10242(0x2802) = HEAP_UPDATED | HEAP_XMAX_INVALID | HEAP_HASVARWIDTH
============
t_infomask2
============
49155(0xC003) = HEAP_ONLY_TUPLE | HEAP_HOT_UPDATED | 3attributes
32771(0x8003) = HEAP_ONLY_TUPLE | 3attributes
update testbl set info = 'DDDDDDDDDDDDDDDDDDD3' where i = 130;
SELECT lp, lp_off, t_xmin, t_xmax, t_field3, t_ctid, t_infomask2, t_infomask FROM heap_page_items(get_raw_page('testbl', 0));
lp | lp_off | t_xmin | t_xmax | t_field3 | t_ctid | t_infomask2 | t_infomask
-----+--------+--------+--------+----------+---------+-------------+------------
130 | 132 | | | | | |
131 | 856 | 8172 | 0 | 0 | (0,131) | 32771 | 10242
132 | 912 | 8171 | 8172 | 0 | (0,131) | 49155 | 8450
============
t_infomask
============
10242(0x2802) = HEAP_UPDATED | HEAP_XMAX_INVALID | HEAP_HASVARWIDTH
8450(0x2102) = HEAP_UPDATED | HEAP_XMIN_COMMITTED | HEAP_HASVARWIDTH
============
t_infomask2
============
49155(0xC003) = HEAP_ONLY_TUPLE | HEAP_HOT_UPDATED | 3attributes
32771(0x8003) = HEAP_ONLY_TUPLE | 3attributes
update testbl set info = 'DDDDDDDDDDDDDDDDDDD4' where i = 130;
SELECT lp, lp_off, t_xmin, t_xmax, t_field3, t_ctid, t_infomask2, t_infomask FROM heap_page_items(get_raw_page('testbl', 0));
lp | lp_off | t_xmin | t_xmax | t_field3 | t_ctid | t_infomask2 | t_infomask
-----+--------+--------+--------+----------+---------+-------------+------------
130 | 131 | | | | | |
131 | 912 | 8172 | 8173 | 0 | (0,132) | 49155 | 8450
132 | 856 | 8173 | 0 | 0 | (0,132) | 32771 | 10242
============
t_infomask
============
8450(0x2102) = HEAP_UPDATED | HEAP_XMIN_COMMITTED | HEAP_HASVARWIDTH
10242(0x2802) = HEAP_UPDATED | HEAP_XMAX_INVALID | HEAP_HASVARWIDTH
============
t_infomask2
============
49155(0xC003) = HEAP_ONLY_TUPLE | HEAP_HOT_UPDATED | 3attributes
32771(0x8003) = HEAP_ONLY_TUPLE | 3attributes
update testbl set info = 'DDDDDDDDDDDDDDDDDDD5' where i = 130;
SELECT lp, lp_off, t_xmin, t_xmax, t_field3, t_ctid, t_infomask2, t_infomask FROM heap_page_items(get_raw_page('testbl', 0));
lp | lp_off | t_xmin | t_xmax | t_field3 | t_ctid | t_infomask2 | t_infomask
-----+--------+--------+--------+----------+---------+-------------+------------
130 | 132 | | | | | |
131 | 856 | 8174 | 0 | 0 | (0,131) | 32771 | 10242
132 | 912 | 8173 | 8174 | 0 | (0,131) | 49155 | 8450
============
t_infomask
============
10242(0x2802) = HEAP_UPDATED | HEAP_XMAX_INVALID | HEAP_HASVARWIDTH
8450(0x2102) = HEAP_UPDATED | HEAP_XMIN_COMMITTED | HEAP_HASVARWIDTH
============
t_infomask2
============
49155(0xC003) = HEAP_ONLY_TUPLE | HEAP_HOT_UPDATED | 3attributes
32771(0x8003) = HEAP_ONLY_TUPLE | 3attributes
从顶层ExecModifyTable进入索引扫描部分,因为关掉了VAUUM,索引总是返回130:index_getnext_tid
ExecModifyTable
ExecProcNode // ExecProcNode = 0x783005 <ExecIndexScan>
ExecIndexScan
ExecScan
ExecScanFetch
IndexNext
index_getnext_slot
tid = index_getnext_tid // 3.1 总是拿到ip_posid = 130 <<<<
index_fetch_heap
table_index_fetch_tuple(ItemPointer tid) // {ip_posid = 130}
heapam_index_fetch_tuple
heap_hot_search_buffer // 3.2 遍历HOT链找旧元组
heap_page_prune_opt // 3.3 碎片整理
再用130去找元组:heap_hot_search_buffer
bool
heap_hot_search_buffer(
ItemPointer tid, // {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 130}
Relation relation, Buffer buffer,
Snapshot snapshot, HeapTuple heapTuple,
bool *all_dead, bool first_call)
{
...
...
/* Scan through possible multiple members of HOT-chain */
for (;;)
{
ItemId lp;
/* check for bogus TID */
if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
break;
【第一轮循环读到HOT链130,找到132】
这里比较重要,offnum现在还是130,但是lp拿出来就直接是132了:
ItemIdData = {lp_off = 132, lp_flags = 2, lp_len = 0}
这里的lp_flags=2表示LP_REDIRECT,重定向到132。
#define LP_UNUSED 0 /* unused (should always have lp_len=0) */
#define LP_NORMAL 1 /* used (should always have lp_len>0) */
#define LP_REDIRECT 2 /* HOT redirect (should have lp_len=0) */
#define LP_DEAD 3 /* dead, may or may not have storage */
【第二轮循环读到HOT链132,找到数据位置】
第二次进入循环后,offnum=132拿到的lp:
ItemIdData = {lp_off = 912, lp_flags = 1, lp_len = 53}
(LP_NORMAL)
现在的912就是指向数据区域了。
lp = PageGetItemId(dp, offnum);
/* check for unused, dead, or redirected items */
if (!ItemIdIsNormal(lp))
{
/* We should only see a redirect at start of chain */
if (ItemIdIsRedirected(lp) && at_chain_start)
{
/* Follow the redirect */
offnum = ItemIdGetRedirect(lp);
使用lp=132拿到offnum=132,继续下一轮循环继续遍历HOT链。
at_chain_start = false;
continue;
}
/* else must be end of chain */
break;
}
遍历完HOT链出循环,开始读132的数据部分,拼接元组。
heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
heapTuple->t_len = ItemIdGetLength(lp);
heapTuple->t_tableOid = RelationGetRelid(relation);
ItemPointerSet(&heapTuple->t_self, blkno, offnum);
if (!skip)
{
/* If it's visible per the snapshot, we must return it */
valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
buffer, snapshot);
if (valid)
{
ItemPointerSetOffsetNumber(tid, offnum);
PredicateLockTID(relation, &heapTuple->t_self, snapshot,
HeapTupleHeaderGetXmin(heapTuple->t_data));
if (all_dead)
*all_dead = false;
找到132返回。
return true;
}
}
skip = false;
将132移动到131的位置上,因为131删掉已经是空洞了。
这里就不展开分析了。记录下函数堆栈。
ExecModifyTable
ExecProcNode // ExecProcNode = 0x783005 <ExecIndexScan>
ExecIndexScan
ExecScan
ExecScanFetch
IndexNext
index_getnext_slot
tid = index_getnext_tid // 总是拿到ip_posid = 130 <<<<
index_fetch_heap
table_index_fetch_tuple(ItemPointer tid) // {ip_posid = 130}
heapam_index_fetch_tuple
heap_hot_search_buffer // 3.2 遍历HOT链找旧元组
heap_page_prune_opt // 3.3 碎片整理
heap_page_prune
heap_page_prune_execute
PageRepairFragmentation
compactify_tuples
ExecModifyTable
ExecProcNode // ExecProcNode = 0x783005 <ExecIndexScan>
| ExecIndexScan
| ExecScan
| ExecScanFetch
| IndexNext
| index_getnext_slot
| tid = index_getnext_tid // 总是拿到ip_posid = 130 <<<<
| index_fetch_heap
| table_index_fetch_tuple(ItemPointer tid) // {ip_posid = 130}
| heapam_index_fetch_tuple
| heap_hot_search_buffer // 3.2 遍历HOT链找旧元组
| heap_page_prune_opt // 3.3 碎片整理
| heap_page_prune
| heap_page_prune_execute
| PageRepairFragmentation
| compactify_tuples
ExecUpdate
ExecUpdateAct
table_tuple_update
heapam_tuple_update
heap_update
ItemId = {lp_off = 912, lp_flags = 1, lp_len = 53}
lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
HeapTupleData = {t_len = 53, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 132}, t_tableOid = 32946, t_data = 0x2aaaab4ae610}
HeapTupleData = {t_len = 53, t_self = {ip_blkid = {bi_hi = 65535, bi_lo = 65535}, ip_posid = 0}, t_tableOid = 32946, t_data = 0x26fe440}
oldtup.t_tableOid = RelationGetRelid(relation);
oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
oldtup.t_len = ItemIdGetLength(lp);
oldtup.t_self = *otid;
newtup->t_tableOid = RelationGetRelid(relation);
result = TM_Ok
result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
旧的加:HEAP_HOT_UPDATED 新的加:HEAP_ONLY_TUPLE
1: *newtup = {t_len = 53, t_self = {ip_blkid = {bi_hi = 65535, bi_lo = 65535}, ip_posid = 0}, t_tableOid = 32946, t_data = 0x26fe440}
3: oldtup = {t_len = 53, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 132}, t_tableOid = 32946, t_data = 0x2aaaab4ae610}
RelationPutHeapTuple执行insert操作,需要先找到插入位置。
RelationPutHeapTuple
PageAddItem
核心逻辑(重要)
for (offsetNumber = FirstOffsetNumber;
offsetNumber < limit; /* limit is maxoff+1 */
offsetNumber++)
{
itemId = PageGetItemId(phdr, offsetNumber);
/*
* We check for no storage as well, just to be paranoid;
* unused items should never have storage. Assert() that the
* invariant is respected too.
*/
Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));
if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
break;
}