之前讲了mysql的binlog,redo log, 也该轮到ibd文件了...
版本: mysql 8.0 (附的python源码都尽量标注了源码位置)
innodb_file_per_table = ON
innodb_page_size = 16384
每个innodb表一个数据文件, 数据和索引都放在同一个文件的(ibd)
每个ibd文件包含1个(不考虑ibdata)表空间(一张表), 每个表空间包含若干个segment. 每个segment对应一个索引的叶子节点/非叶子节点. 也就是每2个segment对于一个索引. 每个segment对于n个区(空间分配是按照区来的). 每个区(extent)对于n个page. 为了方便管理区, 每256个区会使用一个page(XDES:EXTENT DESCRIPTOR)来记录相关信息. (是不是都晕了... 不慌,后面有图)
page和区的计算方式如下:
page size | file space extent size
----------+-----------------------
4 KiB | 256 pages = 1 MiB
8 KiB | 128 pages = 1 MiB
16 KiB | 64 pages = 1 MiB
32 KiB | 64 pages = 2 MiB
64 KiB | 64 pages = 4 MiB
整理了一部分, 差不多就像下面这样
innodb页结构如下
名字 | 大小(字节) | 描述 |
---|---|---|
FIL_HEADER | 38 | 记录页类型,之类的信息 |
BODY | 不同的页结构不一样 | |
FIL_TRAILER | 8(结尾) | 校验和(4)+LSN(4) |
下面就看看常见的页
就是FIL_SPACE_PAGE
第一页, 记录表空间信息, 比如页数量, 使用数量, 未使用数量,碎片页等. 同时还作为XDES记录前256个区的信息. 结构如下
名字 | 大小 | 描述 |
---|---|---|
FIL_HEADER | 38 | 页头 |
SPACE_HEADER | 112 | 这个页的页头... |
XDES | 256*40 | 区描述信息, 每个描述信息40字节,一共256个区 |
FIL_TRAILER | 8 |
剩余的都是空着的
SPACE_HEADER的详细信息可以看上面的图, 画得还是很详细的.
这个页是记录segment的, 就是记录索引的, 是第三个页. 结构如下
名字 | 大小 | 描述 |
---|---|---|
FIL_HEADER | 38 | |
INODE INFO | 12 | 其它inode信息(一个inode可能不够用) |
SEGMENT | 192*n | 索引信息, 一个索引2个sgement. 8.0里面前2个segment是记录的sdi |
FIL_TRAILER | 8 |
基本上解析这两页就能得到ibd文件的大部分信息了.
索引页. mysql的主键索引记录了所有字段数据, 二级索引记录了索引值和主键值. (没得主键的,mysql会自动加个隐藏字段作为主键)
名字 | 大小(字节) | 描述 |
---|---|---|
FIL_HEADER | 38 | |
INDEX PAGE HEADER | 56 | 页信息, 比如page_level |
PAGE DATA | 包含最大值最小和用户记录值 | |
PAGE DIRECTORY | 页内目录信息,slot | |
FIL_TRAILER | 8 |
每256MB一个, 记录区相关信息
FIL_HEADER | 38 | |
XDES 0 | 40 | 区描述信息 |
.... | 40*n | |
XDES 255 | 40 | |
FIL_TRAILER | 8 |
import innodb_file
filename = '/data/mysql_3314/mysqldata/db1/ddcw2023_1.ibd'
aa = innodb_file.innodb_ibd(filename)
page_summary = aa.page_summary()
for x in page_summary:
print(x,page_summary[x])
import innodb_file
filename = '/data/mysql_3314/mysqldata/db1/ddcw2023_1.ibd'
page_size = 16384
with open(filename,'rb') as f:
data1 = f.read(page_size)
header = innodb_file.fil_header(data1[:38])
print(header)
space_header = innodb_file.space_header(data1[38:38+112])
print(space_header)
得到表空间ID为85, 表大小为764MB, 我们区数据库验证下
对的上, 没毛病
import innodb_file
filename = '/data/mysql_3314/mysqldata/db1/ddcw2023_1.ibd'
page_size = 16384
with open(filename,'rb') as f:
f.seek(page_size*2,0)
data1 = f.read(page_size)
header = innodb_file.fil_header(data1[:38])
print(header)
inode_info = innodb_file.inode(data1[38:16384-8])
print(inode_info)
print(inode_info.index)
print(inode_info.sdi_page)
发现有两个索引, 我们去数据库端验证下
发现确实有两个索引
索引页下章再讲,毕竟有点多.... 你也可以自己去解析看看
1. innodbl数据大小限制为 (2**32)*page_size = 64TB (page_id是4字节)
2. ibd文件是按照区来分配内存的, 所以ibd文件一定是区的整数倍(碎片页(32)使用完之后会扩展至一个区大小)
#解析innodb 文件的 (8.0)
import struct
import innodb_page_type
innodb_page_name = {}
for x in dir(innodb_page_type):
if x[:2] != '__':
innodb_page_name[getattr(innodb_page_type,x)] = x
#FIL_PAGE_DATA = 38
class fil_header(object):
def __init__(self,bdata):
if len(bdata) != 38:
return False
self.FIL_PAGE_SPACE_OR_CHKSUM, self.FIL_PAGE_OFFSET, self.FIL_PAGE_PREV, self.FIL_PAGE_NEXT, self.FIL_PAGE_LSN, self.FIL_PAGE_TYPE, self.FIL_PAGE_FILE_FLUSH_LSN = struct.unpack('>4LQHQ',bdata[:34])
if self.FIL_PAGE_TYPE == innodb_page_type.FIL_PAGE_COMPRESSED:
pass #懒得管了....
self.FIL_PAGE_SPACE_ID = struct.unpack('>L',bdata[34:38])[0]
def __str__(self):
return f'PAGE_SPACE_ID:{self.FIL_PAGE_SPACE_ID} PAGE_TYPE:{innodb_page_name[self.FIL_PAGE_TYPE]} PREV:{self.FIL_PAGE_PREV} NEXT:{self.FIL_PAGE_NEXT}'
#8
class fil_trailer(object):
def __init__(self,bdata):
self.checksum, self.FIL_PAGE_LSN = struct.unpack('>2L',bdata[:8])
def __str__(self):
return f'CHECKSUM:{self.checksum} PAGE_LSN:{self.FIL_PAGE_LSN}'
#index page header
# uint32_t PAGE_HEADER = FSEG_PAGE_DATA;
# size : 36 + 2 * FSEG_HEADER_SIZE = 56
class page_header(object):
def __init__(self,bdata):
self.PAGE_N_DIR_SLOTS, self.PAGE_HEAP_TOP, self.PAGE_N_HEAP, self.PAGE_FREE, self.PAGE_GARBAGE, self.PAGE_LAST_INSERT, self.PAGE_DIRECTION, self.PAGE_N_DIRECTION, self.PAGE_N_RECS, self.PAGE_MAX_TRX_ID, self.PAGE_LEVEL, self.PAGE_INDEX_ID = struct.unpack('<9HQHQ',bdata[:36])
self.PAGE_BTR_SEG_LEAF = fseg_header(bdata[36:46])
self.PAGE_BTR_SEG_TOP = fseg_header(bdata[46:56])
def __str__(self):
return f'SLOTS:{self.PAGE_N_DIR_SLOTS} PAGE_LEVEL:{self.PAGE_LEVEL} INDEX_ID:{self.PAGE_INDEX_ID} RECORDS:{self.PAGE_N_RECS} PAGE_HEAP_TOP:{self.PAGE_HEAP_TOP} PAGE_GARBAGE(deleted):{self.PAGE_GARBAGE} PAGE_FREE:{self.PAGE_FREE}'
#Offset of the directory start down from the page end
class page_directory(object):
def __init__(self,bdata):
pass
def __str__(self):
pass
#FIL_PAGE_TYPE_FSP_HDR 第一个page
#FSP_HEADER_SIZE = 32 + 5 * FLST_BASE_NODE_SIZE = 32+5*16 = 112
class space_header(object):
def __init__(self,bdata,):
FLST_BASE_NODE_SIZE = 16
self.FSP_SPACE_ID, self.FSP_NOT_USED, self.FSP_SIZE, self.FSP_FREE_LIMIT, self.FSP_SPACE_FLAGS, self.FSP_FRAG_N_USED = struct.unpack('>6L',bdata[:24])
i = 24
self.FSP_FREE = bdata[i:i+FLST_BASE_NODE_SIZE]
i += FLST_BASE_NODE_SIZE
self.FSP_FREE_FRAG = bdata[i:i+FLST_BASE_NODE_SIZE]
i += FLST_BASE_NODE_SIZE
self.FSP_FULL_FRAG= bdata[i:i+FLST_BASE_NODE_SIZE]
i += FLST_BASE_NODE_SIZE
self.FSP_SEG_ID = bdata[i:i+8] #/** 8 bytes which give the first unused segment id */
i += 8
self.FSP_SEG_INODES_FULL = bdata[i:i+FLST_BASE_NODE_SIZE]
i += FLST_BASE_NODE_SIZE
self.FSP_SEG_INODES_FREE = bdata[i:i+FLST_BASE_NODE_SIZE]
def __str__(self):
return f'FSP_SPACE_ID:{self.FSP_SPACE_ID} PAGE_COUNT:{self.FSP_SIZE} USED(FSP_FREE_FRAG):{self.FSP_FRAG_N_USED}'
#storage/innobase/include/fsp0fsp.h
class inode(object):
def __init__(self,bdata,FSP_EXTENT_SIZE=64): #按16384算, 1024*1024/16384 = 64 page
i = 0
lbdata = len(bdata)
FLST_BASE_NODE_SIZE = 16
FSEG_FRAG_ARR_N_SLOTS = int(FSP_EXTENT_SIZE / 2)
FSEG_FRAG_SLOT_SIZE = 4
FSEG_INODE_SIZE = 16 + 3*FLST_BASE_NODE_SIZE + FSEG_FRAG_ARR_N_SLOTS*FSEG_FRAG_SLOT_SIZE
segment_list = []
self.node_pre,self.node_next = flst(bdata[0:12])
i += 12
while True:
if lbdata <= i+FSEG_INODE_SIZE-1:
break
FSEG_ID = struct.unpack('>Q',bdata[i:i+8])[0]
if FSEG_ID == 0:
i += FSEG_INODE_SIZE
continue
i += 8
FSEG_NOT_FULL_N_USED = struct.unpack('>L',bdata[i:i+4])[0]
i += 4
FSEG_FREE = flst_base(bdata[i:i+FLST_BASE_NODE_SIZE])
i += FLST_BASE_NODE_SIZE
FSEG_NOT_FULL = flst_base(bdata[i:i+FLST_BASE_NODE_SIZE])
i += FLST_BASE_NODE_SIZE
FSEG_FULL = flst_base(bdata[i:i+FLST_BASE_NODE_SIZE])
i += FLST_BASE_NODE_SIZE
FSEG_MAGIC_N = bdata[i:i+4]
i += 4
FSEG_FRAG_ARR = [] #碎片页
for x in range(FSEG_FRAG_ARR_N_SLOTS):
FSEG_FRAG_ARR.append(struct.unpack('>L',bdata[i:i+FSEG_FRAG_SLOT_SIZE])[0])
i += FSEG_FRAG_SLOT_SIZE
segment_list.append({'FSEG_ID':FSEG_ID,'FSEG_NOT_FULL_N_USED':FSEG_NOT_FULL_N_USED,'FSEG_FREE':FSEG_FREE,'FSEG_NOT_FULL':FSEG_NOT_FULL,'FSEG_FULL':FSEG_FULL,'FSEG_MAGIC_N':FSEG_MAGIC_N,'FSEG_FRAG_ARR':FSEG_FRAG_ARR})
self.segment_list = segment_list
self.root_pages = [ x['FSEG_FRAG_ARR'][0] for x in segment_list ] #并非都是非叶子节点
self.sdi_page = self.root_pages[0]
self.index = []
for x in range(1,int(len(self.root_pages)/2)):
self.index.append({'no_leaf':self.root_pages[x*2],'leaf':self.root_pages[x*2+1]})
def __str__(self,):
return f'SEGMENT COUNTS:{len(self.segment_list)} INDEX_COUNT:{len(self.index)} INODE_PRE:{self.node_pre[0] if self.node_pre[0] != 4294967295 else None} INODE_NEXT:{self.node_next[0] if self.node_next[0] != 4294967295 else None}'
#storage/innobase/include/fsp0fsp.h
#XDES_SIZE = (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE)) = 24 + (128+7)/8 = 40
#
class xdes(object):
def __init__(self,bdata):
#38+112+bdata+8=page
extent_list = []
XDES_SIZE = 40
FLST_NODE_SIZE = 12
i = 0
lbdata = len(bdata)
while True:
if i+XDES_SIZE-1 >= lbdata:
break #不够一个xdes了
XDES_ID = struct.unpack('>Q',bdata[i:i+8])[0] #/** The identifier of the segment to which this extent belongs */
if XDES_ID == 0:
i += XDES_SIZE
continue
i += 8
XDES_FLST_NODE = flst(bdata[i:i+FLST_NODE_SIZE])
i += FLST_NODE_SIZE
XDES_STATE = struct.unpack('>L',bdata[i:i+4])[0] #xdes_state_t 0:未初始化, 1:FREE 2:FREE_FRAG 3:FULL_FRAG 4:属于segment 5:FSEG_FRAG
i += 4
XDES_BITMAP = bdata[i:i+16]
i += 16
extent_list.append({'XDES_ID':XDES_ID,'XDES_FLST_NODE':XDES_FLST_NODE,'XDES_STATE':XDES_STATE,'XDES_BITMAP':XDES_BITMAP})
self.extent_list = extent_list
def __str__(self):
return f'EXTENT COUNT: {len(self.extent_list)}'
def summary(self):
pass
class page_data(object):
def __init__(self,bdata):
pass
def __str__(self):
pass
def _get_fil_addr(bdata):
return struct.unpack('>LH',bdata)
def flst_base(bdata):
#FLST_BASE_NODE storage/innobase/include/fut0lst.ic #/* We define the field offsets of a base node for the list */
#FLST_LEN:0-4 FLST_FIRST:4-(4 + FIL_ADDR_SIZE) FLST_LAST:4+FIL_ADDR_SIZE:16
#4+6+6
#FIL_ADDR_SIZE = FIL_ADDR_PAGE(4) + FIL_ADDR_BYTE(2) #/** First in address is the page offset. */ Then comes 2-byte byte offset within page.*/
FLST_LEN = struct.unpack('>L',bdata[:4])[0]
FLST_FIRST = struct.unpack('<LH',bdata[4:10])
FLST_LAST = struct.unpack('<LH',bdata[10:16])
return (FLST_LEN,FLST_FIRST,FLST_LAST)
def flst(bdata):
#FLST_NODE storage/innobase/include/fut0lst.ic #/* We define the field offsets of a node for the list */
#FLST_PREV:6 FLST_NEXT:6
FLST_PREV = struct.unpack('<LH',bdata[0:6])
FLST_NEXT = struct.unpack('<LH',bdata[6:12])
return(FLST_PREV,FLST_NEXT)
def fseg_header(bdata):
return struct.unpack('>LLH',bdata[:10])
class innodb_ibd(object):
def __init__(self,filename,pagesize=16384):
self.filename = filename
self.pagesize = pagesize
def page_summary(self,del0=True):
"""
返回dict, 各page的数量
"""
data = {}
for x in innodb_page_name:
data[x] = 0
f = open(self.filename,'rb')
i = 0
while True:
bdata = f.read(self.pagesize)
if len(bdata) < self.pagesize:
break
filh = fil_header(bdata[:38])
data[filh.FIL_PAGE_TYPE] += 1
i += 1
#if filh.FIL_PAGE_TYPE == innodb_page_type.FIL_PAGE_SDI:
# print(i-1)
f.close()
data1 = {}
for x in data:
if data[x] == 0 and del0:
continue
data1[innodb_page_name[x]] = data[x]
return data1
def index(self,n=0):
"""
获取第N个index, 返回(非叶子节点列表, 叶子节点列表)
"""
pass
#storage/innobase/include/fil0fil.h
#/** File page types (values of FIL_PAGE_TYPE) @{ */
#/** B-tree node */
FIL_PAGE_INDEX = 17855;
#/** R-tree node */
FIL_PAGE_RTREE = 17854;
#/** Tablespace SDI Index page */
FIL_PAGE_SDI = 17853;
#/** This page type is unused. */
FIL_PAGE_TYPE_UNUSED = 1;
#/** Undo log page */
FIL_PAGE_UNDO_LOG = 2;
#/** Index node */
FIL_PAGE_INODE = 3;
#/** Insert buffer free list */
FIL_PAGE_IBUF_FREE_LIST = 4;
#/* File page types introduced in MySQL/InnoDB 5.1.7 */
#/** Freshly allocated page */
FIL_PAGE_TYPE_ALLOCATED = 0;
#/** Insert buffer bitmap */
FIL_PAGE_IBUF_BITMAP = 5;
#/** System page */
FIL_PAGE_TYPE_SYS = 6;
#/** Transaction system data */
FIL_PAGE_TYPE_TRX_SYS = 7;
#/** File space header */
FIL_PAGE_TYPE_FSP_HDR = 8;
#/** Extent descriptor page */
FIL_PAGE_TYPE_XDES = 9;
#/** Uncompressed BLOB page */
FIL_PAGE_TYPE_BLOB = 10;
#/** First compressed BLOB page */
FIL_PAGE_TYPE_ZBLOB = 11;
#/** Subsequent compressed BLOB page */
FIL_PAGE_TYPE_ZBLOB2 = 12;
#/** In old tablespaces, garbage in FIL_PAGE_TYPE is replaced with
#this value when flushing pages. */
FIL_PAGE_TYPE_UNKNOWN = 13;
#/** Compressed page */
FIL_PAGE_COMPRESSED = 14;
#/** Encrypted page */
FIL_PAGE_ENCRYPTED = 15;
#/** Compressed and Encrypted page */
FIL_PAGE_COMPRESSED_AND_ENCRYPTED = 16;
#/** Encrypted R-tree page */
FIL_PAGE_ENCRYPTED_RTREE = 17;
#/** Uncompressed SDI BLOB page */
FIL_PAGE_SDI_BLOB = 18;
#/** Compressed SDI BLOB page */
FIL_PAGE_SDI_ZBLOB = 19;
#/** Legacy doublewrite buffer page. */
FIL_PAGE_TYPE_LEGACY_DBLWR = 20;
#/** Rollback Segment Array page */
FIL_PAGE_TYPE_RSEG_ARRAY = 21;
#/** Index pages of uncompressed LOB */
FIL_PAGE_TYPE_LOB_INDEX = 22;
#/** Data pages of uncompressed LOB */
FIL_PAGE_TYPE_LOB_DATA = 23;
#/** The first page of an uncompressed LOB */
FIL_PAGE_TYPE_LOB_FIRST = 24;
#/** The first page of a compressed LOB */
FIL_PAGE_TYPE_ZLOB_FIRST = 25;
#/** Data pages of compressed LOB */
FIL_PAGE_TYPE_ZLOB_DATA = 26;
#/** Index pages of compressed LOB. This page contains an array of
#z_index_entry_t objects.*/
FIL_PAGE_TYPE_ZLOB_INDEX = 27;
#/** Fragment pages of compressed LOB. */
FIL_PAGE_TYPE_ZLOB_FRAG = 28;
#/** Index pages of fragment pages (compressed LOB). */
FIL_PAGE_TYPE_ZLOB_FRAG_ENTRY = 29;
#/** Note the highest valid non-index page_type_t. */
FIL_PAGE_TYPE_LAST = FIL_PAGE_TYPE_ZLOB_FRAG_ENTRY;
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。