psql交互式词法解析流程分析
交互式词法解析的经典代码框架,需要自己写个交互式小工具可以用psql当模板,快速上手lex
* <xb> bit string literal
* <xc> extended C-style comments
* <xd> delimited identifiers (double-quoted identifiers)
* <xh> hexadecimal numeric string
* <xq> standard quoted strings
* <xqs> quote stop (detect continued strings)
* <xe> extended quoted strings (support backslash escape sequences)
* <xdolq> $foo$ quoted strings
* <xui> quoted identifier with Unicode escapes
* <xus> quoted string with Unicode escapes
全篇分析三类SQL的解析过程,见大标题和里面的用例。
select 1;
psql/mainloop.c
MainLoop(FILE *source)
...
/* 交互式拿到单行SQL */
/* line: "select 1;" */
line = gets_interactive(get_prompt(prompt_status, cond_stack), query_buf);
...
/* 检查是否是特殊预留关键字 */
...
scan_state = psql_scan_create(&psqlscan_callbacks);
...
psql_scan_setup(scan_state, line, strlen(line), pset.encoding, standard_strings());
/* p scan_state */
/* $3 = {
scanner = 0x138afb0, output_buf = 0x0, buffer_stack = 0x0, scanbufhandle = 0x137c770,
scanbuf = 0x137c530 "select 1;", scanline = 0x137c4e0 "select 1;", encoding = 6,
safe_encoding = true, std_strings = true, curline = 0x137c530 "select 1;",
refline = 0x137c4e0 "select 1;", start_state = 0, state_before_str_stop = 0,
paren_depth = 0, xcdepth = 0, dolqstart = 0x0, identifier_count = 0,
identifiers = "\000\000\000", begin_depth = 0,
callbacks = 0x471db0 <psqlscan_callbacks>, cb_passthrough = 0x138b050}
*/
while (successResult == EXIT_SUCCESS)
...
scan_result = psql_scan(scan_state, query_buf, &prompt_tmp)
lexresult = yylex(NULL, state->scanner)
/* psqlscan.l */
{identifier} { ... }
psqlscan_emit (state=0x138af20, txt=0x137c530 "select", len=6)
{whitespace} { ... }
psqlscan_emit (state=0x138af20, txt=0x137c536 " ", len=1)
{integer} { ... }
psqlscan_emit (state=0x138af20, txt=0x137c537 "1", len=1)
";" { ... }
psqlscan_emit (state=0x138af20, txt=0x137c538 ";", len=1)
if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
cur_state->start_state = YY_START;
cur_state->identifier_count = 0;
return LEXRES_SEMI;
switch (lexresult)
case LEXRES_SEMI: /* semicolon */
result = PSCAN_SEMICOLON;
*prompt = PROMPT_READY;
break;
...
if (scan_result == PSCAN_SEMICOLON || (scan_result == PSCAN_EOL && pset.singleline))
...
success = SendQuery(query_buf->data)
...
else if (scan_result == PSCAN_BACKSLASH)
...
if (scan_result == PSCAN_INCOMPLETE || scan_result == PSCAN_EOL)
break
...
psql_scan_finish(scan_state)
总结多行SQL解析流程:
测试SQL:
select datname,usename,query,xact_start,now()-xact_start xact_duration,query_start,now()-query_start query_duration,state
from pg_stat_activity
where state<>$$idle$$
and (backend_xid is not null or backend_xmin is not null)
and now()-xact_start > interval $$30 min$$
order by xact_start;
第一行解析
MainLoop(FILE *source)
...
while (successResult == EXIT_SUCCESS)
/* 交互式拿到单行SQL */
// select datname,usename,query,xact_start,now()-xact_start xact_duration,query_start,now()-query_start query_duration,state
line = gets_interactive(get_prompt(prompt_status, cond_stack), query_buf);
while (successResult == EXIT_SUCCESS)
...
scan_result = psql_scan(scan_state, query_buf, &prompt_tmp)
lexresult = yylex(NULL, state->scanner)
/* psqlscan.l */
{identifier}
psqlscan_emit (state=0x138af20, txt=0x137ded0 "select", len=6)
{whitespace}
psqlscan_emit (state=0x138af20, txt=0x137ded6 " ", len=1)
{identifier}
psqlscan_emit (state=0x138af20, txt=0x137ded7 "datname", len=7)
{self}
psqlscan_emit (state=0x138af20, txt=0x137dede ",", len=1)
{identifier}
psqlscan_emit (state=0x138af20, txt=0x137dedf "usename", len=7)
...
psqlscan_emit (state=0x138af20, txt=0x137dee6 ",", len=1)
psqlscan_emit (state=0x138af20, txt=0x137dee7 "query", len=5)
psqlscan_emit (state=0x138af20, txt=0x137dee6 ",", len=1)
psqlscan_emit (state=0x138af20, txt=0x137deed "xact_start", len=10)
psqlscan_emit (state=0x138af20, txt=0x137def7 ",", len=1)
...
{identifier}
psqlscan_emit (state=0x138af20, txt=0x137def8 "now", len=3)
"("
cur_state->paren_depth++;
psqlscan_emit (state=0x138af20, txt=0x137defb "(", len=1)
")"
psqlscan_emit (state=0x138af20, txt=0x137defc ")", len=1)
{self}
psqlscan_emit (state=0x138af20, txt=0x137defd "-", len=1)
{identifier}
psqlscan_emit (state=0x138af20, txt=0x137defe "xact_start", len=10)
...
psqlscan_emit (state=0x138af20, txt=0x137df44 "state", len=5)
第二行解析
MainLoop(FILE *source)
...
while (successResult == EXIT_SUCCESS)
/* 交互式拿到单行SQL */
// from pg_stat_activity
line = gets_interactive(get_prompt(prompt_status, cond_stack), query_buf);
while (successResult == EXIT_SUCCESS)
...
scan_result = psql_scan(scan_state, query_buf, &prompt_tmp)
lexresult = yylex(NULL, state->scanner)
/* psqlscan.l */
...
...
问题:正常SQL遇到分号就结束了,但是存储过程中经常有分号怎么处理?
答案:$$内部启动xdolq模式,整段不在解析,直接发给服务端
测试SQL:
CREATE OR REPLACE FUNCTION somefunc() RETURNS integer AS $$
<< outerblock >>
DECLARE
quantity integer := 30;
BEGIN
RAISE NOTICE 'Quantity here is %', quantity; -- Prints 30
quantity := 50;
--
-- Create a subblock
--
DECLARE
quantity integer := 80;
BEGIN
RAISE NOTICE 'Quantity here is %', quantity; -- Prints 80
RAISE NOTICE 'Outer quantity here is %', outerblock.quantity; -- Prints 50
END;
RAISE NOTICE 'Quantity here is %', quantity; -- Prints 50
RETURN quantity;
END;
$$ LANGUAGE plpgsql;
$$前
CREATE OR REPLACE FUNCTION somefunc() RETURNS integer AS $$
=================================================
{identifier} CREATE
{whitespace}
{identifier} OR
{whitespace}
{identifier} REPLACE
{whitespace}
{identifier} FUNCTION
{whitespace}
{identifier} somefunc
"("
")"
{whitespace}
{identifier} RETURNS
{whitespace}
{identifier} integer
{whitespace}
{identifier} AS
{whitespace}
{dolqdelim} $$ BEGIN(xdolq)
$$中间部分
<< outerblock >>
DECLARE
quantity integer := 30;
BEGIN
RAISE NOTICE 'Quantity here is %', quantity; -- Prints 30
quantity := 50;
--
-- Create a subblock
--
DECLARE
quantity integer := 80;
BEGIN
RAISE NOTICE 'Quantity here is %', quantity; -- Prints 80
RAISE NOTICE 'Outer quantity here is %', outerblock.quantity; -- Prints 50
END;
RAISE NOTICE 'Quantity here is %', quantity; -- Prints 50
RETURN quantity;
END;
=================================================
<xdolq>{dolqinside} $$包裹的中间内容全部匹配
psqlscan_emit (state=0x138af20, txt=0x137ea5b "\n<< outerblock >>\nDECLARE\n.......)
$$后面
=================================================
$$ LANGUAGE plpgsql;
=================================================
<xdolq>{dolqdelim} $$
free(cur_state->dolqstart) 清理记录的$$
cur_state->dolqstart = NULL
BEGIN(INITIAL) 恢复正常识别模式
{whitespace}
{identifier} LANGUAGE
{whitespace}
{identifier} plpgsql
";"
if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
cur_state->start_state = YY_START;
cur_state->identifier_count = 0;
return LEXRES_SEMI;
{identifier} DO
{whitespace}
{dolqdelim} $$
cur_state->dolqstart = pg_strdup(yytext)
BEGIN(xdolq)
<xdolq>{dolqinside} $$包裹的中间内容全部匹配
<xdolq>{dolqdelim} $$
";"
return LEXRES_SEMI;