文档中心>实践教程>云数据库 PostgreSQL>使用 tencentdb_ai 插件构建 AI 应用

使用 tencentdb_ai 插件构建 AI 应用

最近更新时间:2025-02-19 18:48:12

我的收藏
本文需要添加tencentdb_ai插件,该插件的使用请参考 使用 tencentdb_ai 插件调用大模型

集成 DeepSeek-V3 实现聊天

1. 创建 tencentdb_ai 插件。
damoxing=> CREATE EXTENSION tencentdb_ai CASCADE;
NOTICE: installing required extension "pgcrypto"
CREATE EXTENSION
damoxing=> SELECT * FROM pg_extension;
oid | extname | extowner | extnamespace | extrelocatable | extversion | extconfig | extcondition
-------+--------------+----------+--------------+----------------+------------+-----------+--------------
14275 | plpgsql | 10 | 11 | f | 1.0 | |
16631 | pgcrypto | 16615 | 2200 | t | 1.3 | |
16668 | tencentdb_ai | 16615 | 2200 | t | 1.0 | {16670} | {""}
(3 rows)
2. 添加 DeepSeek-V3 模型。
damoxing=> SELECT tencentdb_ai.add_model('deepseek-v3', '2024-05-22', 'ap-guangzhou', '$.Response.Choices[*].Message.Content');
add_model
-----------
(1 row)

damoxing=> SELECT tencentdb_ai.update_model_attr('deepseek-v3', 'SecretId', 'AKID**************');
update_model_attr
-------------------
(1 row)

damoxing=> SELECT tencentdb_ai.update_model_attr('deepseek-v3', 'SecretKey', '*******************');
update_model_attr
-------------------
(1 row)

3. 准备云数据库 PostgreSQL 对象。
damoxing=> CREATE TABLE chat_logs (
id SERIAL PRIMARY KEY,
user_input TEXT,
system_response TEXT,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE
damoxing=>
4. 调用大模型聊天。
创建存储过程调用 deepseek-v3 的 API 接口聊天:
damoxing=> CREATE OR REPLACE FUNCTION chat_with_deepseek(user_input TEXT)
RETURNS TEXT AS $$
DECLARE
system_response TEXT;
BEGIN
SELECT tencentdb_ai.chat_completions('deepseek-v3', user_input) INTO system_response;
INSERT INTO chat_logs (user_input, system_response)
VALUES (user_input, system_response);

RETURN system_response;
END;
$$ LANGUAGE plpgsql;
CREATE FUNCTION
damoxing=>
调用存储过程聊天:
damoxing=> SELECT chat_with_deepseek('hi,deepseek');
chat_with_deepseek
----------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------
"Hello! It seems like you're trying to reach DeepSeek, but I’m not sure what you’re looking for. Could you clarify? Are you referring t
o a specific tool, service, or something else? Let me know how I can assist!"
(1 row)

damoxing=>
5. 查询聊天结果。
damoxing=> SELECT * FROM chat_logs ORDER BY timestamp DESC;
id | user_input | system_response
| timestamp
----+--------------+--------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------+----------------------------
3 | hi,deepseek | "Hello! It seems like you're trying to reach DeepSeek, but I’m not sure what you’re looking for. Could you clarify?
Are you referring to a specific tool, service, or something else? Let me know how I can assist!" | 2025-02-18 16:02:39.145322
(1 row)

damoxing=>

集成 lke-text-embedding-v1 实现文本检索

1. 创建 tencentdb_ai 插件。
damoxing_lke_embdding=> CREATE EXTENSION tencentdb_ai CASCADE;
NOTICE: installing required extension "pgcrypto"
CREATE EXTENSION
damoxing_lke_embdding=> SELECT * FROM pg_extension;
oid | extname | extowner | extnamespace | extrelocatable | extversion | extconfig | extcondition
-------+--------------+----------+--------------+----------------+------------+-----------+--------------
14275 | plpgsql | 10 | 11 | f | 1.0 | |
16631 | pgcrypto | 16615 | 2200 | t | 1.3 | |
16668 | tencentdb_ai | 16615 | 2200 | t | 1.0 | {16670} | {""}
(3 rows)
2. 添加 lke-text-embedding-v1 模型。
damoxing_lke_embdding=> SELECT tencentdb_ai.add_model('lke-text-embedding-v1', '2024-05-22', 'ap-guangzhou', NULL);
-[ RECORD 1 ]
add_model |

damoxing_lke_embdding=> SELECT tencentdb_ai.update_model_attr('lke-text-embedding-v1', 'SecretId', 'AKID**************');
-[ RECORD 1 ]-----+-
update_model_attr |

damoxing_lke_embdding=> SELECT tencentdb_ai.update_model_attr('lke-text-embedding-v1', 'SecretKey', '*******************');
-[ RECORD 1 ]-----+-
update_model_attr |

damoxing_lke_embdding=>
3. 准备云数据库 PostgreSQL 对象。
damoxing_lke_embdding=> CREATE TABLE documents (
id SERIAL PRIMARY KEY,
content TEXT,
embedding FLOAT8[]
);
CREATE TABLE
damoxing_lke_embdding=>
4. 创建生成嵌入向量的存储过程。
damoxing_lke_embdding=> CREATE OR REPLACE FUNCTION generate_embedding(doc_contents TEXT[])
RETURNS FLOAT8[][] AS $$
DECLARE
embeddings FLOAT8[][] := '{}';
embedding FLOAT8[];
doc_content TEXT;
BEGIN
SELECT * from tencentdb_ai.get_embedding('lke-text-embedding-v1', doc_contents) INTO embedding;
RETURN embedding;
END;
$$ LANGUAGE plpgsql;
CREATE FUNCTION
damoxing_lke_embdding=>
5. 创建插入文档及其嵌入向量的存储过程。
damoxing_lke_embdding=> CREATE OR REPLACE FUNCTION insert_documents(doc_contents TEXT[])
RETURNS VOID AS $$
DECLARE
embedding FLOAT8[];
doc_content TEXT;
BEGIN
doc_content := doc_contents[0];
embedding := generate_embedding(doc_contents);
INSERT INTO documents (content, embedding)
VALUES (doc_contents, embedding);
END;
$$ LANGUAGE plpgsql;
CREATE FUNCTION
damoxing_lke_embdding=>
6. 创建余弦相似度计算的存储过程。
damoxing_lke_embdding=> CREATE OR REPLACE FUNCTION cosine_similarity(vec1 FLOAT8[], vec2 FLOAT8[])
RETURNS FLOAT8 AS $$
DECLARE
dot_product FLOAT8 := 0;
norm1 FLOAT8 := 0;
norm2 FLOAT8 := 0;
similarity FLOAT8;
BEGIN
FOR i IN 1..array_length(vec1, 1) LOOP
dot_product := dot_product + vec1[i] * vec2[i];
norm1 := norm1 + vec1[i] * vec1[i];
norm2 := norm2 + vec2[i] * vec2[i];
END LOOP;

similarity := dot_product / (sqrt(norm1) * sqrt(norm2));
RETURN similarity;
END;
$$ LANGUAGE plpgsql;
damoxing_lke_embdding=>
7. 创建文本检索的存储过程。
damoxing_lke_embdding=> CREATE OR REPLACE FUNCTION search_similar_documents(query TEXT, limit_count INT)
RETURNS TABLE(id INT, content TEXT, similarity FLOAT8) AS $$
DECLARE
query_embedding FLOAT8[];
BEGIN
query_embedding := generate_embedding(ARRAY[query]);

RETURN QUERY
SELECT d.id, d.content, cosine_similarity(d.embedding, query_embedding) AS similarity
FROM documents d
ORDER BY similarity DESC
LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;
CREATE FUNCTION
damoxing_lke_embdding=>
8. 插入文档及其嵌入向量。
damoxing_lke_embdding=> SELECT insert_documents(ARRAY['This is a sample document.']);
-[ RECORD 1 ]----+-
insert_documents |

damoxing_lke_embdding=> SELECT insert_documents(ARRAY['Another example of a document.']);
-[ RECORD 1 ]----+-
insert_documents |

damoxing_lke_embdding=> SELECT insert_documents(ARRAY['This document is about PostgreSQL and text embeddings.']);
-[ RECORD 1 ]----+-
insert_documents |

damoxing_lke_embdding=> SELECT insert_documents(ARRAY['Deep learning models can generate text embeddings.']);
-[ RECORD 1 ]----+-
insert_documents |

damoxing_lke_embdding=>
9. 进行文本检索。
damoxing_lke_embdding=> SELECT * FROM search_similar_documents('PostgreSQL text embeddings', 5);
-[ RECORD 1 ]----------------------------------------------------------
id | 3
content | {"This document is about PostgreSQL and text embeddings."}
similarity | 0.8649945496222797
-[ RECORD 2 ]----------------------------------------------------------
id | 4
content | {"Deep learning models can generate text embeddings."}
similarity | 0.6824638514797066
-[ RECORD 3 ]----------------------------------------------------------
id | 1
content | {"This is a sample document."}
similarity | 0.66412244051794
-[ RECORD 4 ]----------------------------------------------------------
id | 2
content | {"Another example of a document."}
similarity | 0.6142928906219256

damoxing_lke_embdding=>

集成 lke-reranker-base 实现 RAG

1. 创建 tencentdb_ai 插件。
damoxing_rerank=> CREATE EXTENSION tencentdb_ai CASCADE;
NOTICE: installing required extension "pgcrypto"
CREATE EXTENSION
damoxing=> SELECT * FROM pg_extension;
oid | extname | extowner | extnamespace | extrelocatable | extversion | extconfig | extcondition
-------+--------------+----------+--------------+----------------+------------+-----------+--------------
14275 | plpgsql | 10 | 11 | f | 1.0 | |
16631 | pgcrypto | 16615 | 2200 | t | 1.3 | |
16668 | tencentdb_ai | 16615 | 2200 | t | 1.0 | {16670} | {""}
(3 rows)
2. 添加 lke-reranker-base 和 lke-text-embedding-v1 模型。
damoxing_rerank=> SELECT tencentdb_ai.add_model('lke-reranker-base', '2024-05-22', 'ap-guangzhou', '$.Response.ScoreList');
add_model
-----------
(1 row)

damoxing_rerank=> SELECT tencentdb_ai.update_model_attr('lke-reranker-base', 'SecretId', 'AKID**************');
update_model_attr
-------------------
(1 row)

damoxing_rerank=> SELECT tencentdb_ai.update_model_attr('lke-reranker-base', 'SecretKey', '*******************');
update_model_attr
-------------------
(1 row)

damoxing_rerank=> SELECT tencentdb_ai.add_model('lke-text-embedding-v1', '2024-05-22', 'ap-guangzhou', NULL);
add_model
-----------
(1 row)

damoxing_rerank=> SELECT tencentdb_ai.update_model_attr('lke-text-embedding-v1', 'SecretId', 'AKID**************');
update_model_attr
-------------------
(1 row)

damoxing_rerank=> SELECT tencentdb_ai.update_model_attr('lke-text-embedding-v1', 'SecretKey', '*******************');
update_model_attr
-------------------
(1 row)
3. 准备云数据库 PostgreSQL 对象。
damoxing_rerank=> CREATE TABLE documents (
id SERIAL PRIMARY KEY,
content TEXT,
embedding FLOAT8[]
);
CREATE TABLE
damoxing_rerank=>
4. 创建生成嵌入向量的存储过程。
damoxing_rerank=> CREATE OR REPLACE FUNCTION generate_embedding(doc_contents TEXT[])
RETURNS FLOAT8[][] AS $$
DECLARE
embeddings FLOAT8[][] := '{}';
embedding FLOAT8[];
doc_content TEXT;
BEGIN
SELECT * from tencentdb_ai.get_embedding('lke-text-embedding-v1', doc_contents) INTO embedding;
RETURN embedding;
END;
$$ LANGUAGE plpgsql;
CREATE FUNCTION
5. 创建插入文档及其嵌入向量的存储过程。
damoxing_rerank=> CREATE OR REPLACE FUNCTION insert_documents(doc_contents TEXT[])
RETURNS VOID AS $$
DECLARE
embedding FLOAT8[];
doc_content TEXT;
BEGIN
doc_content := doc_contents[0];
embedding := generate_embedding(doc_contents);
INSERT INTO documents (content, embedding)
VALUES (doc_contents, embedding);
END;
$$ LANGUAGE plpgsql;
CREATE FUNCTION
6. 创建召回和大模型重排的存储过程。
damoxing_rerank=> CREATE OR REPLACE FUNCTION retrieve_and_rerank(query TEXT)
RETURNS TABLE (id INT, content TEXT, score FLOAT8) AS $$
DECLARE
keyword_results RECORD;
vector_results RECORD;
query_vector VECTOR;
rerank_content TEXT[];
rerank_ids INT[];
rerank_scores my_record;
i INT;
BEGIN
-- 生成查询向量
SELECT generate_embedding(ARRAY[query])::vector INTO query_vector;
-- 初始化重排序内容数组
rerank_content := ARRAY[]::TEXT[];
rerank_ids := ARRAY[]::INT[];
-- 关键词召回
FOR keyword_results IN
SELECT d.id, d.content, d.embedding
FROM documents d
WHERE to_tsvector('english', d.content) @@ plainto_tsquery('english', query)
LOOP
-- 向量召回
FOR vector_results IN
SELECT d.id, d.content, d.embedding
FROM documents d
WHERE d.id = keyword_results.id
ORDER BY d.embedding <-> query_vector
LIMIT 10
LOOP
rerank_content := array_append(rerank_content, vector_results.content);
rerank_ids := array_append(rerank_ids, vector_results.id);
END LOOP;
END LOOP;
-- 大模型重排序
SELECT * FROM tencentdb_ai.run_rerank('lke-reranker-base', query, rerank_content) INTO rerank_scores;
RETURN QUERY
select text_field, numeric_field from rerank_scores;
END;
$$ LANGUAGE plpgsql;
7. 调用存储过程。
damoxing_rerank=>SELECT insert_documents(ARRAY['This is the first document.']);
-[ RECORD 1 ]----+-
insert_documents |
damoxing_rerank=> SELECT * FROM retrieve_and_rerank('document');
("{""This is the first document.""}",-1.5107422)