编写自己的 flow(通过自定义 Executor 实现):
class MyExecutor(Executor):
    """Example Executor with one default handler and one `/crunch-numbers` handler."""

    @requests
    def foo(self, docs: DocumentArray, **kwargs):
        """Write greeting texts onto the first two Documents of the request.

        Registered via a bare ``@requests`` (no ``on=`` endpoint given).

        Args:
            docs: Documents of the incoming request; modified in place.
            **kwargs: Remaining request arguments; unused here.
        """
        greetings = ('hello, world!', 'goodbye, world!')
        # zip() stops at the shorter input, so a request carrying fewer than
        # two Documents is handled instead of raising IndexError.
        for doc, text in zip(docs, greetings):
            doc.text = text

    @requests(on='/crunch-numbers')
    def bar(self, docs: DocumentArray, **kwargs):
        """Attach a freshly generated random 10x2 tensor to every Document.

        Args:
            docs: Documents of the incoming request; modified in place.
            **kwargs: Remaining request arguments; unused here.
        """
        for doc in docs:
            doc.tensor = torch.tensor(np.random.random([10, 2]))
提供 API 接口,只需定义好输入和输出,使用起来比较灵活;
一个项目可以由多个 flow 共同组成;
可以将写好的 flow 放到 Hub 上,以便快速加载。
image.png
from docarray import DocumentArray
from jina import Flow
# Collect the PDF files matching the glob into a DocumentArray.
docs = DocumentArray.from_files("pdf_data/*.pdf", recursive=True)

# Indexing pipeline: segmenter -> sentencizer -> encoder -> indexer.
# Every Executor is pulled from Jina Hub; install_requirements=True asks Jina
# to install the Executor's dependencies when it is loaded.
# NOTE(review): the "@c" / "@cc" traversal paths presumably select the chunk
# and chunk-of-chunk levels produced by the earlier steps -- confirm against
# the DocArray traversal-path documentation.
flow = (
    Flow()
    .add(
        uses="jinahub://PDFSegmenter",
        install_requirements=True,
        name="segmenter",
    )
    .add(
        uses="jinahub://SpacySentencizer",
        uses_with={"traversal_paths": "@c"},
        install_requirements=True,
        name="sentencizer",
    )
    .add(
        uses="jinahub://TransformerTorchEncoder",
        uses_with={"traversal_paths": "@cc"},
        install_requirements=True,
        name="encoder",
    )
    .add(
        uses="jinahub://SimpleIndexer",
        uses_with={"traversal_right": "@cc"},
        install_requirements=True,
        name="indexer",
    )
)

# Render the pipeline topology for a quick visual check.
flow.plot()

# The Flow only serves requests while its context is open.
with flow:
    docs = flow.index(docs, show_progress=True)
from docarray import Document

# Build the search flow: encode the query, then look it up in the indexer.
search_flow = Flow()
search_flow = search_flow.add(
    uses="jinahub://TransformerTorchEncoder",
    name="encoder",
)
search_flow = search_flow.add(
    uses="jinahub://SimpleIndexer",
    uses_with={"traversal_right": "@cc"},
    name="indexer",
)

# Wrap the free-text query in a Document so it can be sent through the flow.
search_term = "一种基于词向量的hownet表示方法"
query_doc = Document(text=search_term)

with search_flow:
    results = search_flow.search(
        query_doc,
        show_progress=True,
        return_results=True,
    )

# Only one query Document was sent, so inspect the matches of the first result.
first_result = results[0]
for hit in first_result.matches:
    print(hit.text)
    print(hit.scores["cosine"].value)
    print("---")