The solution is to patch a chat() method onto it ourselves, modeled on the one other LLMs provide:
import torch
from transformers import Qwen2ForCausalLM

def chat(model, tok, ques, history=None, **kw):
    # Build the prompt from the history plus the new user turn;
    # apply_chat_template returns token ids (tokenize=True is the default).
    iids = tok.apply_chat_template(
        (history or []) + [{'role': 'user', 'content': ques}],
        add_generation_prompt=True,
    )
    # Merge the model's generation config with any caller overrides.
    oids = model.generate(
        inputs=torch.tensor([iids]).to(model.device),
        **(model.generation_config.to_dict() | kw),
    )
    # Keep only the newly generated tokens, dropping the prompt.
    oids = oids[0][len(iids):].tolist()
    # Strip a trailing EOS token if present.
    if oids[-1] == tok.eos_token_id:
        oids = oids[:-1]
    ans = tok.decode(oids)
    return ans

# Monkey-patch the method onto the model class so every instance gets it.
Qwen2ForCausalLM.chat = chat
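Once patched, the method can be called on an instance just like the old built-in model.chat(). A minimal usage sketch follows; the checkpoint name Qwen/Qwen2-7B-Instruct is only an illustration, substitute whatever model you are loading:

from transformers import AutoTokenizer, Qwen2ForCausalLM

# The checkpoint name here is an assumption for illustration.
model = Qwen2ForCausalLM.from_pretrained('Qwen/Qwen2-7B-Instruct', device_map='auto')
tok = AutoTokenizer.from_pretrained('Qwen/Qwen2-7B-Instruct')

# Single turn.
ans = model.chat(tok, 'Hello!')
print(ans)

# Multi-turn: pass the accumulated history explicitly.
history = [
    {'role': 'user', 'content': 'Hello!'},
    {'role': 'assistant', 'content': ans},
]
print(model.chat(tok, 'Tell me more.', history=history))

Because chat is assigned to the class, Python binds the instance as the first argument (model) automatically, so the call site only supplies the tokenizer and the question.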