_is_space(c):
R.append('[unused1]') # space类用未经训练的[unused1]表示
else:...R.append('[UNK]') # 剩余的字符是[UNK]
return R
# Instantiate the custom tokenizer over the BERT vocab dict.
# (OurTokenizer's definition is truncated above this view; from the visible
# fragment it maps whitespace chars to '[unused1]' and unknowns to '[UNK]'.)
tokenizer = OurTokenizer(token_dict)
# NOTE(review): the next line is a garbled extraction — it fuses a
# pd.read_csv(...) load with a later `if label in [2, 0, 1]:` filter;
# the code in between was elided. Restore from the original source.
neg = pd.read_csv...if label in [2, 0, 1]:
# Keep only rows whose text field is an actual str (drops NaN/float cells
# that pandas produces for empty CSV fields).
if isinstance(d, str):
data.append((d, label))  # accumulate (text, label) pairs
# Split train/validation 9:1 ... (elided code; trailing "shape=(None,))"
# is residue of the Input(shape=(None,)) placeholder definitions)
# Run the two BERT inputs (token ids + segment ids — presumably x1_in/x2_in;
# their Input(...) definitions were elided above) through the pretrained model.
x = bert_model([x1_in, x2_in])
# Take the sequence position 0 vector — presumably the [CLS] embedding used
# for sentence-level classification; TODO confirm against the elided inputs.
x = Lambda(lambda x: x[:, 0])(x)
# Dropout with rate 0.8 (drops 80% of units) — unusually aggressive;
# NOTE(review): verify 0.8 is intended as the drop fraction, not keep-prob.
x = Dropout(0.8)(x)
# NOTE(review): garbled line — "p...=" is an elision artifact; the intent is
# a 3-way softmax output head `p = Dense(3, activation='softmax')(x)`.
p...= Dense(3, activation='softmax')(x)
# Assemble the fine-tuning model: two BERT input tensors -> class probabilities.
model = Model([x1_in, x2_in], p)
save = ModelCheckpoint(
os.path.join