BERT model throws InvalidArgumentError: Condition x <= y did not hold element-wise

Stack Overflow user
Asked on 2020-12-01 14:58:06
1 answer · 164 views · 0 followers · 2 votes

I am training a BERT model.

Can someone explain what the following error message means?

Condition x <= y did not hold element-wise

This follows the reference colab notebook.

My code is:

!pip install bert-for-tf2

import math
import os
import numpy as np  # needed for np.array in FakeNewsData below
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras
import bert
from bert import BertModelLayer
from bert.loader import StockBertConfig, map_stock_config_to_params, load_stock_weights
from bert.tokenization.bert_tokenization import FullTokenizer

%%time

bert_ckpt_dir = "gs://bert_models/2018_10_18/uncased_L-12_H-768_A-12/"
bert_ckpt_file = bert_ckpt_dir + "bert_model.ckpt"
bert_config_file = bert_ckpt_dir + "bert_config.json"

bert_model_dir = "2018_10_18"
bert_model_name = "uncased_L-12_H-768_A-12"

!mkdir -p .model .model/$bert_model_name

for fname in ["bert_config.json", "vocab.txt", "bert_model.ckpt.meta", "bert_model.ckpt.index", "bert_model.ckpt.data-00000-of-00001"]:
    cmd = f"gsutil cp gs://bert_models/{bert_model_dir}/{bert_model_name}/{fname} .model/{bert_model_name}"
    !$cmd

!ls -la .model .model/$bert_model_name

bert_ckpt_dir = os.path.join(".model/", bert_model_name)
bert_ckpt_file = os.path.join(bert_ckpt_dir, "bert_model.ckpt")
bert_config_file = os.path.join(bert_ckpt_dir, "bert_config.json")

class FakeNewsData:
    """
    Preprocess the text into BERT features.

    max_seq_len: the specified maximum sequence length
    tokenizer: the BERT tokenizer
    """
    DATA_COLUMN = "text"
    LABEL_COLUMN = "label"

    def __init__(self, tokenizer, train, validation, test, max_seq_len = 150):
        self.tokenizer = tokenizer
        self.max_seq_len = max_seq_len
        ((self.train_x, self.train_y),
         (self.val_x, self.val_y),
         (self.test_x, self.test_y)) = map(self._prepare, [train, validation, test])

        ((self.train_x, self.train_x_token_types),
         (self.val_x, self.val_x_token_types),
         (self.test_x, self.test_x_token_types)) = map(self._pad, 
                                                       [self.train_x, self.val_x, self.test_x])

    def _prepare(self, df):
        """
        Add the start and end tokens to each sequence and convert the text into token IDs.
        """
        x, y = [], []
        with tqdm(total=df.shape[0], unit_scale=True) as pbar:
            for ndx, row in df.iterrows():
                text, label = row[FakeNewsData.DATA_COLUMN], row[FakeNewsData.LABEL_COLUMN]
                tokens = self.tokenizer.tokenize(text)
                tokens = ["[CLS]"] + tokens + ["[SEP]"]
                token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
                self.max_seq_len = max(self.max_seq_len, len(token_ids))
                x.append(token_ids)
                y.append(int(label))
                pbar.update()
        return np.array(x), np.array(y)

    def _pad(self, ids):
        """
        Pad each sequence with [0] up to the specified maximum sequence length.
        """
        x, t = [], []
        token_type_ids = [0] * self.max_seq_len
        for input_ids in ids:
            input_ids = input_ids[:min(len(input_ids), self.max_seq_len - 2)]
            input_ids = input_ids + [0] * (self.max_seq_len - len(input_ids))
            x.append(np.array(input_ids))
            t.append(token_type_ids)
        return np.array(x), np.array(t)

%%time
tokenizer = FullTokenizer(vocab_file=os.path.join(bert_ckpt_dir, "vocab.txt"))
data = FakeNewsData(tokenizer, 
                    train = train_df,
                    validation = val_df,
                    test = test_df,
                    max_seq_len= 150)

def create_model(max_seq_len,lr = 1e-5):
  """
  Create a BERT classification model.
  The model architecture is: raw input -> BERT layer -> dropout layer (to prevent overfitting) -> dense layer that outputs the predicted probabilities.

  max_seq_len: maximum sequence length
  lr: learning rate for the optimizer
  """


  # create the bert layer
  with tf.io.gfile.GFile(bert_config_file, "r") as reader:
      bc = StockBertConfig.from_json_string(reader.read())
      bert_params = map_stock_config_to_params(bc)
      bert = BertModelLayer.from_params(bert_params, name="bert")
        
  input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
  output = bert(input_ids)

  print("bert shape", output.shape)
  cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
  # Dropout layer
  cls_out = keras.layers.Dropout(0.8)(cls_out)
  # Dense layer with probability output
  logits = keras.layers.Dense(units=2, activation="softmax")(cls_out)

  model = keras.Model(inputs=input_ids, outputs=logits)
  model.build(input_shape=(None, max_seq_len))

  # load the pre-trained model weights
  load_stock_weights(bert, bert_ckpt_file)

  model.compile(optimizer=keras.optimizers.Adam(learning_rate = lr),
                loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

  model.summary()
        
  return model

model = create_model(max_seq_len = data.max_seq_len, lr = 1e-5)

Here is the output:

InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-25-578d63d49a0e> in <module>()
     39   return model
     40 
---> 41 model = create_model(max_seq_len = data.max_seq_len, lr = 1e-5)

3 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    256       except Exception as e:  # pylint:disable=broad-except
    257         if hasattr(e, 'ag_error_metadata'):
--> 258           raise e.ag_error_metadata.to_exception(e)
    259         else:
    260           raise

InvalidArgumentError: in user code:

    /usr/local/lib/python3.6/dist-packages/bert/model.py:79 call  *
        embedding_output = self.embeddings_layer(inputs, mask=mask, training=training)
    /usr/local/lib/python3.6/dist-packages/bert/embeddings.py:223 call  *
        pos_embeddings = self.position_embeddings_layer(seq_len)
    /usr/local/lib/python3.6/dist-packages/bert/embeddings.py:48 call  *
        assert_op = tf.compat.v2.debugging.assert_less_equal(seq_len, self.params.max_position_embeddings)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper  **
        return target(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/check_ops.py:938 assert_less_equal_v2
        summarize=summarize, message=message, name=name)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/check_ops.py:947 assert_less_equal
        np.less_equal, x, y, data, summarize, message, name)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/check_ops.py:372 _binary_assert
        _assert_static(condition_static, data)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/check_ops.py:87 _assert_static
        message='\n'.join(data_static))

    InvalidArgumentError: Condition x <= y did not hold element-wise:
    x (bert/embeddings/Const_2:0) = 
    9755
    y (bert/embeddings/position_embeddings/assert_less_equal_2/y:0) = 
    512

Long document preprocessing:

import pandas as pd  # needed for pd.DataFrame in data_augumentation below

def get_split(text):
    """
    Split each news text into subtexts no longer than 150 words.
    """
    l_total = []
    l_parcial = []
    if len(text.split())//120 >0:
        n = len(text.split())//120
    else: 
        n = 1
    for w in range(n):
        if w == 0:
            l_parcial = text.split()[:150]
            l_total.append(" ".join(l_parcial))
        else:
            l_parcial = text.split()[w*120:w*120 + 150]
            l_total.append(" ".join(l_parcial))
    return l_total

train['text_split'] = train['text'].apply(get_split)
val['text_split'] = val['text'].apply(get_split)
test['text_split'] = test['text'].apply(get_split)


def data_augumentation(df, df_name):
    """
    Create a new dataframe from the original one, because one text may now contain multiple subtexts.
    'text' corresponds to a subtext from the original text, while 'index' corresponds to its index in the original set.
    """
    text_l = []
    label_l = []
    index_l = []
    for idx,row in df.iterrows():
      for l in row['text_split']:
        text_l.append(l)
        label_l.append(row['label'])
        index_l.append(idx)
    new_df = pd.DataFrame({'text':text_l, 'label':label_l, 'index':index_l})
    print("The " + df_name +" set now has " + str(len(new_df)) + ' subtexts extracted from ' + str(len(df)) + ' texts.')
    return new_df

train_df = data_augumentation(train, df_name = 'training')
val_df = data_augumentation(val, df_name  = 'validation')
test_df = data_augumentation(test, df_name = 'testing')
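
A quick sanity check of what get_split produces (using a hypothetical 300-word dummy string, not data from the post): with a 150-word window and a 120-word stride it yields two overlapping chunks.

# Hypothetical usage check of get_split: a 300-word dummy text should yield
# two overlapping chunks of 150 words each (window = 150 words, stride = 120 words).
dummy_text = " ".join(f"w{i}" for i in range(300))
chunks = get_split(dummy_text)
print(len(chunks))                       # 2
print([len(c.split()) for c in chunks])  # [150, 150]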

I fed short sequences to my model and it worked. Do the long sequences come from the long-document preprocessing? I cannot find where the problem is. Please help. Thank you.


1 Answer

Stack Overflow user

Answered on 2020-12-01 16:01:02

BERT has a hard maximum sequence length of 512. Try feeding your model short sequences. If that works, check your data: somewhere there is a very long sequence.
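
A minimal diagnostic sketch (assuming the tokenizer and train_df defined in the question) that measures the tokenized lengths before padding; note that FakeNewsData._prepare grows self.max_seq_len to the longest tokenized text, which is how a value such as 9755 can end up exceeding the 512 position embeddings:

# Diagnostic sketch: report the longest tokenized sequence and how many
# texts exceed BERT's 512-position limit. Any such text pushes
# self.max_seq_len past 512 in FakeNewsData._prepare and triggers the
# assert_less_equal failure shown in the traceback.
MAX_BERT_LEN = 512

lengths = [
    len(tokenizer.convert_tokens_to_ids(
        ["[CLS]"] + tokenizer.tokenize(text) + ["[SEP]"]))
    for text in train_df["text"]
]
print("longest sequence:", max(lengths), "tokens;",
      sum(l > MAX_BERT_LEN for l in lengths), "texts exceed", MAX_BERT_LEN)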

Votes: 0
Original content provided by Stack Overflow.
Original link: https://stackoverflow.com/questions/65085991