
Commit

add layer
DengBoCong committed Jan 10, 2022
1 parent 2c6c323 commit b957ec9
Showing 6 changed files with 470 additions and 314 deletions.
2 changes: 1 addition & 1 deletion examples/run_bm25.py
@@ -1,5 +1,5 @@
#! -*- coding: utf-8 -*-
""" Run TFIdf with sklearn
""" Run BM25 with sklearn
"""
# Author: DengBoCong <[email protected]>
#
16 changes: 16 additions & 0 deletions examples/tensorflow/run_basic_bert.py
@@ -0,0 +1,16 @@
+#! -*- coding: utf-8 -*-
+""" TensorFlow Run Basic Bert
+"""
+# Author: DengBoCong <[email protected]>
+#
+# License: MIT License
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+
+
+
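The new script is only an import stub at this point. As a hypothetical illustration of where it might go, here is a minimal sketch that builds a toy embedding model and pushes a random batch of token ids through it; every name and size below is an assumption, not part of the commit.

#! -*- coding: utf-8 -*-
# Hypothetical sketch only, not code from this commit: run a random
# batch of token ids through a toy embedding model.
import tensorflow as tf

# Illustrative sizes, chosen arbitrarily for the example.
vocab_size, hidden_size, seq_len = 21128, 768, 128

input_ids = tf.keras.Input(shape=(None,), dtype=tf.int32)
outputs = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=hidden_size)(input_ids)
model = tf.keras.Model(inputs=input_ids, outputs=outputs)

batch = tf.random.uniform(shape=(2, seq_len), maxval=vocab_size, dtype=tf.int32)
print(model(batch).shape)  # -> (2, 128, 768)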
277 changes: 0 additions & 277 deletions sim/bert_base/__init__.py

This file was deleted.

36 changes: 0 additions & 36 deletions sim/bert_base/tf/modeling_bert.py
@@ -15,7 +15,6 @@
import tensorflow as tf
from sim.bert_base import BertConfig
from typing import Any
-from typing import NoReturn


def embedding_lookup(input_ids: tf.Tensor,
@@ -132,42 +131,7 @@ def gen_relative_positions_embeddings(position: int, d_model: int) -> tuple:
return tf.cast(pos_encoding, dtype=d_type)


-def bert_embedding(config: BertConfig, is_training: bool, manual_seed: int = 1) -> tf.keras.Model:
-    """Bert Embedding
-    :param config: a BertConfig instance
-    :param is_training: whether the model is in training mode
-    :param manual_seed: random seed
-    """
-    input_ids = tf.keras.Input(shape=(None,))
-    token_type_ids = tf.keras.Input(shape=(None,))
-    segment_token_type_ids = token_type_ids % 10
-    diff_token_type_ids = token_type_ids // 10
-
-    batch_size, seq_len = tf.shape(input_ids)[0], tf.shape(input_ids)[1]
-
-    word_embeddings = tf.keras.layers.Embedding(input_dim=config.vocab_size, output_dim=config.hidden_size)(input_ids)
-
-    if config.use_relative_position:
-        pos_encoding = gen_relative_positions_embeddings(position=config.vocab_size, d_model=config.hidden_size)
-        position_embeddings = pos_encoding[:, :seq_len, :]
-    else:
-        position_ids = tf.expand_dims(input=tf.range(start=0, limit=seq_len, delta=1), axis=0)
-        position_ids = tf.repeat(input=position_ids, repeats=[batch_size], axis=0)
-        position_embeddings = tf.keras.layers.Embedding(
-            input_dim=config.max_position_embeddings, output_dim=config.hidden_size)(position_ids)
-    segment_token_type_embeddings = tf.keras.layers.Embedding(
-        input_dim=config.type_vocab_size, output_dim=config.hidden_size)(segment_token_type_ids)
-    diff_token_type_embeddings = tf.keras.layers.Embedding(
-        input_dim=5, output_dim=config.hidden_size)(diff_token_type_ids)
-    # token_type_embeddings = segment_token_type_embeddings + diff_token_type_embeddings
-    token_type_embeddings = segment_token_type_embeddings
-
-    embeddings = word_embeddings + position_embeddings + token_type_embeddings
-    layer_norm_output = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps)(embeddings)
-    dropout_output = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob,
-                                             seed=manual_seed)(layer_norm_output, is_training)
-
-    return tf.keras.Model(inputs=[input_ids, token_type_ids], outputs=dropout_output)


def split_heads(input_tensor: tf.Tensor, head_num: int, head_size: int):
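Given the commit title "add layer", the functional-API builder deleted above was presumably reworked as a reusable Keras layer elsewhere in the tree. A minimal sketch of such a refactor, assuming a hypothetical class name and constructor that this diff does not confirm:

import tensorflow as tf


class BertEmbedding(tf.keras.layers.Layer):
    """Word + position + token-type embeddings as one reusable layer.
    Hypothetical sketch; not the layer actually added by this commit."""

    def __init__(self, vocab_size: int, hidden_size: int, max_position: int,
                 type_vocab_size: int, hidden_dropout_prob: float = 0.1,
                 layer_norm_eps: float = 1e-12, **kwargs):
        super().__init__(**kwargs)
        self.word_embeddings = tf.keras.layers.Embedding(vocab_size, hidden_size)
        self.position_embeddings = tf.keras.layers.Embedding(max_position, hidden_size)
        self.token_type_embeddings = tf.keras.layers.Embedding(type_vocab_size, hidden_size)
        self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=layer_norm_eps)
        self.dropout = tf.keras.layers.Dropout(rate=hidden_dropout_prob)

    def call(self, input_ids, token_type_ids, training=False):
        # Absolute position ids 0..seq_len-1, broadcast across the batch.
        seq_len = tf.shape(input_ids)[1]
        position_ids = tf.range(seq_len)[tf.newaxis, :]
        # Sum the three embeddings, then normalize and apply dropout,
        # mirroring the deleted bert_embedding() builder.
        embeddings = (self.word_embeddings(input_ids)
                      + self.position_embeddings(position_ids)
                      + self.token_type_embeddings(token_type_ids))
        return self.dropout(self.layer_norm(embeddings), training=training)

Packaged as a layer, the sum-then-normalize logic can be reused inside larger models and serialized with them, e.g. BertEmbedding(vocab_size=21128, hidden_size=768, max_position=512, type_vocab_size=2)(input_ids, token_type_ids).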