Imports

load_transformer_tokenizer[source]

load_transformer_tokenizer(tokenizer_name:str, load_module_name=None)

Some tokenizers cannot be loaded with AutoTokenizer.

This function serves as a utility to handle that situation.

Args:
    tokenizer_name (str): tokenizer name
    load_module_name (str, optional): tokenizer class to load explicitly (e.g. 'BertTokenizer'). Defaults to None.

load_transformer_tokenizer(
            'voidful/albert_chinese_tiny', 'BertTokenizer')
PreTrainedTokenizer(name_or_path='voidful/albert_chinese_tiny', vocab_size=21128, model_max_len=1000000000000000019884624838656, is_fast=False, padding_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})

load_transformer_config[source]

load_transformer_config(config_name_or_dict, load_module_name=None)

Some models need an explicitly specified loading module.

Args:
    config_name_or_dict (str or dict): config name, or a config dict
    load_module_name (str, optional): loading module name. Defaults to None.

Returns:
    config: the loaded transformers config

config = load_transformer_config(
    'bert-base-chinese')
config_dict = config.to_dict()
# load config with dict
config = load_transformer_config(
    config_dict, load_module_name='BertConfig')

load_transformer_model[source]

load_transformer_model(model_name_or_config, load_module_name=None)

Load a transformers model by name, or from a config object (in which case the weights are randomly initialized rather than loaded).

# this is a PyTorch-only model
model = load_transformer_model(
    'voidful/albert_chinese_tiny')

# load from config (weights are not loaded)
model = load_transformer_model(load_transformer_config(
    'bert-base-chinese'), 'TFBertModel')
404 Client Error: Not Found for url: https://huggingface.co/voidful/albert_chinese_tiny/resolve/main/tf_model.h5
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFAlbertModel: ['predictions.decoder.weight', 'predictions.dense.bias', 'predictions.bias', 'predictions.LayerNorm.bias', 'predictions.dense.weight', 'predictions.decoder.bias', 'predictions.LayerNorm.weight']
- This IS expected if you are initializing TFAlbertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFAlbertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFAlbertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFAlbertModel for predictions without further training.

get_label_encoder_save_path[source]

get_label_encoder_save_path(params, problem:str)
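
A minimal usage sketch, assuming params is the same m3tl params object used in the examples further down this page:

# usage sketch -- returns the path where the label encoder for
# 'weibo_fake_ner' (a problem name used below) is saved
le_path = get_label_encoder_save_path(params, problem='weibo_fake_ner')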

class LabelEncoder[source]

LabelEncoder() :: BaseEstimator

Base class for all estimators in scikit-learn.

Notes

All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).

create_path[source]

create_path(path)
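
A usage sketch; judging from the name, this presumably creates the given directory if it does not already exist:

# usage sketch -- the create-if-missing behavior is an assumption
create_path('tmp/label_encoders')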

need_make_label_encoder[source]

need_make_label_encoder(mode:str, le_path:str, overwrite=False)
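
A sketch of how this might gate encoder creation; the boolean-return reading is an assumption from the name:

# sketch -- assumes a boolean return: True when a new label encoder
# should be fitted (training with no saved encoder, or overwrite=True)
le_path = get_label_encoder_save_path(params, problem='weibo_fake_ner')
if need_make_label_encoder(mode=m3tl.TRAIN, le_path=le_path, overwrite=False):
    pass  # fit and save a new encoder here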

get_or_make_label_encoder[source]

get_or_make_label_encoder(params, problem:str, mode:str, label_list=None, overwrite=True)

Function to unify the ways to get or create a label encoder for various problem types.

cls: LabelEncoder
seq_tag: LabelEncoder
multi_cls: MultiLabelBinarizer
seq2seq_text: Tokenizer

Arguments:
    params -- params object
    problem {str} -- problem name
    mode {str} -- mode

Keyword Arguments:
    label_list {list} -- label list to fit the encoder (default: {None})
    overwrite {bool} -- whether to overwrite an existing encoder (default: {True})

Returns:
    LabelEncoder -- label encoder

le_train = get_or_make_label_encoder(
    params=params, problem='weibo_fake_ner', mode=m3tl.TRAIN, label_list=[['a', 'b'], ['c']]
)
# seq_tag will add [PAD]
assert len(le_train.encode_dict) == 4, le_train.encode_dict

le_predict = get_or_make_label_encoder(
    params=params, problem='weibo_fake_ner', mode=m3tl.PREDICT)
assert le_predict.encode_dict == le_train.encode_dict

# list train
le_train = get_or_make_label_encoder(
    params=params, problem='weibo_fake_cls', mode=m3tl.TRAIN, label_list=['a', 'b', 'c']
)
# cls will not add [PAD]
assert len(le_train.encode_dict) == 3

le_predict = get_or_make_label_encoder(
    params=params, problem='weibo_fake_cls', mode=m3tl.PREDICT)
assert le_predict.encode_dict == le_train.encode_dict

# text
le_train = get_or_make_label_encoder(
    params=params, problem='weibo_masklm', mode=m3tl.TRAIN)
assert isinstance(le_train, transformers.PreTrainedTokenizer)
le_predict = get_or_make_label_encoder(
    params=params, problem='weibo_masklm', mode=m3tl.PREDICT)
assert isinstance(le_predict, transformers.PreTrainedTokenizer)

cluster_alphnum[source]

cluster_alphnum(text:str)

Simple function to aggregate English words and numbers.

Arguments:
    text {str} -- input text

Returns:
    list -- list of strings, with a Chinese character or an English/number chunk as each element
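
A hypothetical illustration of the behavior described above, assuming Chinese characters are split one per element while consecutive English/number characters are kept together:

# illustrative only -- the exact grouping rules are an assumption
tokens = cluster_alphnum('今天是2021年')
# expected under that assumption: ['今', '天', '是', '2021', '年']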

filter_empty[source]

filter_empty(input_list, target_list)

Filter out empty inputs or targets.

Arguments:
    input_list {list} -- input list
    target_list {list} -- target list

Returns:
    input_list, target_list -- data after filtering
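
A hypothetical illustration, assuming a pair is dropped when either its input or its target is empty:

# illustrative only -- the exact filtering rule is an assumption
inputs, targets = filter_empty(['a', '', 'b'], ['x', 'y', ''])
# expected under that assumption: inputs == ['a'], targets == ['x']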

infer_shape_and_type_from_dict[source]

infer_shape_and_type_from_dict(inp_dict:dict, fix_dim_for_high_rank_tensor=True)

Infer TensorFlow shapes and dtypes from a dict of values. As the example shows, the leading dimension of array inputs is treated as a dynamic batch dimension (None), while scalars get an empty shape.

test_dict = {
    'test1': np.random.uniform(size=(64, 32)),
    'test2': np.array([1, 2, 3], dtype='int32'),
    'test5': 5
}
desc_dict = infer_shape_and_type_from_dict(
    test_dict)
assert desc_dict == ({'test1': [None, 32], 'test2': [None], 'test5': []}, {
                    'test1': tf.float32, 'test2': tf.int32, 'test5': tf.int32})

get_transformer_main_model[source]

get_transformer_main_model(model, key='embeddings')

Function to extract the main model from huggingface transformers models.

Args:
    model (Model): huggingface transformers model
    key (str, optional): key to identify the main model. Defaults to 'embeddings'.

Returns:
    model: the main model (e.g. TFAlbertMainLayer)

model = load_transformer_model(
    'voidful/albert_chinese_tiny')
main_model = get_transformer_main_model(model)
isinstance(main_model, transformers.TFAlbertMainLayer)
404 Client Error: Not Found for url: https://huggingface.co/voidful/albert_chinese_tiny/resolve/main/tf_model.h5
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFAlbertModel: ['predictions.decoder.weight', 'predictions.dense.bias', 'predictions.bias', 'predictions.LayerNorm.bias', 'predictions.dense.weight', 'predictions.decoder.bias', 'predictions.LayerNorm.weight']
- This IS expected if you are initializing TFAlbertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFAlbertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFAlbertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFAlbertModel for predictions without further training.
True

get_embedding_table_from_model[source]

get_embedding_table_from_model(model:TFPreTrainedModel)

Extract the token embedding table from a TF transformers model.

embedding = get_embedding_table_from_model(
    model)
assert embedding.shape == (21128, 128)

get_shape_list[source]

get_shape_list(tensor, expected_rank=None, name=None)

Returns a list of the shape of tensor, preferring static dimensions.

Args:
    tensor: a tf.Tensor object to find the shape of.
    expected_rank: (optional) int. The expected rank of tensor. If this is specified and the tensor has a different rank, an exception will be thrown.
    name: optional name of the tensor for the error message.

Returns:
    A list of dimensions of the shape of the tensor. All static dimensions will be returned as Python integers, and dynamic dimensions will be returned as tf.Tensor scalars.
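
A minimal sketch; in eager mode every dimension is static, so the result should be a plain list of Python integers:

t = tf.zeros((2, 3, 4))
assert get_shape_list(t, expected_rank=3) == [2, 3, 4]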

gather_indexes[source]

gather_indexes(sequence_tensor, positions)

Gathers the vectors at the specific positions over a minibatch.
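
A small sketch of this BERT-style gather, assuming sequence_tensor is [batch, seq_len, width] and positions is [batch, n]; the flattened output shape is assumed from the original BERT implementation of this helper:

# sketch -- output shape [batch * n, width] is an assumption
seq = tf.reshape(tf.range(24, dtype=tf.float32), (2, 4, 3))
picked = gather_indexes(seq, tf.constant([[0, 2], [1, 3]]))
# expected under that assumption: picked.shape == (4, 3)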

dispatch_features[source]

dispatch_features(features, hidden_feature, problem, mode)

create_dict_from_nested_model[source]

create_dict_from_nested_model(model:Model, loss_dict=None, ele_name='losses', added_name=None)

variable_summaries[source]

variable_summaries(var, name)

Attach a lot of summaries to a Tensor (for TensorBoard visualization).
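
A usage sketch, assuming var can be any tf.Variable; the exact set of summaries written (mean, stddev, min/max, histogram in the classic TensorBoard recipe) is an assumption here:

# usage sketch -- attaches summaries under the given name for TensorBoard
w = tf.Variable(tf.random.normal((3, 3)))
variable_summaries(w, name='dense_kernel')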

set_phase[source]

set_phase(phase:str)

get_phase[source]

get_phase()
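
A usage sketch, assuming phase strings follow the m3tl mode constants used elsewhere on this page:

set_phase(m3tl.TRAIN)
assert get_phase() == m3tl.TRAIN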

set_is_pyspark[source]

set_is_pyspark(is_pyspark:bool)

get_is_pyspark[source]

get_is_pyspark()
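
A matching usage sketch for the pyspark flag:

set_is_pyspark(True)
assert get_is_pyspark()
set_is_pyspark(False)  # presumably the default for local runs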

class TFRedundantWarningFilter[source]

TFRedundantWarningFilter(name='') :: Filter

Filter instances are used to perform arbitrary filtering of LogRecords.

Loggers and Handlers can optionally use Filter instances to filter records as desired. The base filter class only allows events which are below a certain point in the logger hierarchy. For example, a filter initialized with "A.B" will allow events logged by loggers "A.B", "A.B.C", "A.B.C.D", "A.B.D" etc. but not "A.BB", "B.A.B" etc. If initialized with the empty string, all events are passed.

compress_tf_warnings[source]

compress_tf_warnings()
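
A usage sketch; presumably this installs TFRedundantWarningFilter (above) on TensorFlow's logger to suppress repetitive warnings:

# usage sketch -- call once, early, before TensorFlow starts logging
compress_tf_warnings()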