Imports

convert_labels_to_ids[source]

convert_labels_to_ids(target:dict, problem:str, problem_type:str, label_encoder, tokenizer=None, decoding_length:int=None, custom_label_handling_fn:Callable=None, tokenized_inputs:dict=None, **kwargs)

create_bert_pretraining[source]

create_bert_pretraining(problem, inputs_list, label_encoder, params, tokenizer)

A slight modification of the original code.

Raises: ValueError -- raised when the input format is not valid

mask_inputs_for_mask_lm[source]

mask_inputs_for_mask_lm(inp_text:str, tokenizer:PreTrainedTokenizer, mask_prob=0.1, max_length=128, max_predictions_per_seq=20)

Shortcut functions

collect_modal_name[source]

collect_modal_name(inputs:dict)

infer_modal[source]

infer_modal(inputs:dict)

unify_inputs[source]

unify_inputs(inputs:Union[Dict[KT, VT], str])

Unify various input formats into a single one, as follows: { 'title': 'this is test', 'image': np.array([1,2,3]), 'class': 'a', 'title_modal_type': 'text', 'image_modal_type': 'array', 'class_modal_type': 'category', 'class_modal_info': 10 }

unify_inputs('this is a test')

# test legacy multimodal input
unify_inputs({'text': 'this is a test',
             'image': np.array([1, 2, 3]), 'class': 0})
{'text': 'this is a test',
 'image': array([1, 2, 3]),
 'class': 0,
 'text_modal_type': 'text',
 'image_modal_type': 'array',
 'class_modal_type': 'category'}

class NextExampleException[source]

NextExampleException() :: Exception

Common base class for all non-exit exceptions.

text_modal_input_handling[source]

text_modal_input_handling(problem:str, modal_name:str, modal_inputs:dict, target:dict=None, params:BaseParams=None, tokenizer:PreTrainedTokenizer=None)

array_modal_input_handling[source]

array_modal_input_handling(problem:str, modal_name:str, modal_inputs:dict, target:dict=None, params:BaseParams=None, tokenizer:PreTrainedTokenizer=None)

category_modal_input_handling[source]

category_modal_input_handling(problem:str, modal_name:str, modal_inputs:dict, target:dict=None, params:BaseParams=None, tokenizer:PreTrainedTokenizer=None, raw_inputs:dict=None)

from m3tl.test_base import TestBase
from m3tl.utils import load_transformer_tokenizer
from m3tl.predefined_problems import generate_fake_data
tb = TestBase()
params = tb.params
WARNING:root:bert_config not exists. will load model from huggingface checkpoint.
Adding new problem weibo_fake_ner, problem type: seq_tag
Adding new problem weibo_fake_multi_cls, problem type: multi_cls
Adding new problem weibo_fake_cls, problem type: cls
Adding new problem weibo_masklm, problem type: masklm
Adding new problem weibo_fake_regression, problem type: regression
Adding new problem weibo_fake_vector_fit, problem type: vector_fit
Adding new problem weibo_premask_mlm, problem type: premask_mlm
problem = 'weibo_fake_ner'
modal_name = 'desc'
modal_inputs = 'this is a test'
target = None
tokenizer = load_transformer_tokenizer(
    tokenizer_name=params.transformer_tokenizer_name, load_module_name=params.transformer_tokenizer_loading)
print(text_modal_input_handling(
    problem=problem, modal_name=modal_name, modal_inputs=modal_inputs, tokenizer=tokenizer, params=params))
fake_data = generate_fake_data('dict')
print(category_modal_input_handling(
    problem=problem, modal_name='inputs_cate', modal_inputs=fake_data[0]['inputs_cate'], tokenizer=tokenizer, params=params, raw_inputs=fake_data[0]))
({'desc_input_ids': [101, 8554, 8310, 143, 10060, 102], 'desc_mask': [1, 1, 1, 1, 1, 1], 'desc_segment_ids': [0, 0, 0, 0, 0, 0]}, {'overflowing_tokens': [], 'num_truncated_tokens': -250, 'input_ids': [101, 8554, 8310, 143, 10060, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0], 'special_tokens_mask': [1, 0, 0, 0, 0, 1], 'attention_mask': [1, 1, 1, 1, 1, 1]})
{'inputs_cate_input_ids': array([0]), 'inputs_cate_mask': [1], 'inputs_cate_segment_ids': array([0], dtype=int32)}

Create BERT multimodal features

separate_inputs_labels[source]

separate_inputs_labels(inp:dict)

remove_dict_prefix[source]

remove_dict_prefix(inp:dict, key='inputs_')

Wrapper functions

create_multimodal_bert_features[source]

create_multimodal_bert_features(problem, example_list, label_encoder, params, tokenizer, mode, problem_type, is_seq)

create_multimodal_bert_features_generator[source]

create_multimodal_bert_features_generator(problem, example_list, label_encoder, params, tokenizer, mode, problem_type, is_seq)