Init model in various modes
train
: model will be loaded from huggingface
resume
: model will be loaded from params.ckpt_dir; if params.ckpt_dir does not contain a valid checkpoint, it will be loaded from huggingface
transfer
: model will be loaded from params.init_checkpoint; the corresponding path should contain checkpoints saved using m3tl
predict
: model will be loaded from params.ckpt_dir, excluding the optimizers' states
eval
: model will be loaded from params.ckpt_dir, excluding the optimizers' states; the model will also be compiled
Args:
- mirrored_strategy (tf.distribute.MirroredStrategy): mirrored strategy
- params (Params): params
- mode (str, optional): Mode, see the explanation above. Defaults to 'train'.
- inputs_to_build_model (Dict, optional): A batch of data. Defaults to None.
- model (Model, optional): Keras model. Defaults to None.
Returns:
- model: loaded model
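For instance, loading a trained model for offline prediction could look like the sketch below. This is a minimal sketch, assuming this section documents create_keras_model (the helper referenced in train_bert_multitask below), that params.ckpt_dir already contains a trained checkpoint, and that passing mirrored_strategy=None is acceptable when no distribution strategy is needed.
# minimal sketch: load a trained model without optimizer states (assumptions above)
model = create_keras_model(
    mirrored_strategy=None, params=params, mode='predict')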
Train Multi-task Bert model
Keyword Arguments:
- problem (str, optional) -- Problems to train. Defaults to 'weibo_ner'
- num_gpus (int, optional) -- Number of GPUs to use. Defaults to 1
- num_epochs (int, optional) -- Number of epochs to train. Defaults to 10
- model_dir (str, optional) -- model dir. Defaults to ''
- params (Params, optional) -- Params to define training and models. Defaults to None
- problem_type_dict (dict, optional) -- Key: problem name, value: problem type. Defaults to None
- processing_fn_dict (dict, optional) -- Key: problem name, value: problem data preprocessing fn. Defaults to None
- model (tf.keras.Model, optional): if not provided, it will be created using create_keras_model. Defaults to None.
- create_tf_record_only (bool, optional): if True, the function will only create TFRecords without training the model. Defaults to False.
- steps_per_epoch (int, optional): steps per epoch; if not provided, the train dataset will be looped over once to calculate steps per epoch. Defaults to None.
- warmup_ratio (float, optional): lr warmup ratio. Defaults to 0.1.
- continue_training (bool, optional): whether to resume training from model_dir. Defaults to False.
- mirrored_strategy (MirroredStrategy, optional): TensorFlow MirroredStrategy. Defaults to None.
- run_eagerly (bool, optional): Whether to run model eagerly. Defaults to False.
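The call below exercises train_bert_multitask as a quick smoke test: one epoch with a single step per epoch, run eagerly, resuming from the existing model dir, with horovod disabled and mirrored_strategy=False.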
params.use_horovod = False
model = train_bert_multitask(
problem=problem,
num_epochs=1,
params=params,
problem_type_dict=problem_type_dict,
processing_fn_dict=processing_fn_dict,
steps_per_epoch=1,
continue_training=True,
mirrored_strategy=False,
run_eagerly=True
)
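create_tensorspec_from_shape_type can be checked with a hand-built input: the tuple below pairs a dict of input shapes with a dict of dtypes for the text, image and class modalities, and the print shows the tf.TensorSpec structure derived from it.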
test_tup = ({
'text_input_ids': [None, 3],
'text_mask': [None, 3],
'text_segment_ids': [None, 3],
'image_input_ids': [None, 5, 10],
'image_mask': [None, 5],
'image_segment_ids': [None, 5],
'class_input_ids': [None, 1],
'class_mask': [None, 1],
'class_segment_ids': [None, 1]},
{
'text_input_ids': tf.int32,
'text_mask': tf.int32,
'text_segment_ids': tf.int32,
'image_input_ids': tf.float32,
'image_mask': tf.int32,
'image_segment_ids': tf.int32,
'class_input_ids': tf.int32,
'class_mask': tf.int32,
'class_segment_ids': tf.int32})
print(create_tensorspec_from_shape_type(test_tup))
Minimize checkpoint size for prediction.
Since the original checkpoint contains the optimizer's variables (for instance, if Adam is used, the checkpoint will be about three times the size of the model weights), this function removes the variables that are unused in prediction to save space.
Note: if the model is a multimodal model, you have to provide a fake_input_list that mimics the structure of the real input. Otherwise, modal embeddings will be randomly initialized.
Args:
- problem (str): problem
- input_dir (str): input dir
- output_dir (str): output dir
- problem_type_dict (Dict[str, str], optional): problem type dict. Defaults to None.
- fake_input_list (List, optional): fake input list to create dummy dataset
- params (Params, optional): params
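The snippet below builds fake multimodal inputs with generate_fake_data, so that modal embeddings are built before trimming, and then trims the checkpoint twice: once exporting a full SavedModel (save_weights_only=False) and once, presumably with the weights-only default, keeping only the checkpoint weights.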
tf.get_logger().setLevel('ERROR')
import numpy as np
from m3tl.predefined_problems.test_data import generate_fake_data
fake_inputs = [v for v, _ in generate_fake_data(output_format='gen_dict_tuple')]
# save as SavedModel pb
trim_checkpoint_for_prediction(
    problem=model.params.problem_str,
    input_dir=model.params.ckpt_dir,
    output_dir=model.params.ckpt_dir + '_pred',
    problem_type_dict=problem_type_dict,
    overwrite=True,
    fake_input_list=fake_inputs,
    save_weights_only=False)
trim_checkpoint_for_prediction(
problem=problem, input_dir=model.params.ckpt_dir,
output_dir=model.params.ckpt_dir+'_pred',
problem_type_dict=problem_type_dict, overwrite=True, fake_input_list=fake_inputs)
Evaluate Multi-task Bert model
Keyword Arguments:
- problem (str, optional): problems to evaluate. Defaults to 'weibo_ner'.
- num_gpus (int, optional): number of GPUs to use. Defaults to 1.
- model_dir (str, optional): model dir. Defaults to ''.
- params (Params, optional): params. Defaults to None.
- problem_type_dict (dict, optional): Key: problem name, value: problem type. Defaults to None.
- processing_fn_dict (dict, optional): Key: problem name, value: problem data preprocessing fn. Defaults to None.
- model (tf.keras.Model, optional): If not provided, it will be created with create_keras_model. Defaults to None.
- run_eagerly (bool, optional): Whether to run model eagerly. Defaults to False.
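Below, the original training checkpoint is removed and the model is evaluated from the trimmed prediction checkpoint; the second call passes the in-memory model directly instead of a model directory.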
import shutil
shutil.rmtree(model.params.ckpt_dir)
eval_bert_multitask(problem=problem, params=params,
problem_type_dict=problem_type_dict, processing_fn_dict=processing_fn_dict,
model_dir=model.params.ckpt_dir+'_pred')
# provide model instead of dir
eval_bert_multitask(problem=problem, params=params,
problem_type_dict=problem_type_dict, processing_fn_dict=processing_fn_dict,
model=model)
def arr_to_str(inp_arr: np.ndarray) -> List[str]:
    # serialize each row of the array as a JSON string
    return [json.dumps(row) for row in inp_arr.tolist()]
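# e.g. arr_to_str(np.array([[1, 2], [3, 4]])) -> ['[1, 2]', '[3, 4]']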
def decode_predictions(pred: Dict[str, np.ndarray], params: Params, array_as_str=False) -> Dict[str, Union[int, float, np.ndarray, list, str]]:
parsed_pred = dict()
problem_list = params.problem_list
label_encoder_dict = {p: get_or_make_label_encoder(
params=params, problem=p, mode=PREDICT) for p in problem_list}
for problem, problem_pred_array in pred.items():
        # additional outputs
        if problem not in problem_list:
            if isinstance(problem_pred_array, np.ndarray) and array_as_str:
                parsed_pred[problem] = arr_to_str(problem_pred_array)
            else:
                parsed_pred[problem] = problem_pred_array
            continue
label_encoder = label_encoder_dict[problem]
support_problem_type = [
'multi_cls',
'cls',
'seq_tag',
'regression',
'masklm',
'premask_mlm',
'vectorfit'
]
problem_type = params.get_problem_type(problem=problem)
if problem_type not in support_problem_type:
        logger.warning("trying to decode prediction of unsupported problem type"
                       " {}; if any error is raised, please disable prediction decoding".format(problem_type))
        is_multi_cls = problem_type == 'multi_cls'
        is_cls = problem_type == 'cls'
        is_seq_tag = problem_type == 'seq_tag'
        is_regression = problem_type == 'regression'
if is_regression:
parsed_pred[problem] = problem_pred_array
continue
# get pred from prob
if is_multi_cls:
problem_pred = problem_pred_array >= 0.5
elif is_cls or is_seq_tag:
problem_pred = np.argmax(problem_pred_array, axis=-1)
else:
problem_pred = problem_pred_array
# sequence labels
if is_seq_tag:
parsed_problem_pred = np.apply_along_axis(
label_encoder.inverse_transform, axis=1, arr=problem_pred)
else:
if isinstance(label_encoder, MultiLabelBinarizer) or isinstance(label_encoder, LabelEncoder):
parsed_problem_pred = label_encoder.inverse_transform(
problem_pred)
elif isinstance(label_encoder, PreTrainedTokenizer):
parsed_problem_pred = np.apply_along_axis(
label_encoder.convert_ids_to_tokens, axis=1, arr=problem_pred
)
else:
parsed_problem_pred = problem_pred_array
parsed_pred[problem] = parsed_problem_pred
return parsed_pred
pred, model = predict_bert_multitask(
problem='weibo_fake_ner',
inputs=fake_inputs*20, model_dir=model.params.ckpt_dir,
problem_type_dict=problem_type_dict,
processing_fn_dict=processing_fn_dict, return_model=True,
params=params)
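The raw probabilities in pred can then be turned into labels with decode_predictions defined above; a minimal usage sketch, reusing the pred and model returned by the call above:
# decode raw prediction arrays into labels (array_as_str left at its default)
decoded_pred = decode_predictions(pred=pred, params=model.params)
print(decoded_pred.keys())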