# train.py: entry point of the code to train models.
import hydra
from omegaconf import DictConfig
from data import get_data, get_collators
from model import get_model
from trainer import load_trainer
from evals import get_evaluator
@hydra.main(version_base=None, config_path="../configs", config_name="train.yaml")
def main(cfg: DictConfig):
    """Entry point of the code to train models.

    Builds the model and tokenizer, the datasets, the collator, an optional
    evaluator and the trainer from the config, then runs training and/or
    evaluation depending on the trainer arguments returned by
    ``load_trainer``.

    Args:
        cfg (DictConfig): Config to train. The ``model``, ``data``,
            ``collator`` and ``trainer`` entries are read directly; ``mode``
            (defaults to ``"train"``) and ``eval`` are optional.

    Raises:
        AssertionError: If ``cfg.model`` or ``cfg.trainer`` is None, or if
            more than one evaluation is configured under ``eval``.
    """
    mode = cfg.get("mode", "train")

    # Validate the required config sections up front, before any model or
    # dataset loading is attempted.
    model_cfg = cfg.model
    assert model_cfg is not None, "Invalid model yaml passed in train config."
    trainer_cfg = cfg.trainer
    assert trainer_cfg is not None, ValueError("Please set trainer")

    # Read only after the None check above: dereferencing first would raise an
    # AttributeError and the assertion message would never be shown.
    template_args = model_cfg.template_args
    model, tokenizer = get_model(model_cfg)

    # Load Dataset
    data_cfg = cfg.data
    data = get_data(
        data_cfg, mode=mode, tokenizer=tokenizer, template_args=template_args
    )

    # Load collator
    collator_cfg = cfg.collator
    collator = get_collators(collator_cfg, tokenizer=tokenizer)

    # Get Evaluator (optional; stays None when no ``eval`` entry is configured)
    evaluator = None
    eval_cfgs = cfg.get("eval", None)
    if eval_cfgs:
        assert len(eval_cfgs) <= 1, ValueError(
            "Only one evaluation supported while training"
        )
        # Exactly one entry remains at this point: take its name and config.
        eval_name, eval_cfg = next(iter(eval_cfgs.items()))
        evaluator = get_evaluator(
            eval_name,
            eval_cfg,
            template_args=template_args,
            model=model,
            tokenizer=tokenizer,
        )

    # Get Trainer
    trainer, trainer_args = load_trainer(
        trainer_cfg=trainer_cfg,
        model=model,
        train_dataset=data.get("train", None),
        eval_dataset=data.get("eval", None),
        tokenizer=tokenizer,
        data_collator=collator,
        evaluator=evaluator,
        template_args=template_args,
    )

    if trainer_args.do_train:
        trainer.train()
        trainer.save_state()
        trainer.save_model(trainer_args.output_dir)

    if trainer_args.do_eval:
        trainer.evaluate(metric_key_prefix="eval")
# Invoke the Hydra-decorated entry point only when this file is run as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()