Skip to content

Commit

Permalink
fix test
Browse files Browse the repository at this point in the history
  • Loading branch information
omukazu committed Jun 24, 2024
1 parent 1fd4946 commit 14b32ed
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 127 deletions.
68 changes: 32 additions & 36 deletions tests/data/modules/permitted_tokens.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"input_tokens": ["<pad>"],
"permitted_tokens": ["<extra_id_0>"]
},
"target_morpheme": "init"
"target_property": "init"
},
"case002": {
"surfs": ["計算", "", "", "よる", "言語", "理解", "", "実現", "する"],
Expand All @@ -21,7 +21,7 @@
"input_tokens": ["<pad>", "<extra_id_0>"],
"permitted_tokens": ["", "計算"]
},
"target_morpheme": "surf"
"target_property": "surf"
},
"case003": {
"surfs": ["計算", "", "", "よる", "言語", "理解", "", "実現", "する"],
Expand All @@ -33,7 +33,7 @@
"input_tokens": ["<pad>", "<extra_id_0>", ""],
"permitted_tokens": [""]
},
"target_morpheme": "surf"
"target_property": "surf"
},
"case004": {
"surfs": ["計算", "", "", "よる", "言語", "理解", "", "実現", "する"],
Expand All @@ -45,105 +45,101 @@
"input_tokens": ["<pad>", "<extra_id_0>", "計算"],
"permitted_tokens": ["<extra_id_1>"]
},
"target_morpheme": "surf"
"target_property": "surf"
},
"case005": {
"surfs": ["計算", "", "", "よる", "言語", "理解", "", "実現", "する"],
"t5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>"],
"permitted_tokens": "reading_candidates",
"banned_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"prohibited_tokens": ["<extra_id_2>"]
},
"mt5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>"],
"permitted_tokens": "reading_candidates",
"banned_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"prohibited_tokens": ["<extra_id_2>"]
},
"target_morpheme": "reading"
"target_property": "reading"
},
"case006": {
"surfs": ["計算", "", "", "よる", "言語", "理解", "", "実現", "する"],
"t5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "▁けい"],
"permitted_tokens": "reading_candidates",
"banned_tokens": ["</s>"]
"permitted_tokens": "reading_candidates"
},
"mt5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "けい"],
"permitted_tokens": "reading_candidates",
"banned_tokens": ["</s>"]
"permitted_tokens": "reading_candidates"
},
"target_morpheme": "reading"
"target_property": "reading"
},
"case007": {
"surfs": ["計算", "", "", "よる", "言語", "理解", "", "実現", "する"],
"t5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "▁けい", "さん", "<extra_id_2>"],
"permitted_tokens": [],
"banned_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"prohibited_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<pad>", "</s>"]
},
"mt5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "けい", "さん", "<extra_id_2>"],
"permitted_tokens": [],
"banned_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"prohibited_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<pad>", "</s>"]
},
"target_morpheme": "lemma"
"target_property": "lemma"
},
"case008": {
"surfs": ["計算", "", "", "よる", "言語", "理解", "", "実現", "する"],
"t5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "▁けい", "さん", "<extra_id_2>", "計算"],
"permitted_tokens": [],
"banned_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "</s>"]
"prohibited_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_4>", "<extra_id_5>", "<pad>", "</s>"]
},
"mt5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "けい", "さん", "<extra_id_2>", "計算"],
"permitted_tokens": [],
"banned_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "</s>"]
"prohibited_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_4>", "<extra_id_5>", "<pad>", "</s>"]
},
"target_morpheme": "lemma"
"target_property": "lemma"
},
"case009": {
"surfs": ["計算", "", "", "よる", "言語", "理解", "", "実現", "する"],
"t5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "▁けい", "さん", "<extra_id_2>", "計算", "<extra_id_3>"],
"permitted_tokens": [],
"banned_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"prohibited_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_5>", "<pad>", "</s>"]
},
"mt5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "けい", "さん", "<extra_id_2>", "計算", "<extra_id_3>"],
"permitted_tokens": [],
"banned_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"prohibited_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_5>", "<pad>", "</s>"]
},
"target_morpheme": "canon"
"target_property": "canon"
},
"case010": {
"surfs": ["計算", "", "", "よる", "言語", "理解", "", "実現", "する"],
"t5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "▁けい", "さん", "<extra_id_2>", "計算", "<extra_id_3>", "計算"],
"permitted_tokens": [],
"banned_tokens": ["<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"prohibited_tokens": ["<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_5>", "<pad>", "</s>"]
},
"mt5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "けい", "さん", "<extra_id_2>", "計算", "<extra_id_3>", "計算"],
"permitted_tokens": [],
"banned_tokens": ["<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"prohibited_tokens": ["<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_5>", "<pad>", "</s>"]
},
"target_morpheme": "canon"
"target_property": "canon"
},
"case011": {
"surfs": ["計算", "", "", "よる", "言語", "理解", "", "実現", "する"],
"t5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "▁けい", "さん", "<extra_id_2>", "計算", "<extra_id_3>", "計算", "/", "けい", "さん", "<extra_id_0>"],
"permitted_tokens": [""],
"banned_tokens": ["<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"permitted_tokens": [""]
},
"mt5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "けい", "さん", "<extra_id_2>", "計算", "<extra_id_3>", "計算", "/", "けい", "さん", "<extra_id_0>"],
"permitted_tokens": [""],
"banned_tokens": ["<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"permitted_tokens": [""]
},
"target_morpheme": "surf"
"target_property": "surf"
},
"case012": {
"surfs": ["計算"],
Expand All @@ -155,34 +151,34 @@
"input_tokens": ["<pad>", "<extra_id_0>", "計算"],
"permitted_tokens": ["<extra_id_1>"]
},
"target_morpheme": "surf"
"target_property": "surf"
},
"case013": {
"surfs": ["計算"],
"t5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "けい", "さん", "<extra_id_2>", "計算", "<extra_id_3>"],
"permitted_tokens": [],
"banned_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"prohibited_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_5>", "<pad>", "</s>"]
},
"mt5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "けい", "さん", "<extra_id_2>", "計算", "<extra_id_3>"],
"permitted_tokens": [],
"banned_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "</s>"]
"prohibited_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_5>", "<pad>", "</s>"]
},
"target_morpheme": "canon"
"target_property": "canon"
},
"case014": {
"surfs": ["計算"],
"t5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "けい", "さん", "<extra_id_2>", "計算", "<extra_id_3>", "計算", "/", "けい", "さん"],
"permitted_tokens": [],
"banned_tokens": []
"prohibited_tokens": []
},
"mt5": {
"input_tokens": ["<pad>", "<extra_id_0>", "計算", "<extra_id_1>", "けい", "さん", "<extra_id_2>", "計算", "<extra_id_3>", "計算", "/", "けい", "さん"],
"permitted_tokens": [],
"banned_tokens": []
"prohibited_tokens": []
},
"target_morpheme": "canon"
"target_property": "canon"
}
}
Loading

0 comments on commit 14b32ed

Please sign in to comment.