Skip to content

Commit

Permalink
tweak
Browse files Browse the repository at this point in the history
  • Loading branch information
omukazu committed May 2, 2024
1 parent 054ee32 commit 68ba9f4
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/kwja/datamodule/datasets/word.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
)
from kwja.utils.kanjidic import KanjiDic
from kwja.utils.logging_util import track
+ from kwja.utils.normalization import normalize_text
from kwja.utils.reading_prediction import ReadingAligner, get_reading2reading_id

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -133,7 +134,7 @@ def _load_examples(self, doc_id2document: Dict[str, Document]) -> List[WordExamp
examples = []
example_id = 0
for document in track(doc_id2document.values(), description="Loading examples"):
- tokenizer_input: Union[List[str], str] = [m.text for m in document.morphemes]
+ tokenizer_input: Union[List[str], str] = [normalize_text(m.text) for m in document.morphemes]
encoding: Encoding = self.tokenizer(
tokenizer_input,
padding=PaddingStrategy.DO_NOT_PAD,
Expand Down

0 comments on commit 68ba9f4

Please sign in to comment.