diff --git a/src/dom_tokenizers/pre_tokenizers/dom_snapshot.py b/src/dom_tokenizers/pre_tokenizers/dom_snapshot.py
index 6770820..0658135 100644
--- a/src/dom_tokenizers/pre_tokenizers/dom_snapshot.py
+++ b/src/dom_tokenizers/pre_tokenizers/dom_snapshot.py
@@ -15,7 +15,7 @@
 from tokenizers import NormalizedString, PreTokenizedString
 from unidecode import unidecode
 
-from .pre_tokenizer import BasePreTokenizer as PreTokenizer
+from .pre_tokenizer import PreTokenizer
 
 
 class DOMSnapshotPreTokenizer(PreTokenizer):
diff --git a/src/dom_tokenizers/pre_tokenizers/pre_tokenizer.py b/src/dom_tokenizers/pre_tokenizers/pre_tokenizer.py
index 0d14f8c..7dab6a6 100644
--- a/src/dom_tokenizers/pre_tokenizers/pre_tokenizer.py
+++ b/src/dom_tokenizers/pre_tokenizers/pre_tokenizer.py
@@ -1,9 +1,9 @@
 import weakref
 
-from tokenizers.pre_tokenizers import PreTokenizer
+from tokenizers.pre_tokenizers import PreTokenizer as _PreTokenizer
 
 
-class BasePreTokenizer:
+class PreTokenizer:
     @classmethod
     def hook_into(cls, tokenizer):
         """Reconfigure `tokenizer` for DOM-aware pre-tokenization.
@@ -34,7 +34,7 @@ def bind_to(self, tokenizer):
         self._tokenizer = weakref.proxy(tokenizer)
 
         # Install ourself as the tokenizer's pre-tokenizer.
-        backend.pre_tokenizer = PreTokenizer.custom(self)
+        backend.pre_tokenizer = _PreTokenizer.custom(self)
 
         # Attempt to detect and postpone any lowercasing applied to
         # our input until after the base64 detection and handling is
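
For context: the diff renames the project's base class from BasePreTokenizer to PreTokenizer, so the Hugging Face tokenizers class of the same name is re-imported as _PreTokenizer. Without that alias, the call in bind_to would resolve to the new local base class, which has no custom() classmethod. Below is a minimal sketch of the hook pattern this code wraps, assuming the custom-component protocol of the tokenizers Python bindings; DomAwareSplitter and its whitespace splitting are illustrative stand-ins for DOMSnapshotPreTokenizer's real DOM-aware logic, and `backend` stands in for the backend tokenizer object obtained elsewhere in bind_to (not shown in the hunk).

    from tokenizers import NormalizedString, PreTokenizedString
    from tokenizers.pre_tokenizers import PreTokenizer as _PreTokenizer


    class DomAwareSplitter:
        """Hypothetical stand-in for DOMSnapshotPreTokenizer."""

        def _split(self, i: int, ns: NormalizedString) -> list[NormalizedString]:
            # A real implementation would split on DOM structure; plain
            # whitespace splitting keeps the sketch self-contained.
            return ns.split(" ", "removed")

        def pre_tokenize(self, pretok: PreTokenizedString):
            # Entry point the backend calls; mutates `pretok` in place.
            pretok.split(self._split)


    # Installation mirrors the diff's bind_to:
    #     backend.pre_tokenizer = _PreTokenizer.custom(DomAwareSplitter())

The leading-underscore alias is a conventional way to keep the backend class reachable while freeing its name for the subclass that callers actually interact with.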