Skip to content

Commit

Permalink
Documentation updates
Browse files: browse the repository at this point in the history
  • Loading branch information
gbenson committed May 16, 2024
1 parent 61f46bb commit e599219
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 8 deletions.
21 changes: 19 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,25 @@
<p style="float: right">
<a href="https://badge.fury.io/py/dom-tokenizers">
<img alt="Build" src="https://badge.fury.io/py/dom-tokenizers.svg">
</a>
<a href="https://github.com/gbenson/dom-tokenizers/blob/master/LICENSE">
<img alt="GitHub" src="https://img.shields.io/github/license/gbenson/dom-tokenizers.svg?color=blue">
</a>
</p>

# DOM tokenizers

HTML DOM-aware tokenizers for Hugging Face language models.
DOM-aware tokenizers for [🤗 Hugging Face](https://huggingface.co/)
language models.

## Installation

### With pip
```sh
pip install dom-tokenizers[train]
```

## Setup for development
### From sources

```sh
git clone https://github.com/gbenson/dom-tokenizers.git
Expand Down
17 changes: 11 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,21 @@
name = "dom-tokenizers"
version = "0.0.1"
authors = [{ name = "Gary Benson" }]
description = "HTML DOM-aware tokenizers for Hugging Face language models"
description = "DOM-aware tokenizers for Hugging Face language models"
readme = "README.md"
license = { file = "LICENSE" }
license = { text = "Apache Software License (Apache-2.0)" }
requires-python = ">=3.10" # match..case
classifiers = [
"Programming Language :: Python :: 3",
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Internet :: WWW/HTTP",
"Topic :: Software Development :: Libraries",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
Expand All @@ -25,8 +31,7 @@ dependencies = [

[project.urls]
Homepage = "https://github.com/gbenson/dom-tokenizers"
Repository = "https://github.com/gbenson/dom-tokenizers"
"Bug Tracker" = "https://github.com/gbenson/dom-tokenizers/issues"
#Source = "https://github.com/gbenson/dom-tokenizers"

[project.optional-dependencies]
dev = [
Expand Down

0 comments on commit e599219

Please sign in to comment.