Skip to content

Commit

Permalink
Merge pull request #83 from bact/fix-to-str
Browse files Browse the repository at this point in the history
[nlpo3-python]: Add Bound to PyString and PyModule
  • Loading branch information
bact authored Nov 11, 2024
2 parents 5d5221e + 931fd5e commit 8074526
Show file tree
Hide file tree
Showing 18 changed files with 214 additions and 68 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test-main-lib.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Test main lib (Rust crate)
name: Test main lib
on:
push:
branches:
Expand All @@ -19,8 +19,6 @@ on:

jobs:
test:
runs-on: ${{ matrix.os }}

strategy:
fail-fast: false
matrix:
Expand All @@ -30,6 +28,8 @@ jobs:
- os: windows-latest
bitness: 32

runs-on: ${{ matrix.os }}

steps:
- name: Checkout source code
uses: actions/checkout@v4
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/test-nlpo3-cli.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Test nlpo3-cli (command line)
name: Test nlpo3-cli
on:
push:
branches:
Expand All @@ -17,8 +17,6 @@ defaults:

jobs:
test:
runs-on: ${{ matrix.os }}

strategy:
fail-fast: false
matrix:
Expand All @@ -28,9 +26,11 @@ jobs:
- os: windows-latest
bitness: 32

runs-on: ${{ matrix.os }}

steps:
- name: Checkout source code
uses: actions/checkout@master
uses: actions/checkout@v4

- name: Setup Rust toolchain - non-win32
uses: actions-rs/toolchain@v1
Expand Down
69 changes: 69 additions & 0 deletions .github/workflows/test-nlpo3-python.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# CI workflow for the nlpo3 Python binding: builds the wheel, installs it,
# and runs the unit tests across the OS / Python version matrix below.
name: Test nlpo3-python
on:
  push:
    branches:
      - main
    paths:
      - 'nlpo3-python/**'
      # Path filters are matched against the full path from the repository
      # root and `*` does not match `/`, so every exclusion needs the
      # `nlpo3-python/` prefix to have any effect.
      - '!nlpo3-python/notebooks/**'
      - '!nlpo3-python/LICENSE'
      - '!nlpo3-python/**/*.md'
  pull_request:
    branches:
      - main
    paths:
      - 'nlpo3-python/**'
      # Same exclusions as for `push` (see the note above the push filters).
      - '!nlpo3-python/notebooks/**'
      - '!nlpo3-python/LICENSE'
      - '!nlpo3-python/**/*.md'

# Every `run` step executes from the Python binding's directory.
defaults:
  run:
    working-directory: nlpo3-python

jobs:
  test:
    strategy:
      # Let the remaining matrix jobs finish even if one combination fails.
      fail-fast: false
      matrix:
        os: [macos-latest, ubuntu-latest, windows-latest]
        # Versions are quoted so that "3.10" is not parsed as the float 3.1.
        python-version: ["3.13", "3.12", "3.11", "3.10", "3.9", "3.8", "3.7"]
        bitness: [64]  # 32, 64
        # NOTE(review): `matrix.bitness` is not referenced by any step in this
        # workflow, so the 32-bit entry below is not passed to setup-python or
        # to the Rust toolchain - confirm whether `architecture: x86` (and a
        # matching Rust target) should be configured for it.
        include:
          - os: windows-latest
            python-version: "3.9"
            bitness: 32
        exclude:
          - os: macos-latest
            python-version: "3.7"

    runs-on: ${{ matrix.os }}

    steps:
      - name: Checkout source code
        uses: actions/checkout@v4

      - name: Setup Rust toolchain
        uses: actions-rust-lang/setup-rust-toolchain@v1

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"

      - name: Build wheel
        run: |
          pip install -U pip
          pip install -U build setuptools setuptools-rust wheel
          python -m build --wheel

      # Since we don't know the exact name of the wheel from previous step,
      # use --find-links instead.
      - name: Install wheel
        run: pip install --no-index --find-links=dist nlpo3

      - name: Test
        run: |
          cd tests
          python -m unittest
6 changes: 6 additions & 0 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ type: software
authors:
- family-names: Suntorntip
given-names: Thanathip
- family-names: Suriyawongkul
given-names: Arthit
orcid: "https://orcid.org/0000-0002-9698-1899"
- family-names: Phatthiyaphaibun
given-names: Wannaphong
orcid: "https://orcid.org/0000-0002-4153-4354"
repository-code: "https://github.com/PyThaiNLP/nlpo3/"
repository: "https://github.com/PyThaiNLP/nlpo3/"
url: "https://github.com/PyThaiNLP/nlpo3/"
Expand Down
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ name = "nlpo3"
version = "1.4.0"
edition = "2018"
license = "Apache-2.0"
authors = ["Thanathip Suntorntip Gorlph"]
authors = ["Thanathip Suntorntip Gorlph", "Arthit Suriyawongkul"]
description = "Thai natural language processing library, with Python and Node bindings"
categories = ["text-processing"]
keywords = ["thai", "tokenizer", "nlp", "word-segmentation"]
homepage = "https://github.com/PyThaiNLP/nlpo3/"
documentation = "https://github.com/PyThaiNLP/nlpo3/blob/main/README.md"
repository = "https://github.com/PyThaiNLP/nlpo3/"
documentation = "https://github.com/PyThaiNLP/nlpo3/blob/main/README.md"
readme = "README.md"
keywords = ["thai", "tokenizer", "nlp", "word-segmentation"]
categories = ["text-processing"]
exclude = [
".gitignore",
".github/*",
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,5 +191,6 @@ Issues:

## License

nlpO3 is copyrighted by its authors and licensed under terms of the Apache
Software License 2.0 (Apache-2.0) - see file [LICENSE](./LICENSE) for details.
nlpO3 is copyrighted by its authors
and licensed under terms of the Apache Software License 2.0 (Apache-2.0).
See file [LICENSE](./LICENSE) for details.
6 changes: 3 additions & 3 deletions nlpo3-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ edition = "2018"
license = "Apache-2.0"
authors = ["Vee Satayamas <5ssgdxltv@relay.firefox.com>"]
description = "Command line interface for nlpO3, a Thai natural language processing library"
documentation = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-cli/README.md"
categories = ["text-processing", "command-line-utilities"]
keywords = ["thai", "tokenizer", "nlp", "word-segmentation", "cli"]
homepage = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-cli/"
repository = "https://github.com/PyThaiNLP/nlpo3/"
documentation = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-cli/README.md"
readme = "README.md"
keywords = ["thai", "tokenizer", "nlp", "word-segmentation", "cli"]
categories = ["text-processing", "command-line-utilities"]

[[bin]]
name = "nlpo3"
Expand Down
6 changes: 6 additions & 0 deletions nlpo3-cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,9 @@ nlpo3 help
```bash
echo "ฉันกินข้าว" | nlpo3 segment
```

## License

nlpo3-cli is copyrighted by its authors
and licensed under terms of the Apache Software License 2.0 (Apache-2.0).
See file [LICENSE](./LICENSE) for details.
6 changes: 3 additions & 3 deletions nlpo3-nodejs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ edition = "2018"
license = "Apache-2.0"
authors = ["Thanathip Suntorntip Gorlph"]
description = "Node binding for nlpO3 Thai language processing library"
documentation = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-nodejs/README.md"
categories = ["text-processing"]
keywords = ["thai", "tokenizer", "nlp", "word-segmentation"]
homepage = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-nodejs/"
repository = "https://github.com/PyThaiNLP/nlpo3/"
documentation = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-nodejs/README.md"
readme = "README.md"
keywords = ["thai", "tokenizer", "nlp", "word-segmentation"]
categories = ["text-processing"]
exclude = ["index.node"]

[lib]
Expand Down
6 changes: 6 additions & 0 deletions nlpo3-nodejs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,9 @@ Please report issues at <https://github.com/PyThaiNLP/nlpo3/issues>
## TODO

- Find a way to build binaries and publish on npm.

## License

nlpO3 Node binding is copyrighted by its authors
and licensed under terms of the Apache Software License 2.0 (Apache-2.0).
See file [LICENSE](./LICENSE) for details.
22 changes: 16 additions & 6 deletions nlpo3-nodejs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,28 @@ use neon::prelude::*;
use nlpo3::tokenizer::{newmm::NewmmTokenizer, tokenizer_trait::Tokenizer};

lazy_static! {
static ref DICT_COLLECTION: Mutex<HashMap<String, Box<NewmmTokenizer>>> =
static ref TOKENIZER_COLLECTION: Mutex<HashMap<String, Box<NewmmTokenizer>>> =
Mutex::new(HashMap::new());
}

// Load a dictionary file to a tokenizer,
// and add that tokenizer to the tokenizer collection.
//
// Dictionary file must contain one word per line.
// If successful, will insert a NewmmTokenizer to TOKENIZER_COLLECTION.
// Returns a string describing the loading result (success or failure).
fn load_dict(mut cx: FunctionContext) -> JsResult<JsString> {
let mut dict_col_lock = DICT_COLLECTION.lock().unwrap();
let mut tokenizer_col_lock = TOKENIZER_COLLECTION.lock().unwrap();
let file_path = cx.argument::<JsString>(0)?.value(&mut cx);
let dict_name = cx.argument::<JsString>(1)?.value(&mut cx);
if let Some(_) = dict_col_lock.get(&dict_name) {
if let Some(_) = tokenizer_col_lock.get(&dict_name) {
Ok(cx.string(format!(
"Failed: dictionary {} exists, please use another name.",
dict_name
)))
} else {
let tokenizer = NewmmTokenizer::new(&file_path);
dict_col_lock.insert(dict_name.to_owned(), Box::new(tokenizer));
tokenizer_col_lock.insert(dict_name.to_owned(), Box::new(tokenizer));

Ok(cx.string(format!(
"Successful: dictionary name {} from file {} has been successfully loaded",
Expand All @@ -33,13 +39,17 @@ fn load_dict(mut cx: FunctionContext) -> JsResult<JsString> {
}
}

// Break text into tokens.
// Use newmm algorithm.
/// Can use multithreading, but takes a lot of memory.
/// Returns an array of strings.
fn segment(mut cx: FunctionContext) -> JsResult<JsArray> {
let text = cx.argument::<JsString>(0)?.value(&mut cx);
let dict_name = cx.argument::<JsString>(1)?.value(&mut cx);
let safe = cx.argument::<JsBoolean>(2)?.value(&mut cx);
let parallel = cx.argument::<JsBoolean>(3)?.value(&mut cx);
if let Some(loaded_dict) = DICT_COLLECTION.lock().unwrap().get(&dict_name) {
let result = loaded_dict.segment_to_string(&text, safe, parallel);
if let Some(loaded_tokenizer) = TOKENIZER_COLLECTION.lock().unwrap().get(&dict_name) {
let result = loaded_tokenizer.segment_to_string(&text, safe, parallel);
let js_result_array = JsArray::new(&mut cx, result.len() as u32);
for (i, obj) in result.iter().enumerate() {
let js_string = cx.string(obj);
Expand Down
2 changes: 1 addition & 1 deletion nlpo3-python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 21 additions & 4 deletions nlpo3-python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,30 @@
[package]
name = "nlpo3-python"
version = "1.3.1-dev"
version = "1.3.1"
edition = "2018"
license = "Apache-2.0"
authors = ["Thanathip Suntorntip Gorlph"]
authors = [
"Thanathip Suntorntip Gorlph",
"Arthit Suriyawongkul",
"Wannaphong Phatthiyaphaibun <wannaphong@yahoo.com>",
]
description = "Python binding for nlpO3 Thai language processing library"
exclude = ["notebooks"]
keywords = ["thai", "tokenizer", "nlp", "word-segmentation", "python"]
categories = ["text-processing"]
keywords = ["thai", "tokenizer", "nlp", "word-segmentation", "python"]
homepage = "https://github.com/PyThaiNLP/nlpo3/tree/main/nlpo3-python"
repository = "https://github.com/PyThaiNLP/nlpo3/"
documentation = "https://github.com/PyThaiNLP/nlpo3/blob/main/nlpo3-python/README.md"
readme = "README.md"
exclude = [
".gitignore",
".github/",
"build/",
"dist/",
"notebooks/",
"target",
"tests/",
"*.sh",
]

[lib]
name = "_nlpo3_python_backend"
Expand Down
25 changes: 23 additions & 2 deletions nlpo3-python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ SPDX-License-Identifier: Apache-2.0
# nlpO3 Python binding

[![PyPI](https://img.shields.io/pypi/v/nlpo3.svg "PyPI")](https://pypi.python.org/pypi/nlpo3)
[![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg "Python 3.6")](https://www.python.org/downloads/)
[![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg "Python 3.7")](https://www.python.org/downloads/)
[![Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg "Apache-2.0")](https://opensource.org/license/apache-2-0)

Python binding for nlpO3, a Thai natural language processing library in Rust.
Expand Down Expand Up @@ -86,7 +86,7 @@ segment("สวัสดีครับ", dict_name="dict_name", safe=True)
### Requirements

- [Rust 2018 Edition](https://www.rust-lang.org/tools/install)
- Python 3.6 or newer
- Python 3.7 or newer (PyO3's minimum supported version)
- Python Development Headers
- Ubuntu: `sudo apt-get install python3-dev`
- macOS: No action needed
Expand All @@ -103,6 +103,27 @@ python -m build
This should generate a wheel file, in `dist/` directory,
which can be installed by pip.

To install a wheel from a local directory:

```bash
pip install dist/nlpo3-1.3.1-cp311-cp311-macosx_12_0_x86_64.whl
```

## Test

To run the Python unit tests:

```bash
cd tests
python -m unittest
```

## Issues

Please report issues at <https://github.com/PyThaiNLP/nlpo3/issues>

## License

nlpO3 Python binding is copyrighted by its authors
and licensed under terms of the Apache Software License 2.0 (Apache-2.0).
See file [LICENSE](./LICENSE) for details.
7 changes: 3 additions & 4 deletions nlpo3-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ build-backend = "setuptools.build_meta"

[project]
name = "nlpo3"
version = "1.3.1-dev"
version = "1.3.1"
description = "Python binding for nlpO3 Thai language processing library in Rust"
readme = "README.md"
requires-python = ">=3.6"
requires-python = ">=3.7"
license = { text = "Apache-2.0" }
keywords = ["thai", "tokenizer", "nlp", "word-segmentation", "pythainlp"]
authors = [
Expand All @@ -18,7 +18,6 @@ authors = [
classifiers = [
"Development Status :: 5 - Production/Stable",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
Expand All @@ -38,7 +37,7 @@ homepage = "https://github.com/PyThaiNLP/nlpo3/"
repository = "https://github.com/PyThaiNLP/nlpo3/"

[tool.poetry.dependencies]
python = "^3.6"
python = "^3.7"

[tool.poetry.dev-dependencies]
pytest = "*"
Expand Down
Loading

0 comments on commit 8074526

Please sign in to comment.