Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new dependencies of spacy-models #28345

Merged
merged 7 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions recipes/curated-tokenizers/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Variant pins applied when building this recipe.
# NOTE(review): libgrpc is not referenced in the recipe's requirements;
# presumably it is listed only because it shares a zip_keys group with
# libprotobuf in the global pinning -- confirm.
libgrpc:
  - "1.67"
libprotobuf:
  - "5.28.2"
52 changes: 52 additions & 0 deletions recipes/curated-tokenizers/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{% set version = "0.0.9" %}

# Recipe for curated-tokenizers, built from the PyPI sdist with one local patch.
package:
  name: curated-tokenizers
  version: {{ version }}

source:
  url: https://pypi.org/packages/source/c/curated-tokenizers/curated-tokenizers-{{ version }}.tar.gz
  sha256: c93d47e54ab3528a6db2796eeb4bdce5d44e8226c671e42c2f23522ab1d0ce25
  patches:
    # Patch subject: "use our own sentencepiece/abseil". It drops the bundled
    # sources from the extension and links against libraries under $PREFIX.
    - patches/0001-use-our-own-sentencepiece-abseil.patch

build:
  script: python -m pip install . -vv
  number: 0

requirements:
  build:
    # Compiled (Cython/C++) extension, so C and C++ toolchains are needed.
    - {{ stdlib("c") }}
    - {{ compiler("c") }}
    - {{ compiler("cxx") }}
  host:
    - python
    - pip
    - setuptools
    - cython
    # Libraries the patched setup.py links the extension against.
    # NOTE(review): the patch also links absl_* libraries, but abseil is not
    # listed here explicitly -- confirm it is provided transitively or add it.
    - libprotobuf
    - libsentencepiece
  run:
    - python
    - regex

test:
  requires:
    - pip
  imports:
    - curated_tokenizers
  commands:
    - pip check

about:
  # Project homepage; must point at the tokenizers repository (same as
  # dev_url), not at the sibling curated-transformers project.
  home: https://github.com/explosion/curated-tokenizers
  summary: 'Lightweight piece tokenization library'
  license: MIT
  license_family: MIT
  license_file: LICENSE
  dev_url: https://github.com/explosion/curated-tokenizers

extra:
  recipe-maintainers:
    - h-vetinari
    - conda-forge/spacy
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
From cc968a797278f4862db877a9449fadcc60f455b7 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Mon, 25 Nov 2024 08:07:00 +1100
Subject: [PATCH] use our own sentencepiece/abseil

---
curated_tokenizers/_spp.pxd | 2 +-
setup.py | 38 +++++++++++++++++++++++++------------
2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/curated_tokenizers/_spp.pxd b/curated_tokenizers/_spp.pxd
index 29f5ec2..4630b45 100644
--- a/curated_tokenizers/_spp.pxd
+++ b/curated_tokenizers/_spp.pxd
@@ -3,7 +3,7 @@ from libcpp.memory cimport shared_ptr
from libcpp.string cimport string
from libcpp.vector cimport vector

-cdef extern from "builtin_pb/sentencepiece.pb.h" namespace "sentencepiece":
+cdef extern from "sentencepiece.pb.h" namespace "sentencepiece":
cdef cppclass SentencePieceText_SentencePiece:
uint32_t id() const
const string & piece() const
diff --git a/setup.py b/setup.py
index d0aa11b..0306041 100755
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,6 @@
#!/usr/bin/env python
+import pathlib
+import os
import sys
from setuptools.command.build_ext import build_ext
from setuptools import Extension, setup, find_packages
@@ -79,18 +81,17 @@ MOD_NAMES = [
]
COMPILE_OPTIONS = {
"msvc": [
+ "/std:c++17",
"/Ox",
"/EHsc",
- "/D_USE_INTERNAL_STRING_VIEW",
"/DHAVE_PTHREAD",
"/wd4018",
"/wd4514",
],
"other": [
- "--std=c++14",
+ "--std=c++17",
"-Wno-sign-compare" "-Wno-strict-prototypes",
"-Wno-unused-function",
- "-D_USE_INTERNAL_STRING_VIEW",
"-pthread",
"-DHAVE_PTHREAD=1",
],
@@ -139,21 +140,34 @@ def setup_package():
if len(sys.argv) > 1 and sys.argv[1] == "clean":
return clean(root / "curated_tokenizers")

+ library_path = pathlib.Path(os.environ['PREFIX'])
+ EXTENSION_LIBRARIES = []
+ if "win32" in sys.platform:
+ absl_libs = (
+ 'abseil_dll', 'absl_log_flags', 'absl_flags_commandlineflag', 'absl_flags_commandlineflag_internal',
+ 'absl_flags_config', 'absl_flags_internal', 'absl_flags_marshalling', 'absl_flags_parse',
+ 'absl_flags_private_handle_accessor', 'absl_flags_program_name', 'absl_flags_reflection',
+ 'absl_flags_usage', 'absl_flags_usage_internal',
+ )
+ EXTENSION_LIBRARIES += ["sentencepiece_import", "libprotobuf"]
+ library_path = library_path / "Library"
+ else:
+ absl_glob = pathlib.Path(os.environ['PREFIX']).glob('lib/libabsl_*.so')
+ absl_libs = tuple(lib.stem[3:] for lib in absl_glob)
+ EXTENSION_LIBRARIES += ["sentencepiece", "protobuf"]
+
+ EXTENSION_LIBRARIES += absl_libs
+
ext_modules = [
Extension(
"curated_tokenizers._spp",
- ["curated_tokenizers/_spp.pyx"]
- + ABSL_SRC
- + PROTOBUF_LIGHT_SRC
- + SENTENCEPIECE_SRC
- + SENTENCEPIECE_PROTOBUF_SRC,
+ ["curated_tokenizers/_spp.pyx"],
include_dirs=[
"curated_tokenizers",
- "sentencepiece",
- "sentencepiece/src",
- "sentencepiece/src/builtin_pb",
- "sentencepiece/third_party/protobuf-lite",
+ str(library_path / "include")
],
+ libraries=EXTENSION_LIBRARIES,
+ library_dirs=[str(library_path)],
language="c++",
),
Extension(
46 changes: 46 additions & 0 deletions recipes/curated-transformers/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{% set version = "0.1.1" %}
# Fall back to 3.9 when the build variant does not define python_min.
{% set python_min = python_min|default("3.9") %}

# Recipe for curated-transformers, built from the PyPI sdist.
package:
  name: curated-transformers
  version: {{ version }}

source:
  url: https://pypi.org/packages/source/c/curated-transformers/curated-transformers-{{ version }}.tar.gz
  sha256: 4671f03314df30efda2ec2b59bc7692ea34fcea44cb65382342c16684e8a2119

build:
  # Pure-Python package: a single noarch build, no compilers required.
  noarch: python
  script: python -m pip install . -vv
  number: 0

requirements:
  host:
    # Build against the lowest supported Python.
    - python {{ python_min }}.*
    - pip
    - setuptools
  run:
    - python >={{ python_min }}
    - pytorch

test:
  requires:
    # Test on the lowest supported Python as well.
    - python {{ python_min }}
    - pip
  imports:
    - curated_transformers
  commands:
    - pip check

about:
  home: https://github.com/explosion/curated-transformers
  summary: '🤖 A PyTorch library of curated Transformer models and their composable components'
  license: MIT
  license_family: MIT
  license_file: LICENSE
  dev_url: https://github.com/explosion/curated-transformers

extra:
  recipe-maintainers:
    - h-vetinari
    - conda-forge/spacy
51 changes: 51 additions & 0 deletions recipes/spacy-curated-transformers/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{% set version = "0.3.0" %}
# Fall back to 3.9 when the build variant does not define python_min.
{% set python_min = python_min|default("3.9") %}

# Recipe for spacy-curated-transformers, built from the PyPI sdist.
package:
  name: spacy-curated-transformers
  version: {{ version }}

source:
  url: https://pypi.org/packages/source/s/spacy_curated_transformers/spacy_curated_transformers-{{ version }}.tar.gz
  sha256: 989a6bf2aa7becd1ac8c3be5f245cd489223d4e16e7218f6b69479c7e2689937

build:
  # Pure-Python package: a single noarch build, no compilers required.
  noarch: python
  script: python -m pip install . -vv
  number: 0

requirements:
  host:
    # Build against the lowest supported Python.
    - python {{ python_min }}.*
    - pip
    - setuptools
  run:
    - python >={{ python_min }}
    # Bounded runtime dependencies.
    # NOTE(review): presumably these mirror upstream's install requirements --
    # confirm when bumping the version.
    - curated-tokenizers >=0.0.9,<0.1.0
    - curated-transformers >=0.1.0,<0.2.0
    - pytorch >=1.12.0
    - spacy >=3.7.0,<4.0.0
    - srsly
    - thinc >=8.1.6,<9.1.0

test:
  requires:
    # Test on the lowest supported Python as well.
    - python {{ python_min }}
    - pip
  imports:
    - spacy_curated_transformers
  commands:
    - pip check

about:
  home: https://github.com/explosion/spacy-curated-transformers
  summary: 'spaCy entry points for Curated Transformers'
  license: MIT
  license_family: MIT
  license_file: LICENSE
  dev_url: https://github.com/explosion/spacy-curated-transformers

extra:
  recipe-maintainers:
    - h-vetinari
    - conda-forge/spacy
16 changes: 16 additions & 0 deletions recipes/spacy-pkuseg/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# We only want to override numpy, but the whole zip has to be overridden
# together: numpy, python and python_impl are given the same number of
# entries (four), one per build variant.
numpy:
  - "2.0"
  - "2.0"
  - "2.0"
  - "2.0"
python:
  - 3.9.* *_cpython
  - 3.10.* *_cpython
  - 3.11.* *_cpython
  - 3.12.* *_cpython
python_impl:
  - cpython
  - cpython
  - cpython
  - cpython
53 changes: 53 additions & 0 deletions recipes/spacy-pkuseg/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{% set version = "1.0.0" %}

# Recipe for spacy-pkuseg, built from the PyPI sdist.
package:
  name: spacy-pkuseg
  version: {{ version }}

source:
  url: https://pypi.org/packages/source/s/spacy-pkuseg/spacy_pkuseg-{{ version }}.tar.gz
  sha256: 33531ea8e13fc09ebe3b40bd97e84d07ccd5a1fe67fa8e84173769a25ac03158

build:
  script: python -m pip install . -vv
  number: 0

requirements:
  build:
    # Compiled (Cython) extension, so C and C++ toolchains are needed.
    - {{ stdlib("c") }}
    - {{ compiler("c") }}
    - {{ compiler("cxx") }}
  host:
    - python
    - pip
    - setuptools
    - cython
    # numpy is listed for the build only.
    # NOTE(review): the matching run requirement presumably comes from
    # numpy's run_exports -- confirm.
    - numpy
  run:
    - python
    - srsly >=2.3.0,<3.0.0

test:
  requires:
    - pip
  imports:
    - spacy_pkuseg
  commands:
    - pip check

about:
  home: https://github.com/explosion/spacy-pkuseg
  summary: 'Chinese word segmentation toolkit for spaCy'
  description: |
    This package is a fork of [pkuseg-python](https://github.com/lancopku/pkuseg-python)
    that simplifies installation and serialization for use with spaCy.
    The underlying segmentation tools remain unmodified.
  license: MIT
  license_family: MIT
  license_file: LICENSE
  dev_url: https://github.com/explosion/spacy-pkuseg

extra:
  recipe-maintainers:
    - h-vetinari
    - conda-forge/spacy
Loading