summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch 39
-rw-r--r-- sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild (renamed from sci-libs/tokenizers/tokenizers-0.15.2.ebuild) 15
2 files changed, 49 insertions, 5 deletions
diff --git a/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch b/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch
new file mode 100644
index 000000000000..01a872cb846a
--- /dev/null
+++ b/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch
@@ -0,0 +1,39 @@
+--- a/tests/bindings/test_trainers.py 2024-04-07 18:21:19.443506351 +0200
++++ b/tests/bindings/test_trainers.py 2024-04-07 18:21:54.893466083 +0200
+@@ -295,8 +295,8 @@
+ tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
+ [pre_tokenizers.Whitespace(), pre_tokenizers.Digits(individual_digits=True)]
+ )
+- tokenizer.train(files=["data/big.txt"], trainer=trainer)
++ tokenizer.train(files=["tests/data/big.txt"], trainer=trainer)
+
+- tokenizer.save("data/tokenizer.json")
++ tokenizer.save("tests/data/tokenizer.json")
+
+- tokenizer.from_file("data/tokenizer.json")
++ tokenizer.from_file("tests/data/tokenizer.json")
+--- a/tests/documentation/test_tutorial_train_from_iterators.py 2024-04-07 18:19:08.653593406 +0200
++++ b/tests/documentation/test_tutorial_train_from_iterators.py 2024-04-07 18:19:39.206906910 +0200
+@@ -40,7 +40,7 @@
+ def setup_gzip_files(self, train_files):
+ with open(train_files["small"], "rt") as small:
+ for n in range(3):
+- path = f"data/my-file.{n}.gz"
++ path = f"tests/data/my-file.{n}.gz"
+ with gzip.open(path, "wt") as f:
+ f.write(small.read())
+
+@@ -87,11 +87,11 @@
+ # START single_gzip
+ import gzip
+
+- with gzip.open("data/my-file.0.gz", "rt") as f:
++ with gzip.open("tests/data/my-file.0.gz", "rt") as f:
+ tokenizer.train_from_iterator(f, trainer=trainer)
+ # END single_gzip
+ # START multi_gzip
+- files = ["data/my-file.0.gz", "data/my-file.1.gz", "data/my-file.2.gz"]
++ files = ["tests/data/my-file.0.gz", "tests/data/my-file.1.gz", "tests/data/my-file.2.gz"]
+
+ def gzip_iterator():
+ for path in files:
diff --git a/sci-libs/tokenizers/tokenizers-0.15.2.ebuild b/sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild
index d2da8b88ac9b..ed6b224ac702 100644
--- a/sci-libs/tokenizers/tokenizers-0.15.2.ebuild
+++ b/sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild
@@ -6,8 +6,9 @@
EAPI=8
DISTUTILS_USE_PEP517=maturin
-PYTHON_COMPAT=( python3_{9..12} )
+PYTHON_COMPAT=( python3_{10..12} )
DISTUTILS_EXT=1
+DISTUTILS_SINGLE_IMPL=1
CRATES="
adler@1.0.2
@@ -290,9 +291,13 @@ LICENSE+="
"
SLOT="0"
KEYWORDS="~amd64"
-RESTRICT="test"
-BDEPEND="dev-python/setuptools-rust[${PYTHON_USEDEP}]"
+BDEPEND="
+ test? ( sci-libs/datasets[${PYTHON_SINGLE_USEDEP}] )
+ $(python_gen_cond_dep '
+ dev-python/setuptools-rust[${PYTHON_USEDEP}]
+ ')
+"
distutils_enable_tests pytest
@@ -305,6 +310,7 @@ src_unpack() {
src_prepare() {
default
cd bindings/python
+ eapply "${FILESDIR}"/${P}-test.patch
distutils-r1_src_prepare
}
@@ -327,8 +333,7 @@ src_test() {
# Tests do not work
#cargo_src_test
cd ../bindings/python
- # Need dataset module
- #distutils-r1_src_test
+ distutils-r1_src_test
}
src_install() {