-rw-r--r-- | sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch | 39
-rw-r--r-- | sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild (renamed from sci-libs/tokenizers/tokenizers-0.15.2.ebuild) | 15
2 files changed, 49 insertions(+), 5 deletions(-)
diff --git a/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch b/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch
new file mode 100644
index 000000000000..01a872cb846a
--- /dev/null
+++ b/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch
@@ -0,0 +1,39 @@
+--- a/tests/bindings/test_trainers.py	2024-04-07 18:21:19.443506351 +0200
++++ b/tests/bindings/test_trainers.py	2024-04-07 18:21:54.893466083 +0200
+@@ -295,8 +295,8 @@
+         tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
+             [pre_tokenizers.Whitespace(), pre_tokenizers.Digits(individual_digits=True)]
+         )
+-        tokenizer.train(files=["data/big.txt"], trainer=trainer)
++        tokenizer.train(files=["tests/data/big.txt"], trainer=trainer)
+ 
+-        tokenizer.save("data/tokenizer.json")
++        tokenizer.save("tests/data/tokenizer.json")
+ 
+-        tokenizer.from_file("data/tokenizer.json")
++        tokenizer.from_file("tests/data/tokenizer.json")
+--- a/tests/documentation/test_tutorial_train_from_iterators.py	2024-04-07 18:19:08.653593406 +0200
++++ b/tests/documentation/test_tutorial_train_from_iterators.py	2024-04-07 18:19:39.206906910 +0200
+@@ -40,7 +40,7 @@
+     def setup_gzip_files(self, train_files):
+         with open(train_files["small"], "rt") as small:
+             for n in range(3):
+-                path = f"data/my-file.{n}.gz"
++                path = f"tests/data/my-file.{n}.gz"
+                 with gzip.open(path, "wt") as f:
+                     f.write(small.read())
+ 
+@@ -87,11 +87,11 @@
+         # START single_gzip
+         import gzip
+ 
+-        with gzip.open("data/my-file.0.gz", "rt") as f:
++        with gzip.open("tests/data/my-file.0.gz", "rt") as f:
+             tokenizer.train_from_iterator(f, trainer=trainer)
+         # END single_gzip
+         # START multi_gzip
+-        files = ["data/my-file.0.gz", "data/my-file.1.gz", "data/my-file.2.gz"]
++        files = ["tests/data/my-file.0.gz", "tests/data/my-file.1.gz", "tests/data/my-file.2.gz"]
+ 
+         def gzip_iterator():
+             for path in files:
diff --git a/sci-libs/tokenizers/tokenizers-0.15.2.ebuild b/sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild
index d2da8b88ac9b..ed6b224ac702 100644
--- a/sci-libs/tokenizers/tokenizers-0.15.2.ebuild
+++ b/sci-libs/tokenizers/tokenizers-0.15.2-r1.ebuild
@@ -6,8 +6,9 @@
 EAPI=8
 
 DISTUTILS_USE_PEP517=maturin
-PYTHON_COMPAT=( python3_{9..12} )
+PYTHON_COMPAT=( python3_{10..12} )
 DISTUTILS_EXT=1
+DISTUTILS_SINGLE_IMPL=1
 
 CRATES="
 	adler@1.0.2
@@ -290,9 +291,13 @@ LICENSE+="
 "
 SLOT="0"
 KEYWORDS="~amd64"
-RESTRICT="test"
 
-BDEPEND="dev-python/setuptools-rust[${PYTHON_USEDEP}]"
+BDEPEND="
+	test? ( sci-libs/datasets[${PYTHON_SINGLE_USEDEP}] )
+	$(python_gen_cond_dep '
+		dev-python/setuptools-rust[${PYTHON_USEDEP}]
+	')
+"
 
 distutils_enable_tests pytest
 
@@ -305,6 +310,7 @@ src_unpack() {
 src_prepare() {
 	default
 	cd bindings/python
+	eapply "${FILESDIR}"/${P}-test.patch
 	distutils-r1_src_prepare
 }
 
@@ -327,8 +333,7 @@ src_test() {
 	# Tests do not work
 	#cargo_src_test
 	cd ../bindings/python
-	# Need dataset module
-	#distutils-r1_src_test
+	distutils-r1_src_test
 }
 
 src_install() {
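For context, the new test patch only rewrites fixture paths: the upstream
suite opens "data/..." relative to the current working directory, while the
ebuild's src_test runs pytest from bindings/python, where the fixtures live
under tests/data/. A minimal sketch of what the patched test_trainers.py hunk
exercises, assuming a Unigram model and trainer for concreteness (the real
test's setup lies outside the quoted hunk):

# Sketch only: mirrors the patched calls in tests/bindings/test_trainers.py.
# The model/trainer choice is an assumption, not taken from the commit.
from tokenizers import Tokenizer, models, pre_tokenizers, trainers

tokenizer = Tokenizer(models.Unigram())
trainer = trainers.UnigramTrainer(show_progress=False)
tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
    [pre_tokenizers.Whitespace(), pre_tokenizers.Digits(individual_digits=True)]
)

# Relative paths resolve against the pytest working directory, which the
# ebuild sets to bindings/python -- hence the tests/data/ prefix.
tokenizer.train(files=["tests/data/big.txt"], trainer=trainer)
tokenizer.save("tests/data/tokenizer.json")
tokenizer = Tokenizer.from_file("tests/data/tokenizer.json")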
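Likewise for test_tutorial_train_from_iterators.py: the tutorial trains from
gzip streams, so only the fixture locations change. A hedged sketch of the
train_from_iterator pattern the patched hunks cover, with the corrected
tests/data/ paths (model and trainer again assumed):

# Sketch of the gzip-training pattern from
# tests/documentation/test_tutorial_train_from_iterators.py (paths as patched).
import gzip

from tokenizers import Tokenizer, models, trainers

tokenizer = Tokenizer(models.Unigram())  # assumed model, see note above
trainer = trainers.UnigramTrainer(show_progress=False)

# Single gzip file: a text-mode file object is itself an iterator of lines.
with gzip.open("tests/data/my-file.0.gz", "rt") as f:
    tokenizer.train_from_iterator(f, trainer=trainer)

# Several gzip files: chain them through a generator; train_from_iterator
# accepts any iterator of strings.
files = [f"tests/data/my-file.{n}.gz" for n in range(3)]

def gzip_iterator():
    for path in files:
        with gzip.open(path, "rt") as f:
            yield from f

tokenizer.train_from_iterator(gzip_iterator(), trainer=trainer)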