diff options
-rw-r--r-- | dev-python/cchardet/cchardet-2.1.7-r1.ebuild | 30 |
-rw-r--r-- | dev-python/cchardet/files/cchardet-2.1.7-pytest.patch | 120 |
2 files changed, 150 insertions, 0 deletions
diff --git a/dev-python/cchardet/cchardet-2.1.7-r1.ebuild b/dev-python/cchardet/cchardet-2.1.7-r1.ebuild
new file mode 100644
index 000000000000..4f344e9f6e57
--- /dev/null
+++ b/dev-python/cchardet/cchardet-2.1.7-r1.ebuild
@@ -0,0 +1,30 @@
+# Copyright 2021-2022 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DISTUTILS_USE_PEP517=setuptools
+PYTHON_COMPAT=( python3_{8..11} )
+inherit distutils-r1
+
+DESCRIPTION="High speed universal character encoding detector"
+HOMEPAGE="
+	https://github.com/PyYoshi/cChardet
+	https://pypi.org/project/cchardet/
+"
+SRC_URI="mirror://pypi/${PN:0:1}/${PN}/${P}.tar.gz"
+
+LICENSE="MPL-1.1"
+SLOT="0"
+KEYWORDS="~amd64 ~arm ~x86"
+
+BDEPEND="
+	dev-python/cython[${PYTHON_USEDEP}]
+"
+
+PATCHES=(
+	# https://github.com/PyYoshi/cChardet/pull/78
+	"${FILESDIR}/${P}-pytest.patch"
+)
+
+distutils_enable_tests pytest
diff --git a/dev-python/cchardet/files/cchardet-2.1.7-pytest.patch b/dev-python/cchardet/files/cchardet-2.1.7-pytest.patch
new file mode 100644
index 000000000000..11f38579c184
--- /dev/null
+++ b/dev-python/cchardet/files/cchardet-2.1.7-pytest.patch
@@ -0,0 +1,120 @@
+https://github.com/PyYoshi/cChardet/pull/78
+
+From: q0w <43147888+q0w@users.noreply.github.com>
+Date: Wed, 17 Nov 2021 14:50:41 +0300
+Subject: [PATCH 02/13] Use pytest
+
+--- /dev/null
++++ b/src/tests/cchardet_test.py
+@@ -0,0 +1,111 @@
++import glob
++import os
++
++import cchardet
++
++SKIP_LIST = [
++    'src/tests/testdata/ja/utf-16le.txt',
++    'src/tests/testdata/ja/utf-16be.txt',
++    'src/tests/testdata/es/iso-8859-15.txt',
++    'src/tests/testdata/da/iso-8859-1.txt',
++    'src/tests/testdata/he/iso-8859-8.txt'
++]
++
++# Python can't decode encoding
++SKIP_LIST_02 = [
++    'src/tests/testdata/vi/viscii.txt',
++    'src/tests/testdata/zh/euc-tw.txt'
++]
++SKIP_LIST_02.extend(SKIP_LIST)
++
++
++def test_ascii():
++    detected_encoding = cchardet.detect(b'abcdefghijklmnopqrstuvwxyz')
++    assert 'ascii' == detected_encoding['encoding'].lower()
++
++
++def test_detect():
++    testfiles = glob.glob('src/tests/testdata/*/*.txt')
++    for testfile in testfiles:
++        if testfile.replace("\\", "/") in SKIP_LIST:
++            continue
++
++        base = os.path.basename(testfile)
++        expected_charset = os.path.splitext(base)[0]
++        with open(testfile, 'rb') as f:
++            msg = f.read()
++            detected_encoding = cchardet.detect(msg)
++            assert expected_charset.lower() == detected_encoding['encoding'].lower()
++
++
++def test_detector():
++    detector = cchardet.UniversalDetector()
++    with open("src/tests/samples/wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt", 'rb') as f:
++        line = f.readline()
++        while line:
++            detector.feed(line)
++            if detector.done:
++                break
++            line = f.readline()
++    detector.close()
++    detected_encoding = detector.result
++    assert "shift_jis" == detected_encoding['encoding'].lower()
++
++
++def test_github_issue_20():
++    """
++    https://github.com/PyYoshi/cChardet/issues/20
++    """
++    msg = b'\x8f'
++
++    cchardet.detect(msg)
++
++    detector = cchardet.UniversalDetector()
++    detector.feed(msg)
++    detector.close()
++
++
++def test_decode():
++    testfiles = glob.glob('src/tests/testdata/*/*.txt')
++    for testfile in testfiles:
++        if testfile.replace("\\", "/") in SKIP_LIST_02:
++            continue
++
++        base = os.path.basename(testfile)
++        expected_charset = os.path.splitext(base)[0]
++        with open(testfile, 'rb') as f:
++            msg = f.read()
++            detected_encoding = cchardet.detect(msg)
++            try:
++                msg.decode(detected_encoding["encoding"])
++            except LookupError as e:
++                print("LookupError: { file=%s, encoding=%s }" % (
++                    testfile, detected_encoding["encoding"]))
++                raise e
++
++
++def test_utf8_with_bom():
++    sample = b'\xEF\xBB\xBF'
++    detected_encoding = cchardet.detect(sample)
++    assert "utf-8-sig" == detected_encoding['encoding'].lower()
++
++
++def test_null_bytes():
++    sample = b'ABC\x00\x80\x81'
++    detected_encoding = cchardet.detect(sample)
++
++    assert detected_encoding['encoding'] is None
++
++# def test_iso8859_2_csv(self):
++#     testfile = 'tests/samples/iso8859-2.csv'
++#     with open(testfile, 'rb') as f:
++#         msg = f.read()
++#         detected_encoding = cchardet.detect(msg)
++#         eq_(
++#             "iso8859-2",
++#             detected_encoding['encoding'].lower(),
++#             'Expected %s, but got %s' % (
++#                 "iso8859-2",
++#                 detected_encoding['encoding'].lower()
++#             )
++#         )