diff options
author | Brett Cannon <brett@python.org> | 2016-09-09 14:57:09 -0700 |
---|---|---|
committer | Brett Cannon <brett@python.org> | 2016-09-09 14:57:09 -0700 |
commit | a721abac299bb6529021000a71847486d531b41a (patch) | |
tree | 8355a69b891cfcdaad8a5fd62870231b7f940696 /Lib | |
parent | Merge heads (diff) | |
download | cpython-a721abac299bb6529021000a71847486d531b41a.tar.gz cpython-a721abac299bb6529021000a71847486d531b41a.tar.bz2 cpython-a721abac299bb6529021000a71847486d531b41a.zip |
Issue #26331: Implement the parsing part of PEP 515.
Thanks to Georg Brandl for the patch.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/_pydecimal.py | 10 | ||||
-rw-r--r-- | Lib/test/test_complex.py | 14 | ||||
-rw-r--r-- | Lib/test/test_decimal.py | 10 | ||||
-rw-r--r-- | Lib/test/test_float.py | 24 | ||||
-rw-r--r-- | Lib/test/test_grammar.py | 89 | ||||
-rw-r--r-- | Lib/test/test_int.py | 21 | ||||
-rw-r--r-- | Lib/test/test_tokenize.py | 30 | ||||
-rw-r--r-- | Lib/test/test_types.py | 1 | ||||
-rw-r--r-- | Lib/tokenize.py | 17 |
9 files changed, 195 insertions, 21 deletions
diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index 21e875c31c4..6318a49ce70 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -589,7 +589,7 @@ class Decimal(object): # From a string # REs insist on real strings, so we can too. if isinstance(value, str): - m = _parser(value.strip()) + m = _parser(value.strip().replace("_", "")) if m is None: if context is None: context = getcontext() @@ -4125,7 +4125,7 @@ class Context(object): This will make it round up for that operation. """ rounding = self.rounding - self.rounding= type + self.rounding = type return rounding def create_decimal(self, num='0'): @@ -4134,10 +4134,10 @@ class Context(object): This method implements the to-number operation of the IBM Decimal specification.""" - if isinstance(num, str) and num != num.strip(): + if isinstance(num, str) and (num != num.strip() or '_' in num): return self._raise_error(ConversionSyntax, - "no trailing or leading whitespace is " - "permitted.") + "trailing or leading whitespace and " + "underscores are not permitted.") d = Decimal(num, context=self) if d._isnan() and len(d._int) > self.prec - self.clamp: diff --git a/Lib/test/test_complex.py b/Lib/test/test_complex.py index 0ef9a7a1098..6633a7ae54b 100644 --- a/Lib/test/test_complex.py +++ b/Lib/test/test_complex.py @@ -1,5 +1,7 @@ import unittest from test import support +from test.test_grammar import (VALID_UNDERSCORE_LITERALS, + INVALID_UNDERSCORE_LITERALS) from random import random from math import atan2, isnan, copysign @@ -377,6 +379,18 @@ class ComplexTest(unittest.TestCase): self.assertAlmostEqual(complex(complex1(1j)), 2j) self.assertRaises(TypeError, complex, complex2(1j)) + def test_underscores(self): + # check underscores + for lit in VALID_UNDERSCORE_LITERALS: + if not any(ch in lit for ch in 'xXoObB'): + self.assertEqual(complex(lit), eval(lit)) + self.assertEqual(complex(lit), complex(lit.replace('_', ''))) + for lit in INVALID_UNDERSCORE_LITERALS: + if lit in ('0_7', '09_99'): # octals are not recognized here + continue + if not any(ch in lit for ch in 'xXoObB'): + self.assertRaises(ValueError, complex, lit) + def test_hash(self): for x in range(-30, 30): self.assertEqual(hash(x), hash(complex(x, 0))) diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 7492f5466f0..617a37eec82 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -554,6 +554,10 @@ class ExplicitConstructionTest(unittest.TestCase): self.assertEqual(str(Decimal(' -7.89')), '-7.89') self.assertEqual(str(Decimal(" 3.45679 ")), '3.45679') + # underscores + self.assertEqual(str(Decimal('1_3.3e4_0')), '1.33E+41') + self.assertEqual(str(Decimal('1_0_0_0')), '1000') + # unicode whitespace for lead in ["", ' ', '\u00a0', '\u205f']: for trail in ["", ' ', '\u00a0', '\u205f']: @@ -578,6 +582,9 @@ class ExplicitConstructionTest(unittest.TestCase): # embedded NUL self.assertRaises(InvalidOperation, Decimal, "12\u00003") + # underscores don't prevent errors + self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003") + @cpython_only def test_from_legacy_strings(self): import _testcapi @@ -772,6 +779,9 @@ class ExplicitConstructionTest(unittest.TestCase): self.assertRaises(InvalidOperation, nc.create_decimal, "xyz") self.assertRaises(ValueError, nc.create_decimal, (1, "xyz", -25)) self.assertRaises(TypeError, nc.create_decimal, "1234", "5678") + # no whitespace and underscore stripping is done with this method + self.assertRaises(InvalidOperation, nc.create_decimal, " 1234") + self.assertRaises(InvalidOperation, nc.create_decimal, "12_34") # too many NaN payload digits nc.prec = 3 diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 68b212e1959..ac8473db503 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -1,4 +1,3 @@ - import fractions import operator import os @@ -9,6 +8,8 @@ import time import unittest from test import support +from test.test_grammar import (VALID_UNDERSCORE_LITERALS, + INVALID_UNDERSCORE_LITERALS) from math import isinf, isnan, copysign, ldexp INF = float("inf") @@ -60,6 +61,27 @@ class GeneralFloatCases(unittest.TestCase): float(b'.' + b'1'*1000) float('.' + '1'*1000) + def test_underscores(self): + for lit in VALID_UNDERSCORE_LITERALS: + if not any(ch in lit for ch in 'jJxXoObB'): + self.assertEqual(float(lit), eval(lit)) + self.assertEqual(float(lit), float(lit.replace('_', ''))) + for lit in INVALID_UNDERSCORE_LITERALS: + if lit in ('0_7', '09_99'): # octals are not recognized here + continue + if not any(ch in lit for ch in 'jJxXoObB'): + self.assertRaises(ValueError, float, lit) + # Additional test cases; nan and inf are never valid as literals, + # only in the float() constructor, but we don't allow underscores + # in or around them. + self.assertRaises(ValueError, float, '_NaN') + self.assertRaises(ValueError, float, 'Na_N') + self.assertRaises(ValueError, float, 'IN_F') + self.assertRaises(ValueError, float, '-_INF') + self.assertRaises(ValueError, float, '-INF_') + # Check that we handle bytes values correctly. + self.assertRaises(ValueError, float, b'0_.\xff9') + def test_non_numeric_input_types(self): # Test possible non-numeric types for the argument x, including # subclasses of the explicitly documented accepted types. diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index 109013f5e2f..914aa679441 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -16,6 +16,87 @@ from collections import ChainMap from test import ann_module2 import test +# These are shared with test_tokenize and other test modules. +# +# Note: since several test cases filter out floats by looking for "e" and ".", +# don't add hexadecimal literals that contain "e" or "E". +VALID_UNDERSCORE_LITERALS = [ + '0_0_0', + '4_2', + '1_0000_0000', + '0b1001_0100', + '0xffff_ffff', + '0o5_7_7', + '1_00_00.5', + '1_00_00.5e5', + '1_00_00e5_1', + '1e1_0', + '.1_4', + '.1_4e1', + '0b_0', + '0x_f', + '0o_5', + '1_00_00j', + '1_00_00.5j', + '1_00_00e5_1j', + '.1_4j', + '(1_2.5+3_3j)', + '(.5_6j)', +] +INVALID_UNDERSCORE_LITERALS = [ + # Trailing underscores: + '0_', + '42_', + '1.4j_', + '0x_', + '0b1_', + '0xf_', + '0o5_', + '0 if 1_Else 1', + # Underscores in the base selector: + '0_b0', + '0_xf', + '0_o5', + # Old-style octal, still disallowed: + '0_7', + '09_99', + # Multiple consecutive underscores: + '4_______2', + '0.1__4', + '0.1__4j', + '0b1001__0100', + '0xffff__ffff', + '0x___', + '0o5__77', + '1e1__0', + '1e1__0j', + # Underscore right before a dot: + '1_.4', + '1_.4j', + # Underscore right after a dot: + '1._4', + '1._4j', + '._5', + '._5j', + # Underscore right after a sign: + '1.0e+_1', + '1.0e+_1j', + # Underscore right before j: + '1.4_j', + '1.4e5_j', + # Underscore right before e: + '1_e1', + '1.4_e1', + '1.4_e1j', + # Underscore right after e: + '1e_1', + '1.4e_1', + '1.4e_1j', + # Complex cases with parens: + '(1+1.5_j_)', + '(1+1.5_j)', +] + class TokenTests(unittest.TestCase): @@ -95,6 +176,14 @@ class TokenTests(unittest.TestCase): self.assertEqual(1 if 0else 0, 0) self.assertRaises(SyntaxError, eval, "0 if 1Else 0") + def test_underscore_literals(self): + for lit in VALID_UNDERSCORE_LITERALS: + self.assertEqual(eval(lit), eval(lit.replace('_', ''))) + for lit in INVALID_UNDERSCORE_LITERALS: + self.assertRaises(SyntaxError, eval, lit) + # Sanity check: no literal begins with an underscore + self.assertRaises(NameError, eval, "_0") + def test_string_literals(self): x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y) x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39) diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index 8847f4ce972..14bbd6192a0 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -2,6 +2,8 @@ import sys import unittest from test import support +from test.test_grammar import (VALID_UNDERSCORE_LITERALS, + INVALID_UNDERSCORE_LITERALS) L = [ ('0', 0), @@ -212,6 +214,25 @@ class IntTestCases(unittest.TestCase): self.assertEqual(int('2br45qc', 35), 4294967297) self.assertEqual(int('1z141z5', 36), 4294967297) + def test_underscores(self): + for lit in VALID_UNDERSCORE_LITERALS: + if any(ch in lit for ch in '.eEjJ'): + continue + self.assertEqual(int(lit, 0), eval(lit)) + self.assertEqual(int(lit, 0), int(lit.replace('_', ''), 0)) + for lit in INVALID_UNDERSCORE_LITERALS: + if any(ch in lit for ch in '.eEjJ'): + continue + self.assertRaises(ValueError, int, lit, 0) + # Additional test cases with bases != 0, only for the constructor: + self.assertEqual(int("1_00", 3), 9) + self.assertEqual(int("0_100"), 100) # not valid as a literal! + self.assertEqual(int(b"1_00"), 100) # byte underscore + self.assertRaises(ValueError, int, "_100") + self.assertRaises(ValueError, int, "+_100") + self.assertRaises(ValueError, int, "1__00") + self.assertRaises(ValueError, int, "100_") + @support.cpython_only def test_small_ints(self): # Bug #3236: Return small longs from PyLong_FromString diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 4c469a890f8..5a81a5f11a4 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -3,7 +3,9 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP, STRING, ENDMARKER, ENCODING, tok_name, detect_encoding, open as tokenize_open, Untokenizer) from io import BytesIO -from unittest import TestCase, mock, main +from unittest import TestCase, mock +from test.test_grammar import (VALID_UNDERSCORE_LITERALS, + INVALID_UNDERSCORE_LITERALS) import os import token @@ -185,6 +187,21 @@ def k(x): NUMBER '3.14e159' (1, 4) (1, 12) """) + def test_underscore_literals(self): + def number_token(s): + f = BytesIO(s.encode('utf-8')) + for toktype, token, start, end, line in tokenize(f.readline): + if toktype == NUMBER: + return token + return 'invalid token' + for lit in VALID_UNDERSCORE_LITERALS: + if '(' in lit: + # this won't work with compound complex inputs + continue + self.assertEqual(number_token(lit), lit) + for lit in INVALID_UNDERSCORE_LITERALS: + self.assertNotEqual(number_token(lit), lit) + def test_string(self): # String literals self.check_tokenize("x = ''; y = \"\"", """\ @@ -1529,11 +1546,10 @@ class TestRoundtrip(TestCase): tempdir = os.path.dirname(fn) or os.curdir testfiles = glob.glob(os.path.join(tempdir, "test*.py")) - # Tokenize is broken on test_unicode_identifiers.py because regular - # expressions are broken on the obscure unicode identifiers in it. - # *sigh* With roundtrip extended to test the 5-tuple mode of - # untokenize, 7 more testfiles fail. Remove them also until the - # failure is diagnosed. + # Tokenize is broken on test_pep3131.py because regular expressions are + # broken on the obscure unicode identifiers in it. *sigh* + # With roundtrip extended to test the 5-tuple mode of untokenize, + # 7 more testfiles fail. Remove them also until the failure is diagnosed. testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py")) for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'): @@ -1565,4 +1581,4 @@ class TestRoundtrip(TestCase): if __name__ == "__main__": - main() + unittest.main() diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index a202196bd2f..382ca03e5ad 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -48,6 +48,7 @@ class TypesTests(unittest.TestCase): def test_float_constructor(self): self.assertRaises(ValueError, float, '') self.assertRaises(ValueError, float, '5\0') + self.assertRaises(ValueError, float, '5_5\0') def test_zero_division(self): try: 5.0 / 0.0 diff --git a/Lib/tokenize.py b/Lib/tokenize.py index ec79ec886da..825aa906460 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -120,16 +120,17 @@ Comment = r'#[^\r\n]*' Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) Name = r'\w+' -Hexnumber = r'0[xX][0-9a-fA-F]+' -Binnumber = r'0[bB][01]+' -Octnumber = r'0[oO][0-7]+' -Decnumber = r'(?:0+|[1-9][0-9]*)' +Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' +Binnumber = r'0[bB](?:_?[01])+' +Octnumber = r'0[oO](?:_?[0-7])+' +Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)' Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) -Exponent = r'[eE][-+]?[0-9]+' -Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent) -Expfloat = r'[0-9]+' + Exponent +Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*' +Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?', + r'\.[0-9](?:_?[0-9])*') + maybe(Exponent) +Expfloat = r'[0-9](?:_?[0-9])*' + Exponent Floatnumber = group(Pointfloat, Expfloat) -Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') +Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') Number = group(Imagnumber, Floatnumber, Intnumber) # Return the empty string, plus all of the valid string prefixes. |