diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2018-12-22 11:18:40 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-12-22 11:18:40 +0200 |
commit | 8ac658114dec4964479baecfbc439fceb40eaa79 (patch) | |
tree | e66c4c3beda293a6fdf01763306697d15d0af157 /Lib/tokenize.py | |
parent | bpo-22703: IDLE: Improve Code Context and Zoom Height menu labels (GH-11214) (diff) | |
download | cpython-8ac658114dec4964479baecfbc439fceb40eaa79.tar.gz cpython-8ac658114dec4964479baecfbc439fceb40eaa79.tar.bz2 cpython-8ac658114dec4964479baecfbc439fceb40eaa79.zip |
bpo-30455: Generate all token related code and docs from Grammar/Tokens. (GH-10370)
"Include/token.h", "Lib/token.py" (now containing some data moved from
"Lib/tokenize.py") and new files "Parser/token.c" (containing the code
moved from "Parser/tokenizer.c") and "Doc/library/token-list.inc" (included
in "Doc/library/token.rst") are now generated from "Grammar/Tokens" by
"Tools/scripts/generate_token.py". The script overwrites files only if
needed and can be used on a read-only source tree.
"Lib/symbol.py" is now generated by "Tools/scripts/generate_symbol_py.py"
instead of being executable itself.
Added new make targets "regen-token" and "regen-symbol" which are now
dependencies of "regen-all".
The documentation now contains strings for operators and punctuation tokens.
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r-- | Lib/tokenize.py | 66 |
1 files changed, 6 insertions, 60 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py index fce010bc5e7..cf1ecc99a94 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -32,6 +32,7 @@ import itertools as _itertools import re import sys from token import * +from token import EXACT_TOKEN_TYPES cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) @@ -41,55 +42,6 @@ __all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding", "untokenize", "TokenInfo"] del token -EXACT_TOKEN_TYPES = { - '(': LPAR, - ')': RPAR, - '[': LSQB, - ']': RSQB, - ':': COLON, - ',': COMMA, - ';': SEMI, - '+': PLUS, - '-': MINUS, - '*': STAR, - '/': SLASH, - '|': VBAR, - '&': AMPER, - '<': LESS, - '>': GREATER, - '=': EQUAL, - '.': DOT, - '%': PERCENT, - '{': LBRACE, - '}': RBRACE, - '==': EQEQUAL, - '!=': NOTEQUAL, - '<=': LESSEQUAL, - '>=': GREATEREQUAL, - '~': TILDE, - '^': CIRCUMFLEX, - '<<': LEFTSHIFT, - '>>': RIGHTSHIFT, - '**': DOUBLESTAR, - '+=': PLUSEQUAL, - '-=': MINEQUAL, - '*=': STAREQUAL, - '/=': SLASHEQUAL, - '%=': PERCENTEQUAL, - '&=': AMPEREQUAL, - '|=': VBAREQUAL, - '^=': CIRCUMFLEXEQUAL, - '<<=': LEFTSHIFTEQUAL, - '>>=': RIGHTSHIFTEQUAL, - '**=': DOUBLESTAREQUAL, - '//': DOUBLESLASH, - '//=': DOUBLESLASHEQUAL, - '...': ELLIPSIS, - '->': RARROW, - '@': AT, - '@=': ATEQUAL, -} - class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')): def __repr__(self): annotated_type = '%d (%s)' % (self.type, tok_name[self.type]) @@ -163,17 +115,11 @@ Triple = group(StringPrefix + "'''", StringPrefix + '"""') String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"') -# Because of leftmost-then-longest match semantics, be sure to put the -# longest operators first (e.g., if = came before ==, == would get -# recognized as two instances of =). 
-Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=", - r"//=?", r"->", - r"[+\-*/%&@|^=<>]=?", - r"~") - -Bracket = '[][(){}]' -Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]') -Funny = group(Operator, Bracket, Special) +# Sorting in reverse order puts the long operators before their prefixes. +# Otherwise if = came before ==, == would get recognized as two instances +# of =. +Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True))) +Funny = group(r'\r?\n', Special) PlainToken = group(Number, Funny, String, Name) Token = Ignore + PlainToken |