aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2014-01-09 18:41:59 +0200
committerSerhiy Storchaka <storchaka@gmail.com>2014-01-09 18:41:59 +0200
commit7282ff6d5b56825e74c0715aea86e927d2fd339f (patch)
tree5bdc07d3601764c5cecdb78c276151f4ba03eef6 /Lib/tokenize.py
parentmerge 3.3 (diff)
parentIssue #18960: Fix bugs with Python source code encoding in the second line. (diff)
downloadcpython-7282ff6d5b56825e74c0715aea86e927d2fd339f.tar.gz
cpython-7282ff6d5b56825e74c0715aea86e927d2fd339f.tar.bz2
cpython-7282ff6d5b56825e74c0715aea86e927d2fd339f.zip
Issue #18960: Fix bugs with Python source code encoding in the second line.
* The first line of a Python script could be executed twice when the source encoding (not equal to 'utf-8') was specified on the second line.
* Now the source encoding declaration on the second line isn't effective if the first line contains anything except a comment.
* As a consequence, 'python -x' works now again with files with the source encoding declarations specified on the second line, and can be used again to make Python batch files on Windows.
* The tokenize module now ignores the source encoding declaration on the second line if the first line contains anything except a comment.
* IDLE now ignores the source encoding declaration on the second line if the first line contains anything except a comment.
* 2to3 and the findnocoding.py script now ignore the source encoding declaration on the second line if the first line contains anything except a comment.
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r--Lib/tokenize.py3
1 file changed, 3 insertions, 0 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index f614aeb164f..7785c98f8b4 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -32,6 +32,7 @@ from codecs import lookup, BOM_UTF8
import collections
from io import TextIOWrapper
cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
import token
__all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
@@ -409,6 +410,8 @@ def detect_encoding(readline):
encoding = find_cookie(first)
if encoding:
return encoding, [first]
+ if not blank_re.match(first):
+ return default, [first]
second = read_or_stop()
if not second: