aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2013-10-27 08:07:46 +0200
committer Serhiy Storchaka <storchaka@gmail.com> 2013-10-27 08:07:46 +0200
commit 1985f7b133d2ff1f695354c50a09a7c859a1d5a4 (patch)
tree b4b22575877c830ff8aba95d0875e9986e6cdb40 /Lib/sre_compile.py
parent Issue Issue #15663: merge from 3.3 (diff)
parent Issue #19405: Fixed outdated comments in the _sre module. (diff)
download cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.tar.gz
cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.tar.bz2
cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.zip
Issue #19405: Fixed outdated comments in the _sre module.
Diffstat (limited to 'Lib/sre_compile.py')
-rw-r--r-- Lib/sre_compile.py 10
1 files changed, 5 insertions, 5 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index e194aaace76..691659daf42 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -270,10 +270,10 @@ def _mk_bitmap(bits):
# set is constructed. Then, this bitmap is sliced into chunks of 256
# characters, duplicate chunks are eliminated, and each chunk is
# given a number. In the compiled expression, the charset is
-# represented by a 16-bit word sequence, consisting of one word for
-# the number of different chunks, a sequence of 256 bytes (128 words)
+# represented by a 32-bit word sequence, consisting of one word for
+# the number of different chunks, a sequence of 256 bytes (64 words)
# of chunk numbers indexed by their original chunk position, and a
-# sequence of chunks (16 words each).
+# sequence of 256-bit chunks (8 words each).
# Compression is normally good: in a typical charset, large ranges of
# Unicode will be either completely excluded (e.g. if only cyrillic
@@ -286,9 +286,9 @@ def _mk_bitmap(bits):
# less significant byte is a bit index in the chunk (just like the
# CHARSET matching).
-# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
+# The BIGCHARSET opcode still supports only subsets
# of the basic multilingual plane; an efficient representation
-# for all of UTF-16 has not yet been developed. This means,
+# for all of Unicode has not yet been developed. This means,
# in particular, that negated charsets cannot be represented as
# bigcharsets.