aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2013-11-23 18:52:23 +0200
committer Serhiy Storchaka <storchaka@gmail.com> 2013-11-23 18:52:23 +0200
commit be0c3250b1bbc16eea41dfe0f93f33ad384e9bd4 (patch)
tree 75fae9326c61971241ea46ee4dc0de6b68f31897 /Lib/encodings
parent Issue #19308: fix the gdb plugin on gdbs linked with Python 3 (diff)
download cpython-be0c3250b1bbc16eea41dfe0f93f33ad384e9bd4.tar.gz
cpython-be0c3250b1bbc16eea41dfe0f93f33ad384e9bd4.tar.bz2
cpython-be0c3250b1bbc16eea41dfe0f93f33ad384e9bd4.zip
Issue #19668: Added support for the cp1125 encoding.
Diffstat (limited to 'Lib/encodings')
-rw-r--r-- Lib/encodings/aliases.py 6
-rw-r--r-- Lib/encodings/cp866.py 52
2 files changed, 32 insertions, 26 deletions
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
index 331095b1f16..5461aa053a4 100644
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -63,6 +63,12 @@ aliases = {
'csibm1026' : 'cp1026',
'ibm1026' : 'cp1026',
+ # cp1125 codec
+ '1125' : 'cp1125',
+ 'ibm1125' : 'cp1125',
+ 'cp866u' : 'cp1125',
+ 'ruscii' : 'cp1125',
+
# cp1140 codec
'1140' : 'cp1140',
'ibm1140' : 'cp1140',
diff --git a/Lib/encodings/cp866.py b/Lib/encodings/cp866.py
index bec7ae39fba..b1fd69deb07 100644
--- a/Lib/encodings/cp866.py
+++ b/Lib/encodings/cp866.py
@@ -1,4 +1,4 @@
-""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py.
+""" Python Character Mapping Codec for CP1125
"""#"
@@ -32,7 +32,7 @@ class StreamReader(Codec,codecs.StreamReader):
def getregentry():
return codecs.CodecInfo(
- name='cp866',
+ name='cp1125',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
@@ -159,14 +159,14 @@ decoding_map.update({
0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA
0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO
0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO
- 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
- 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
- 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI
- 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI
- 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
- 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U
- 0x00f8: 0x00b0, # DEGREE SIGN
- 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00f2: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+ 0x00f3: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN
+ 0x00f4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x00f5: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x00f6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x00f7: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x00f8: 0x0407, # CYRILLIC CAPITAL LETTER YI
+ 0x00f9: 0x0457, # CYRILLIC SMALL LETTER YI
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x2116, # NUMERO SIGN
@@ -420,14 +420,14 @@ decoding_table = (
'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA
'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO
'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO
- '\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
- '\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE
- '\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI
- '\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI
- '\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U
- '\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U
- '\xb0' # 0x00f8 -> DEGREE SIGN
- '\u2219' # 0x00f9 -> BULLET OPERATOR
+ '\u0490' # 0x00f2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+ '\u0491' # 0x00f3 -> CYRILLIC SMALL LETTER GHE WITH UPTURN
+ '\u0404' # 0x00f4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ '\u0454' # 0x00f5 -> CYRILLIC SMALL LETTER UKRAINIAN IE
+ '\u0406' # 0x00f6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ '\u0456' # 0x00f7 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ '\u0407' # 0x00f8 -> CYRILLIC CAPITAL LETTER YI
+ '\u0457' # 0x00f9 -> CYRILLIC SMALL LETTER YI
'\xb7' # 0x00fa -> MIDDLE DOT
'\u221a' # 0x00fb -> SQUARE ROOT
'\u2116' # 0x00fc -> NUMERO SIGN
@@ -569,12 +569,11 @@ encoding_map = {
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a4: 0x00fd, # CURRENCY SIGN
- 0x00b0: 0x00f8, # DEGREE SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO
- 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
- 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI
- 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U
+ 0x0404: 0x00f4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x0406: 0x00f6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x0407: 0x00f8, # CYRILLIC CAPITAL LETTER YI
0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A
0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE
0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE
@@ -640,11 +639,12 @@ encoding_map = {
0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU
0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA
0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO
- 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE
- 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI
- 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U
+ 0x0454: 0x00f5, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x0456: 0x00f7, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x0457: 0x00f9, # CYRILLIC SMALL LETTER YI
+ 0x0490: 0x00f2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+ 0x0491: 0x00f3, # CYRILLIC SMALL LETTER GHE WITH UPTURN
0x2116: 0x00fc, # NUMERO SIGN
- 0x2219: 0x00f9, # BULLET OPERATOR
0x221a: 0x00fb, # SQUARE ROOT
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL