Tweak the GB18030 converter to map 0x8135F437 to U+E7C7.
diff --git a/ChangeLog b/ChangeLog
index a782e57..a98e0f9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2012-05-13 Bruno Haible <bruno@clisp.org>
+
+ Tweak the GB18030 converter to map 0x8135F437 to U+E7C7.
+ * lib/gb18030.h (gb18030_pua2charset, gb18030_wctomb): Remove mapping
+ from U+E7C7 to 0xA8BC.
+ * lib/gb18030uni.h (gb18030uni_mbtowc): Treat 0x8135F437 as a special
+ case.
+ (gb18030uni_wctomb): Treat U+E7C7 as a special case.
+ * tests/GB18030-BMP.TXT: Map 0x8135F437 to U+E7C7, not U+1E3F.
+ * tests/GB18030.IRREVERSIBLE.TXT: Remove irreversible mappings for
+ 0x8135F437 and U+E7C7.
+
2012-04-28 Bruno Haible <bruno@clisp.org>
Switch to autoconf 2.69 and automake 1.12.
diff --git a/lib/gb18030.h b/lib/gb18030.h
index a2301e3..1d75987 100644
--- a/lib/gb18030.h
+++ b/lib/gb18030.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001, 2005 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2005, 2012 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -144,7 +144,7 @@
* p. 12 0xA7C2..0xA7D0 U+E7A0..U+E7AE
* p. 12 0xA7F2..0xA7FE U+E7AF..U+E7BB
* p. 82 0xA896..0xA8A0 U+E7BC..U+E7C6
- * p. 12 0xA8BC [glyphs here!!] U+E7C7
+ * p. 12 0x8135F437 U+E7C7
* p. 255 0x8336C830 U+E7C8
* p. 12 0xA8C1..0xA8C4 U+E7C9..U+E7CC
* p. 12 0xA8EA..0xA8FE U+E7CD..U+E7E1
@@ -266,7 +266,7 @@
}
}
-static const unsigned short gb18030_pua2charset[32*3] = {
+static const unsigned short gb18030_pua2charset[31*3] = {
/* Unicode range GB18030 range */
0xe766, 0xe76b, 0xa2ab, /*.. 0xa2b0, */
0xe76d, 0xe76d, 0xa2e4,
@@ -282,7 +282,6 @@
0xe7a0, 0xe7ae, 0xa7c2, /*.. 0xa7d0, */
0xe7af, 0xe7bb, 0xa7f2, /*.. 0xa7fe, */
0xe7bc, 0xe7c6, 0xa896, /*.. 0xa8a0, */
- 0xe7c7, 0xe7c7, 0xa8bc,
0xe7c9, 0xe7cc, 0xa8c1, /*.. 0xa8c4, */
0xe7cd, 0xe7e1, 0xa8ea, /*.. 0xa8fe, */
0xe7e2, 0xe7e2, 0xa958,
@@ -340,7 +339,7 @@
} else {
/* User-defined characters, two-byte part of range U+E766..U+E864 */
unsigned int k1 = 0;
- unsigned int k2 = 32;
+ unsigned int k2 = 31;
/* Invariant: We know that if wc occurs in Unicode interval in
gb18030_pua2charset, it does so at a k with k1 <= k < k2. */
while (k1 < k2) {
diff --git a/lib/gb18030uni.h b/lib/gb18030uni.h
index ffdf338..aa7515b 100644
--- a/lib/gb18030uni.h
+++ b/lib/gb18030uni.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 1999-2001, 2005 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001, 2005, 2012 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
@@ -182,22 +182,26 @@
if (c4 >= 0x30 && c4 <= 0x39) {
unsigned int i = (((c1 - 0x81) * 10 + (c2 - 0x30)) * 126 + (c3 - 0x81)) * 10 + (c4 - 0x30);
if (i >= 0 && i <= 39419) {
- unsigned int k1 = 0;
- unsigned int k2 = 205;
- while (k1 < k2) {
- unsigned int k = (k1 + k2) / 2;
- if (i <= gb18030uni_charset2uni_ranges[2*k+1])
- k2 = k;
- else if (i >= gb18030uni_charset2uni_ranges[2*k+2])
- k1 = k + 1;
- else
- return RET_ILSEQ;
+ if (i == 7457) {
+ *pwc = 0xe7c7;
+ } else {
+ unsigned int k1 = 0;
+ unsigned int k2 = 205;
+ while (k1 < k2) {
+ unsigned int k = (k1 + k2) / 2;
+ if (i <= gb18030uni_charset2uni_ranges[2*k+1])
+ k2 = k;
+ else if (i >= gb18030uni_charset2uni_ranges[2*k+2])
+ k1 = k + 1;
+ else
+ return RET_ILSEQ;
+ }
+ {
+ unsigned int diff = gb18030uni_ranges[k1];
+ *pwc = (ucs4_t) (i + diff);
+ }
}
- {
- unsigned int diff = gb18030uni_ranges[k1];
- *pwc = (ucs4_t) (i + diff);
- return 4;
- }
+ return 4;
}
}
return RET_ILSEQ;
@@ -221,26 +225,30 @@
if (n >= 4) {
unsigned int i = wc;
if (i >= 0x0080 && i <= 0xffff) {
- unsigned int k1 = 0;
- unsigned int k2 = 205;
- while (k1 < k2) {
- unsigned int k = (k1 + k2) / 2;
- if (i <= gb18030uni_uni2charset_ranges[2*k+1])
- k2 = k;
- else if (i >= gb18030uni_uni2charset_ranges[2*k+2])
- k1 = k + 1;
- else
- return RET_ILUNI;
+ if (i == 0xe7c7) {
+ i = 7457;
+ } else {
+ unsigned int k1 = 0;
+ unsigned int k2 = 205;
+ while (k1 < k2) {
+ unsigned int k = (k1 + k2) / 2;
+ if (i <= gb18030uni_uni2charset_ranges[2*k+1])
+ k2 = k;
+ else if (i >= gb18030uni_uni2charset_ranges[2*k+2])
+ k1 = k + 1;
+ else
+ return RET_ILUNI;
+ }
+ {
+ unsigned int diff = gb18030uni_ranges[k1];
+ i -= diff;
+ }
}
- {
- unsigned int diff = gb18030uni_ranges[k1];
- i -= diff;
- r[3] = (i % 10) + 0x30; i = i / 10;
- r[2] = (i % 126) + 0x81; i = i / 126;
- r[1] = (i % 10) + 0x30; i = i / 10;
- r[0] = i + 0x81;
- return 4;
- }
+ r[3] = (i % 10) + 0x30; i = i / 10;
+ r[2] = (i % 126) + 0x81; i = i / 126;
+ r[1] = (i % 10) + 0x30; i = i / 10;
+ r[0] = i + 0x81;
+ return 4;
}
return RET_ILUNI;
}
diff --git a/tests/GB18030-BMP.TXT b/tests/GB18030-BMP.TXT
index 0006e4b..6df3bd5 100644
--- a/tests/GB18030-BMP.TXT
+++ b/tests/GB18030-BMP.TXT
@@ -7583,7 +7583,7 @@
0x8135F434 0x1E3C
0x8135F435 0x1E3D
0x8135F436 0x1E3E
-0x8135F437 0x1E3F
+0x8135F437 0xE7C7
0x8135F438 0x1E40
0x8135F439 0x1E41
0x8135F530 0x1E42
diff --git a/tests/GB18030.IRREVERSIBLE.TXT b/tests/GB18030.IRREVERSIBLE.TXT
index 1dd1904..5e84bc3 100644
--- a/tests/GB18030.IRREVERSIBLE.TXT
+++ b/tests/GB18030.IRREVERSIBLE.TXT
@@ -1,4 +1,3 @@
-0x8135F437 0x1E3F
0x82359037 0x9FB4
0x82359038 0x9FB5
0x82359039 0x9FB6
@@ -33,7 +32,6 @@
0xA6EC 0xE794
0xA6ED 0xE795
0xA6F3 0xE796
-0xA8BC 0xE7C7
0xFE51 0xE816
0xFE52 0xE817
0xFE53 0xE818