GB18030: Help transition away from PUA code points.

* lib/gb18030ext.h (gb18030_2005_ext_wctomb): Remove function.
(gb18030ext_wctomb): Renamed from gb18030_2022_ext_wctomb.
* lib/gb18030uni.h (gb18030_2005_uni_wctomb): Map 6 Ext-B code points to
4-byte sequences.
(gb18030_2022_uni_wctomb): Small refactoring.
* lib/gb18030_2005.h (gb18030_2005_pua2charset): Map 6 PUA code points
to 4-byte sequences instead of 2-byte sequences.
(gb18030_2005_wctomb): Update accordingly. Invoke gb18030ext_wctomb
instead of gb18030_2005_ext_wctomb.
* lib/gb18030_2022.h (gb18030_2022_wctomb): Invoke gb18030ext_wctomb
instead of gb18030_2022_ext_wctomb.
* tests/GB18030-2005.IRREVERSIBLE.TXT: Update the inverse mappings of 6
Ext-B code points and 6 PUA code points.
* NEWS: Mention it.
diff --git a/ChangeLog b/ChangeLog
index e95e3b9..d077b4d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2023-05-29  Bruno Haible  <bruno@clisp.org>
+
+	GB18030: Help transition away from PUA code points.
+	* lib/gb18030ext.h (gb18030_2005_ext_wctomb): Remove function.
+	(gb18030ext_wctomb): Renamed from gb18030_2022_ext_wctomb.
+	* lib/gb18030uni.h (gb18030_2005_uni_wctomb): Map 6 Ext-B code points to
+	4-byte sequences.
+	(gb18030_2022_uni_wctomb): Small refactoring.
+	* lib/gb18030_2005.h (gb18030_2005_pua2charset): Map 6 PUA code points
+	to 4-byte sequences instead of 2-byte sequences.
+	(gb18030_2005_wctomb): Update accordingly. Invoke gb18030ext_wctomb
+	instead of gb18030_2005_ext_wctomb.
+	* lib/gb18030_2022.h (gb18030_2022_wctomb): Invoke gb18030ext_wctomb
+	instead of gb18030_2022_ext_wctomb.
+	* tests/GB18030-2005.IRREVERSIBLE.TXT: Update the inverse mappings of 6
+	Ext-B code points and 6 PUA code points.
+	* NEWS: Mention it.
+
 2023-05-24  Bruno Haible  <bruno@clisp.org>
 
 	man pages: List a fifth condition when iconv(3) may stop.
diff --git a/NEWS b/NEWS
index 5321981..364b741 100644
--- a/NEWS
+++ b/NEWS
@@ -3,7 +3,8 @@
 * GB18030 is now an alias for GB18030:2005. A new converter for GB18030:2022
   is added. Since this encoding merely cleans up a few private-use-area
   mappings, you can continue to use the GB18030 converter, for backward
-  compatibility.
+  compatibility. Its Unicode to GB18030 conversion direction has been
+  enhanced to help transition away from PUA code points.
 * When converting from/to an EBCDIC encoding, a non-standard way of
   converting newlines can be requested
     - at the C level, by calling iconvctl with argument ICONV_SET_FROM_SURFACE
diff --git a/lib/gb18030_2005.h b/lib/gb18030_2005.h
index 43f952f..4e7fca5 100644
--- a/lib/gb18030_2005.h
+++ b/lib/gb18030_2005.h
@@ -266,39 +266,43 @@
   }
 }
 
-static const unsigned short gb18030_2005_pua2charset[31*3] = {
-/* Unicode range   GB18030 range */
-  0xe766, 0xe76b,  0xa2ab, /*.. 0xa2b0, */
-  0xe76d, 0xe76d,  0xa2e4,
-  0xe76e, 0xe76f,  0xa2ef, /*.. 0xa2f0, */
-  0xe770, 0xe771,  0xa2fd, /*.. 0xa2fe, */
-  0xe772, 0xe77c,  0xa4f4, /*.. 0xa4fe, */
-  0xe77d, 0xe784,  0xa5f7, /*.. 0xa5fe, */
-  0xe785, 0xe78c,  0xa6b9, /*.. 0xa6c0, */
-  0xe78d, 0xe793,  0xa6d9, /*.. 0xa6df, */
-  0xe794, 0xe795,  0xa6ec, /*.. 0xa6ed, */
-  0xe796, 0xe796,  0xa6f3,
-  0xe797, 0xe79f,  0xa6f6, /*.. 0xa6fe, */
-  0xe7a0, 0xe7ae,  0xa7c2, /*.. 0xa7d0, */
-  0xe7af, 0xe7bb,  0xa7f2, /*.. 0xa7fe, */
-  0xe7bc, 0xe7c6,  0xa896, /*.. 0xa8a0, */
-  0xe7c9, 0xe7cc,  0xa8c1, /*.. 0xa8c4, */
-  0xe7cd, 0xe7e1,  0xa8ea, /*.. 0xa8fe, */
-  0xe7e2, 0xe7e2,  0xa958,
-  0xe7e3, 0xe7e3,  0xa95b,
-  0xe7e4, 0xe7e6,  0xa95d, /*.. 0xa95f, */
-  0xe7f4, 0xe800,  0xa997, /*.. 0xa9a3, */
-  0xe801, 0xe80f,  0xa9f0, /*.. 0xa9fe, */
-  0xe810, 0xe814,  0xd7fa, /*.. 0xd7fe, */
-  0xe816, 0xe818,  0xfe51, /*.. 0xfe53, */
-  0xe81e, 0xe81e,  0xfe59,
-  0xe826, 0xe826,  0xfe61,
-  0xe82b, 0xe82c,  0xfe66, /*.. 0xfe67, */
-  0xe831, 0xe832,  0xfe6c, /*.. 0xfe6d, */
-  0xe83b, 0xe83b,  0xfe76,
-  0xe843, 0xe843,  0xfe7e,
-  0xe854, 0xe855,  0xfe90, /*.. 0xfe91, */
-  0xe864, 0xe864,  0xfea0,
+static const struct { unsigned short uni[2]; unsigned int charset; } gb18030_2005_pua2charset[35] = {
+/*    Unicode range      GB18030 range */
+  { { 0xe766, 0xe76b },  0xa2ab /*.. 0xa2b0, */ },
+  { { 0xe76d, 0xe76d },  0xa2e4                 },
+  { { 0xe76e, 0xe76f },  0xa2ef /*.. 0xa2f0, */ },
+  { { 0xe770, 0xe771 },  0xa2fd /*.. 0xa2fe, */ },
+  { { 0xe772, 0xe77c },  0xa4f4 /*.. 0xa4fe, */ },
+  { { 0xe77d, 0xe784 },  0xa5f7 /*.. 0xa5fe, */ },
+  { { 0xe785, 0xe78c },  0xa6b9 /*.. 0xa6c0, */ },
+  { { 0xe78d, 0xe793 },  0xa6d9 /*.. 0xa6df, */ },
+  { { 0xe794, 0xe795 },  0xa6ec /*.. 0xa6ed, */ },
+  { { 0xe796, 0xe796 },  0xa6f3                 },
+  { { 0xe797, 0xe79f },  0xa6f6 /*.. 0xa6fe, */ },
+  { { 0xe7a0, 0xe7ae },  0xa7c2 /*.. 0xa7d0, */ },
+  { { 0xe7af, 0xe7bb },  0xa7f2 /*.. 0xa7fe, */ },
+  { { 0xe7bc, 0xe7c6 },  0xa896 /*.. 0xa8a0, */ },
+  { { 0xe7c9, 0xe7cc },  0xa8c1 /*.. 0xa8c4, */ },
+  { { 0xe7cd, 0xe7e1 },  0xa8ea /*.. 0xa8fe, */ },
+  { { 0xe7e2, 0xe7e2 },  0xa958                 },
+  { { 0xe7e3, 0xe7e3 },  0xa95b                 },
+  { { 0xe7e4, 0xe7e6 },  0xa95d /*.. 0xa95f, */ },
+  { { 0xe7f4, 0xe800 },  0xa997 /*.. 0xa9a3, */ },
+  { { 0xe801, 0xe80f },  0xa9f0 /*.. 0xa9fe, */ },
+  { { 0xe810, 0xe814 },  0xd7fa /*.. 0xd7fe, */ },
+  { { 0xe816, 0xe816 },  0x95329031             },
+  { { 0xe817, 0xe817 },  0x95329033             },
+  { { 0xe818, 0xe818 },  0x95329730             },
+  { { 0xe81e, 0xe81e },  0xfe59                 },
+  { { 0xe826, 0xe826 },  0xfe61                 },
+  { { 0xe82b, 0xe82c },  0xfe66 /*.. 0xfe67, */ },
+  { { 0xe831, 0xe831 },  0x9536b937             },
+  { { 0xe832, 0xe832 },  0xfe6d                 },
+  { { 0xe83b, 0xe83b },  0x9630ba35             },
+  { { 0xe843, 0xe843 },  0xfe7e                 },
+  { { 0xe854, 0xe854 },  0xfe90                 },
+  { { 0xe855, 0xe855 },  0x9635b630             },
+  { { 0xe864, 0xe864 },  0xfea0                 },
 };
 
 static int
@@ -316,7 +320,7 @@
   if (ret != RET_ILUNI)
     return ret;
 
-  ret = gb18030_2005_ext_wctomb(conv,r,wc,n);
+  ret = gb18030ext_wctomb(conv,r,wc,n);
   if (ret != RET_ILUNI)
     return ret;
 
@@ -337,23 +341,32 @@
           return 2;
         }
       } else {
-        /* User-defined characters, two-byte part of range U+E766..U+E864 */
+        /* User-defined characters, two-byte part and 6 four-byte mappings in
+           range U+E766..U+E864 */
         unsigned int k1 = 0;
-        unsigned int k2 = 31;
+        unsigned int k2 = 35;
         /* Invariant: We know that if wc occurs in Unicode interval in
            gb18030_2005_pua2charset, it does so at a k with  k1 <= k < k2. */
         while (k1 < k2) {
           unsigned int k = (k1 + k2) / 2;
-          if (wc < gb18030_2005_pua2charset[k*3+0])
+          if (wc < gb18030_2005_pua2charset[k].uni[0])
             k2 = k;
-          else if (wc > gb18030_2005_pua2charset[k*3+1])
+          else if (wc > gb18030_2005_pua2charset[k].uni[1])
             k1 = k + 1;
           else {
-            unsigned short c =
-              gb18030_2005_pua2charset[k*3+2] + (wc - gb18030_2005_pua2charset[k*3+0]);
-            r[0] = (c >> 8);
-            r[1] = (c & 0xff);
-            return 2;
+            unsigned int c =
+              gb18030_2005_pua2charset[k].charset + (wc - gb18030_2005_pua2charset[k].uni[0]);
+            if (c < 0x10000) {
+              r[0] = (c >> 8);
+              r[1] = c & 0xff;
+              return 2;
+            } else {
+              r[0] = (c >> 24);
+              r[1] = (c >> 16) & 0xff;
+              r[2] = (c >> 8) & 0xff;
+              r[3] = c & 0xff;
+              return 4;
+            }
           }
         }
       }
diff --git a/lib/gb18030_2022.h b/lib/gb18030_2022.h
index f3e762c..4d70a92 100644
--- a/lib/gb18030_2022.h
+++ b/lib/gb18030_2022.h
@@ -151,7 +151,7 @@
   if (ret != RET_ILUNI)
     return ret;
 
-  ret = gb18030_2022_ext_wctomb(conv,r,wc,n);
+  ret = gb18030ext_wctomb(conv,r,wc,n);
   if (ret != RET_ILUNI)
     return ret;
 
diff --git a/lib/gb18030ext.h b/lib/gb18030ext.h
index 894efeb..97f01da 100644
--- a/lib/gb18030ext.h
+++ b/lib/gb18030ext.h
@@ -357,93 +357,7 @@
 };
 
 static int
-gb18030_2005_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
-{
-  if (n >= 2) {
-    unsigned short c = 0;
-    if (wc == 0x01f9)
-      c = 0xa8bf;
-    else if (wc == 0x1e3f)
-      c = 0xa8bc;
-    else if (wc == 0x20ac)
-      c = 0xa2e3;
-    else if (wc >= 0x2e80 && wc < 0x2ed0)
-      c = gb18030ext_page2e[wc-0x2e80];
-    else if (wc >= 0x2ff0 && wc < 0x3000)
-      c = gb18030ext_page2f[wc-0x2ff0];
-    else if (wc == 0x303e)
-      c = 0xa989;
-    else if (wc >= 0x3440 && wc < 0x3478)
-      c = gb18030ext_page34[wc-0x3440];
-    else if (wc == 0x359e)
-      c = 0xfe5a;
-    else if (wc >= 0x3608 && wc < 0x3620)
-      c = gb18030ext_page36[wc-0x3608];
-    else if (wc == 0x3918)
-      c = 0xfe60;
-    else if (wc == 0x396e)
-      c = 0xfe5f;
-    else if (wc >= 0x39c8 && wc < 0x39e0)
-      c = gb18030ext_page39[wc-0x39c8];
-    else if (wc == 0x3a73)
-      c = 0xfe64;
-    else if (wc == 0x3b4e)
-      c = 0xfe68;
-    else if (wc == 0x3c6e)
-      c = 0xfe69;
-    else if (wc == 0x3ce0)
-      c = 0xfe6a;
-    else if (wc == 0x4056)
-      c = 0xfe6f;
-    else if (wc == 0x415f)
-      c = 0xfe70;
-    else if (wc == 0x4337)
-      c = 0xfe72;
-    else if (wc >= 0x43a8 && wc < 0x43e0)
-      c = gb18030ext_page43[wc-0x43a8];
-    else if (wc == 0x44d6)
-      c = 0xfe7b;
-    else if (wc >= 0x4648 && wc < 0x4668)
-      c = gb18030ext_page46[wc-0x4648];
-    else if (wc >= 0x4720 && wc < 0x4730)
-      c = gb18030ext_page47_1[wc-0x4720];
-    else if (wc >= 0x4778 && wc < 0x4790)
-      c = gb18030ext_page47_2[wc-0x4778];
-    else if (wc >= 0x4940 && wc < 0x49b8)
-      c = gb18030ext_page49[wc-0x4940];
-    else if (wc >= 0x4c70 && wc < 0x4ca8)
-      c = gb18030ext_page4c[wc-0x4c70];
-    else if (wc >= 0x4d10 && wc < 0x4d20)
-      c = gb18030ext_page4d[wc-0x4d10];
-    else if (wc == 0x4dae)
-      c = 0xfe9f;
-    else if (wc >= 0x9fb4 && wc < 0x9fbc)
-      c = gb18030ext_page9f[wc-0x9fb0];
-    else if (wc >= 0xfe10 && wc < 0xfe1a)
-      c = gb18030ext_pagefe[wc-0xfe10];
-    else if (wc == 0x20087)
-      c = 0xfe51;
-    else if (wc == 0x20089)
-      c = 0xfe52;
-    else if (wc == 0x200cc)
-      c = 0xfe53;
-    else if (wc == 0x215d7)
-      c = 0xfe6c;
-    else if (wc == 0x2298f)
-      c = 0xfe76;
-    else if (wc == 0x241fe)
-      c = 0xfe91;
-    if (c != 0) {
-      r[0] = (c >> 8); r[1] = (c & 0xff);
-      return 2;
-    }
-    return RET_ILUNI;
-  }
-  return RET_TOOSMALL;
-}
-
-static int
-gb18030_2022_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
+gb18030ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
 {
   if (n >= 2) {
     unsigned short c = 0;
diff --git a/lib/gb18030uni.h b/lib/gb18030uni.h
index f2f2838..1f5a2cc 100644
--- a/lib/gb18030uni.h
+++ b/lib/gb18030uni.h
@@ -301,13 +301,14 @@
 gb18030_2005_uni_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
 {
   if (n >= 4) {
-    unsigned int i = wc;
-    if (i >= 0x0080 && i <= 0xffff) {
-      if (i == 0xe7c7) {
+    unsigned int i;
+    if (wc >= 0x0080 && wc <= 0xffff) {
+      if (wc == 0xe7c7) {
         i = 7457;
       } else {
         unsigned int k1 = 0;
         unsigned int k2 = 205;
+        i = wc;
         while (k1 < k2) {
           unsigned int k = (k1 + k2) / 2;
           if (i <= gb18030uni_uni2charset_ranges[2*k+1])
@@ -322,13 +323,28 @@
           i -= diff;
         }
       }
-      r[3] = (i % 10) + 0x30; i = i / 10;
-      r[2] = (i % 126) + 0x81; i = i / 126;
-      r[1] = (i % 10) + 0x30; i = i / 10;
-      r[0] = i + 0x81;
-      return 4;
-    }
-    return RET_ILUNI;
+    } else if (wc >= 0x20087 && wc <= 0x241fe) {
+      if (wc == 0x20087)
+        i = 0x3e2cf;
+      else if (wc == 0x20089)
+        i = 0x3e2d1;
+      else if (wc == 0x200cc)
+        i = 0x3e314;
+      else if (wc == 0x215d7)
+        i = 0x3f81f;
+      else if (wc == 0x2298f)
+        i = 0x40bd7;
+      else if (wc == 0x241fe)
+        i = 0x42446;
+      else
+        return RET_ILUNI;
+    } else
+      return RET_ILUNI;
+    r[3] = (i % 10) + 0x30; i = i / 10;
+    r[2] = (i % 126) + 0x81; i = i / 126;
+    r[1] = (i % 10) + 0x30; i = i / 10;
+    r[0] = i + 0x81;
+    return 4;
   }
   return RET_TOOSMALL;
 }
@@ -337,17 +353,18 @@
 gb18030_2022_uni_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
 {
   if (n >= 4) {
-    unsigned int i = wc;
-    if (i >= 0x0080 && i <= 0xffff) {
-      if (i == 0xe7c7) {
+    if (wc >= 0x0080 && wc <= 0xffff) {
+      unsigned int i;
+      if (wc == 0xe7c7) {
         i = 7457;
-      } else if (i >= 0xe78d && i <= 0xe796) {
-        i = 39076 + gb18030_2022_uni2charset_pua2[i-0xe78d];
-      } else if (i >= 0xe81e && i <= 0xe864 && gb18030_2022_uni2charset_pua1[i-0xe81e]) {
-        i = 19056 + gb18030_2022_uni2charset_pua1[i-0xe81e];
+      } else if (wc >= 0xe78d && wc <= 0xe796) {
+        i = 39076 + gb18030_2022_uni2charset_pua2[wc-0xe78d];
+      } else if (wc >= 0xe81e && wc <= 0xe864 && gb18030_2022_uni2charset_pua1[wc-0xe81e]) {
+        i = 19056 + gb18030_2022_uni2charset_pua1[wc-0xe81e];
       } else {
         unsigned int k1 = 0;
         unsigned int k2 = 205;
+        i = wc;
         while (k1 < k2) {
           unsigned int k = (k1 + k2) / 2;
           if (i <= gb18030uni_uni2charset_ranges[2*k+1])
diff --git a/tests/GB18030-2005.IRREVERSIBLE.TXT b/tests/GB18030-2005.IRREVERSIBLE.TXT
index 5e84bc3..48692ea 100644
--- a/tests/GB18030-2005.IRREVERSIBLE.TXT
+++ b/tests/GB18030-2005.IRREVERSIBLE.TXT
@@ -16,12 +16,12 @@
 0x84318333	0xFE17
 0x84318334	0xFE18
 0x84318335	0xFE19
-0x95329031	0x20087
-0x95329033	0x20089
-0x95329730	0x200CC
-0x9536B937	0x215D7
-0x9630BA35	0x2298F
-0x9635B630	0x241FE
+0x95329031	0xE816
+0x95329033	0xE817
+0x95329730	0xE818
+0x9536B937	0xE831
+0x9630BA35	0xE83B
+0x9635B630	0xE855
 0xA6D9	0xE78D
 0xA6DA	0xE78E
 0xA6DB	0xE78F
@@ -32,17 +32,17 @@
 0xA6EC	0xE794
 0xA6ED	0xE795
 0xA6F3	0xE796
-0xFE51	0xE816
-0xFE52	0xE817
-0xFE53	0xE818
+0xFE51	0x20087
+0xFE52	0x20089
+0xFE53	0x200CC
 0xFE59	0xE81E
 0xFE61	0xE826
 0xFE66	0xE82B
 0xFE67	0xE82C
-0xFE6C	0xE831
+0xFE6C	0x215D7
 0xFE6D	0xE832
-0xFE76	0xE83B
+0xFE76	0x2298F
 0xFE7E	0xE843
 0xFE90	0xE854
-0xFE91	0xE855
+0xFE91	0x241FE
 0xFEA0	0xE864