UTF-8: Reject surrogates and out-of-range code points.
diff --git a/ChangeLog b/ChangeLog
index 2818f2d..69c7f7f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2016-11-17  Bruno Haible  <bruno@clisp.org>
+
+	UTF-8: Reject surrogates and out-of-range code points.
+	* lib/utf8.h (utf8_mbtowc, utf8_wctomb): Reject code points in the
+	range 0xD800..0xDFFF and >= 0x110000.
+	* tests/genutf8.c (main): Don't emit mappings for 0xD800..0xDFFF.
+
 2016-10-22  Bruno Haible  <bruno@clisp.org>
 
 	Switch to libtool 2.4.6.
diff --git a/NEWS b/NEWS
index aebc36c..cb2a5fb 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,5 @@
 New in 1.15:
+* The UTF-8 converter now rejects surrogates and out-of-range code points.
 * Added ISO-2022-JP-MS converter.
 * Updated the CP1255 converter to map one more character.
 * The functions now support strings longer than 2 GB.
diff --git a/lib/utf8.h b/lib/utf8.h
index 8fab264..016ac52 100644
--- a/lib/utf8.h
+++ b/lib/utf8.h
@@ -45,7 +45,8 @@
     if (n < 3)
       return RET_TOOFEW(0);
     if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
-          && (c >= 0xe1 || s[1] >= 0xa0)))
+          && (c >= 0xe1 || s[1] >= 0xa0)
+          && (c != 0xed || s[1] < 0xa0)))
       return RET_ILSEQ;
     *pwc = ((ucs4_t) (c & 0x0f) << 12)
            | ((ucs4_t) (s[1] ^ 0x80) << 6)
@@ -56,41 +57,14 @@
       return RET_TOOFEW(0);
     if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
           && (s[3] ^ 0x80) < 0x40
-          && (c >= 0xf1 || s[1] >= 0x90)))
+          && (c >= 0xf1 || s[1] >= 0x90)
+          && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))))
       return RET_ILSEQ;
     *pwc = ((ucs4_t) (c & 0x07) << 18)
            | ((ucs4_t) (s[1] ^ 0x80) << 12)
            | ((ucs4_t) (s[2] ^ 0x80) << 6)
            | (ucs4_t) (s[3] ^ 0x80);
     return 4;
-  } else if (c < 0xfc && sizeof(ucs4_t)*8 >= 32) {
-    if (n < 5)
-      return RET_TOOFEW(0);
-    if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
-          && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
-          && (c >= 0xf9 || s[1] >= 0x88)))
-      return RET_ILSEQ;
-    *pwc = ((ucs4_t) (c & 0x03) << 24)
-           | ((ucs4_t) (s[1] ^ 0x80) << 18)
-           | ((ucs4_t) (s[2] ^ 0x80) << 12)
-           | ((ucs4_t) (s[3] ^ 0x80) << 6)
-           | (ucs4_t) (s[4] ^ 0x80);
-    return 5;
-  } else if (c < 0xfe && sizeof(ucs4_t)*8 >= 32) {
-    if (n < 6)
-      return RET_TOOFEW(0);
-    if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
-          && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
-          && (s[5] ^ 0x80) < 0x40
-          && (c >= 0xfd || s[1] >= 0x84)))
-      return RET_ILSEQ;
-    *pwc = ((ucs4_t) (c & 0x01) << 30)
-           | ((ucs4_t) (s[1] ^ 0x80) << 24)
-           | ((ucs4_t) (s[2] ^ 0x80) << 18)
-           | ((ucs4_t) (s[3] ^ 0x80) << 12)
-           | ((ucs4_t) (s[4] ^ 0x80) << 6)
-           | (ucs4_t) (s[5] ^ 0x80);
-    return 6;
   } else
     return RET_ILSEQ;
 }
@@ -103,21 +77,18 @@
     count = 1;
   else if (wc < 0x800)
     count = 2;
-  else if (wc < 0x10000)
-    count = 3;
-  else if (wc < 0x200000)
+  else if (wc < 0x10000) {
+    if (wc < 0xd800 || wc >= 0xe000)
+      count = 3;
+    else
+      return RET_ILUNI;
+  } else if (wc < 0x110000)
     count = 4;
-  else if (wc < 0x4000000)
-    count = 5;
-  else if (wc <= 0x7fffffff)
-    count = 6;
   else
     return RET_ILUNI;
   if (n < count)
     return RET_TOOSMALL;
   switch (count) { /* note: code falls through cases! */
-    case 6: r[5] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x4000000;
-    case 5: r[4] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x200000;
     case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
     case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
     case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
diff --git a/tests/genutf8.c b/tests/genutf8.c
index 85086fb..e20477a 100644
--- a/tests/genutf8.c
+++ b/tests/genutf8.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000, 2004-2005, 2012 Free Software Foundation, Inc.
+/* Copyright (C) 2000, 2004-2005, 2012, 2016 Free Software Foundation, Inc.
    This file is part of the GNU LIBICONV Library.
 
    The GNU LIBICONV Library is free software; you can redistribute it
@@ -39,11 +39,14 @@
   for (i1 = 2; i1 < 32; i1++)
     for (i2 = 0; i2 < 64; i2++)
       printf("0x%02X%02X\t0x%04X\n", 0xc0+i1,0x80+i2, (i1<<6)+i2);
-  /* Range 0x0800..0xffff */
+  /* Range 0x0800..0xffff, except 0xd800..0xdfff */
   for (i1 = 0; i1 < 16; i1++)
     for (i2 = (i1==0 ? 32 : 0); i2 < 64; i2++)
-      for (i3 = 0; i3 < 64; i3++)
-        printf("0x%02X%02X%02X\t0x%04X\n", 0xe0+i1,0x80+i2,0x80+i3, (i1<<12)+(i2<<6)+i3);
+      for (i3 = 0; i3 < 64; i3++) {
+        int u = (i1<<12)+(i2<<6)+i3;
+        if (!(u >= 0xd800 && u < 0xe000))
+          printf("0x%02X%02X%02X\t0x%04X\n", 0xe0+i1,0x80+i2,0x80+i3, u);
+      }
 
   if (ferror(stdout) || fclose(stdout))
     exit(1);