Revamp of format validation in 'string.format'

When calling 'sprintf', not all conversion specifiers accept all
flags; some combinations are undefined behavior.
diff --git a/lstrlib.c b/lstrlib.c
index 74501f7..e3b8df0 100644
--- a/lstrlib.c
+++ b/lstrlib.c
@@ -1090,13 +1090,31 @@
 
 
 /* valid flags in a format specification */
-#if !defined(L_FMTFLAGS)
-#define L_FMTFLAGS	"-+ #0"
+#if !defined(L_FMTFLAGSF)
+
+/* valid flags for a, A, e, E, f, F, g, and G conversions */
+#define L_FMTFLAGSF	"-+#0 "
+
+/* valid flags for o, x, and X conversions */
+#define L_FMTFLAGSX	"-#0"
+
+/* valid flags for d and i conversions */
+#define L_FMTFLAGSI	"-+0 "
+
+/* valid flags for u conversions */
+#define L_FMTFLAGSU	"-0"
+
+/* valid flags for c, p, and s conversions */
+#define L_FMTFLAGSC	"-"
+
 #endif
 
 
 /*
-** maximum size of each format specification (such as "%-099.99d")
+** Maximum size of each format specification (such as "%-099.99d"):
+** Initial '%', flags (up to 5), width (2), period, precision (2),
+** length modifier (8), conversion specifier, and final '\0', plus some
+** extra.
 */
 #define MAX_FORMAT	32
 
@@ -1189,25 +1207,53 @@
 }
 
 
-static const char *scanformat (lua_State *L, const char *strfrmt, char *form) {
-  const char *p = strfrmt;
-  while (*p != '\0' && strchr(L_FMTFLAGS, *p) != NULL) p++;  /* skip flags */
-  if ((size_t)(p - strfrmt) >= sizeof(L_FMTFLAGS)/sizeof(char))
-    luaL_error(L, "invalid format (repeated flags)");
-  if (isdigit(uchar(*p))) p++;  /* skip width */
-  if (isdigit(uchar(*p))) p++;  /* (2 digits at most) */
-  if (*p == '.') {
-    p++;
-    if (isdigit(uchar(*p))) p++;  /* skip precision */
-    if (isdigit(uchar(*p))) p++;  /* (2 digits at most) */
+static const char *get2digits (const char *s) {
+  if (isdigit(uchar(*s))) {
+    s++;
+    if (isdigit(uchar(*s))) s++;  /* (2 digits at most) */
   }
-  if (isdigit(uchar(*p)))
-    luaL_error(L, "invalid format (width or precision too long)");
+  return s;
+}
+
+
+/*
+** Chech whether a conversion specification is valid. When called,
+** first character in 'form' must be '%' and last character must
+** be a valid conversion specifier. 'flags' are the accepted flags;
+** 'precision' signals whether to accept a precision.
+*/
+static void checkformat (lua_State *L, const char *form, const char *flags,
+                                       int precision) {
+  const char *spec = form + 1;  /* skip '%' */
+  spec += strspn(spec, flags);  /* skip flags */
+  if (*spec != '0') {  /* a width cannot start with '0' */
+    spec = get2digits(spec);  /* skip width */
+    if (*spec == '.' && precision) {
+      spec++;
+      spec = get2digits(spec);  /* skip precision */
+    }
+  }
+  if (!isalpha(uchar(*spec)))  /* did not go to the end? */
+    luaL_error(L, "invalid conversion specification: '%s'", form);
+}
+
+
+/*
+** Get a conversion specification and copy it to 'form'.
+** Return the address of its last character.
+*/
+static const char *getformat (lua_State *L, const char *strfrmt,
+                                            char *form) {
+  /* spans flags, width, and precision ('0' is included as a flag) */
+  size_t len = strspn(strfrmt, L_FMTFLAGSF "123456789.");
+  len++;  /* adds following character (should be the specifier) */
+  /* still needs space for '%', '\0', plus a length modifier */
+  if (len >= MAX_FORMAT - 10)
+    luaL_error(L, "invalid format (too long)");
   *(form++) = '%';
-  memcpy(form, strfrmt, ((p - strfrmt) + 1) * sizeof(char));
-  form += (p - strfrmt) + 1;
-  *form = '\0';
-  return p;
+  memcpy(form, strfrmt, len * sizeof(char));
+  *(form + len) = '\0';
+  return strfrmt + len - 1;
 }
 
 
@@ -1230,6 +1276,7 @@
   size_t sfl;
   const char *strfrmt = luaL_checklstring(L, arg, &sfl);
   const char *strfrmt_end = strfrmt+sfl;
+  const char *flags;
   luaL_Buffer b;
   luaL_buffinit(L, &b);
   while (strfrmt < strfrmt_end) {
@@ -1239,25 +1286,35 @@
       luaL_addchar(&b, *strfrmt++);  /* %% */
     else { /* format item */
       char form[MAX_FORMAT];  /* to store the format ('%...') */
-      int maxitem = MAX_ITEM;
-      char *buff = luaL_prepbuffsize(&b, maxitem);  /* to put formatted item */
-      int nb = 0;  /* number of bytes in added item */
+      int maxitem = MAX_ITEM;  /* maximum length for the result */
+      char *buff = luaL_prepbuffsize(&b, maxitem);  /* to put result */
+      int nb = 0;  /* number of bytes in result */
       if (++arg > top)
         return luaL_argerror(L, arg, "no value");
-      strfrmt = scanformat(L, strfrmt, form);
+      strfrmt = getformat(L, strfrmt, form);
       switch (*strfrmt++) {
         case 'c': {
+          checkformat(L, form, L_FMTFLAGSC, 0);
           nb = l_sprintf(buff, maxitem, form, (int)luaL_checkinteger(L, arg));
           break;
         }
         case 'd': case 'i':
-        case 'o': case 'u': case 'x': case 'X': {
+          flags = L_FMTFLAGSI;
+          goto intcase;
+        case 'u':
+          flags = L_FMTFLAGSU;
+          goto intcase;
+        case 'o': case 'x': case 'X':
+          flags = L_FMTFLAGSX;
+         intcase: {
           lua_Integer n = luaL_checkinteger(L, arg);
+          checkformat(L, form, flags, 1);
           addlenmod(form, LUA_INTEGER_FRMLEN);
           nb = l_sprintf(buff, maxitem, form, (LUAI_UACINT)n);
           break;
         }
         case 'a': case 'A':
+          checkformat(L, form, L_FMTFLAGSF, 1);
           addlenmod(form, LUA_NUMBER_FRMLEN);
           nb = lua_number2strx(L, buff, maxitem, form,
                                   luaL_checknumber(L, arg));
@@ -1268,12 +1325,14 @@
           /* FALLTHROUGH */
         case 'e': case 'E': case 'g': case 'G': {
           lua_Number n = luaL_checknumber(L, arg);
+          checkformat(L, form, L_FMTFLAGSF, 1);
           addlenmod(form, LUA_NUMBER_FRMLEN);
           nb = l_sprintf(buff, maxitem, form, (LUAI_UACNUMBER)n);
           break;
         }
         case 'p': {
           const void *p = lua_topointer(L, arg);
+          checkformat(L, form, L_FMTFLAGSC, 0);
           if (p == NULL) {  /* avoid calling 'printf' with argument NULL */
             p = "(null)";  /* result */
             form[strlen(form) - 1] = 's';  /* format it as a string */
@@ -1294,7 +1353,8 @@
             luaL_addvalue(&b);  /* keep entire string */
           else {
             luaL_argcheck(L, l == strlen(s), arg, "string contains zeros");
-            if (!strchr(form, '.') && l >= 100) {
+            checkformat(L, form, L_FMTFLAGSC, 1);
+            if (strchr(form, '.') == NULL && l >= 100) {
               /* no precision and string is too long to be formatted */
               luaL_addvalue(&b);  /* keep entire string */
             }
diff --git a/manual/manual.of b/manual/manual.of
index 664b5c1..ea9a030 100644
--- a/manual/manual.of
+++ b/manual/manual.of
@@ -7078,8 +7078,10 @@
 which must be a string.
 The format string follows the same rules as the @ANSI{sprintf}.
 The only differences are that the conversion specifiers and modifiers
-@T{*}, @id{h}, @id{L}, @id{l}, and @id{n} are not supported
+@id{F}, @id{n}, @T{*}, @id{h}, @id{L}, and @id{l} are not supported
 and that there is an extra specifier, @id{q}.
+Both width and precision, when present,
+are limited to two digits.
 
 The specifier @id{q} formats booleans, nil, numbers, and strings
 in a way that the result is a valid constant in Lua source code.
@@ -7099,7 +7101,7 @@
 "a string with \"quotes\" and \
  new line"
 }
-This specifier does not support modifiers (flags, width, length).
+This specifier does not support modifiers (flags, width, precision).
 
 The conversion specifiers
 @id{A}, @id{a}, @id{E}, @id{e}, @id{f},
diff --git a/testes/strings.lua b/testes/strings.lua
index 61a06a2..184fa65 100644
--- a/testes/strings.lua
+++ b/testes/strings.lua
@@ -202,13 +202,11 @@
               "\0\xe4\0b8c\0")
 assert(string.format('') == "")
 assert(string.format("%c",34)..string.format("%c",48)..string.format("%c",90)..string.format("%c",100) ==
-       string.format("%c%c%c%c", 34, 48, 90, 100))
+       string.format("%1c%-c%-1c%c", 34, 48, 90, 100))
 assert(string.format("%s\0 is not \0%s", 'not be', 'be') == 'not be\0 is not \0be')
 assert(string.format("%%%d %010d", 10, 23) == "%10 0000000023")
 assert(tonumber(string.format("%f", 10.3)) == 10.3)
-x = string.format('"%-50s"', 'a')
-assert(#x == 52)
-assert(string.sub(x, 1, 4) == '"a  ')
+assert(string.format('"%-50s"', 'a') == '"a' .. string.rep(' ', 49) .. '"')
 
 assert(string.format("-%.20s.20s", string.rep("%", 2000)) ==
                      "-"..string.rep("%", 20)..".20s")
@@ -237,7 +235,6 @@
 
 assert(string.format("\0%s\0", "\0\0\1") == "\0\0\0\1\0")
 checkerror("contains zeros", string.format, "%10s", "\0")
-checkerror("cannot have modifiers", string.format, "%10q", "1")
 
 -- format x tostring
 assert(string.format("%s %s", nil, true) == "nil true")
@@ -341,6 +338,21 @@
 end
 
 
+-- testing some flags  (all these results are required by ISO C)
+assert(string.format("%#12o", 10) == "         012")
+assert(string.format("%#10x", 100) == "      0x64")
+assert(string.format("%#-17X", 100) == "0X64             ")
+assert(string.format("%013i", -100) == "-000000000100")
+assert(string.format("%2.5d", -100) == "-00100")
+assert(string.format("%.u", 0) == "")
+assert(string.format("%+#014.0f", 100) == "+000000000100.")
+assert(string.format("% 1.0E", 100) == " 1E+02")
+assert(string.format("%-16c", 97) == "a               ")
+assert(string.format("%+.3G", 1.5) == "+1.5")
+assert(string.format("% .1g", 2^10) == " 1e+03")
+assert(string.format("%.0s", "alo")  == "")
+assert(string.format("%.s", "alo")  == "")
+
 -- errors in format
 
 local function check (fmt, msg)
@@ -348,13 +360,21 @@
 end
 
 local aux = string.rep('0', 600)
-check("%100.3d", "too long")
+check("%100.3d", "invalid conversion")
 check("%1"..aux..".3d", "too long")
-check("%1.100d", "too long")
+check("%1.100d", "invalid conversion")
 check("%10.1"..aux.."004d", "too long")
 check("%t", "invalid conversion")
-check("%"..aux.."d", "repeated flags")
+check("%"..aux.."d", "too long")
 check("%d %d", "no value")
+check("%010c", "invalid conversion")
+check("%.10c", "invalid conversion")
+check("%0.34s", "invalid conversion")
+check("%#i", "invalid conversion")
+check("%3.1p", "invalid conversion")
+check("%0.s", "invalid conversion")
+check("%10q", "cannot have modifiers")
+check("%F", "invalid conversion")   -- useless and not in C89
 
 
 assert(load("return 1\n--comment without ending EOL")() == 1)