Back to a stackless implementation

A "with stack" implementation gains too little in performance to be
worth all the noise from C-stack overflows.

This commit is almost a sketch, to test performance. There are several
pending items:

- review control of C-stack overflow and error messages;
- what to do with setcstacklimit;
- review comments;
- review unroll of Lua calls.
diff --git a/ldo.c b/ldo.c
index 5473815..dc3cc9f 100644
--- a/ldo.c
+++ b/ldo.c
@@ -139,8 +139,7 @@
 
 
 int luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud) {
-  global_State *g = G(L);
-  l_uint32 oldnCcalls = g->Cstacklimit - (L->nCcalls + L->nci);
+  l_uint32 oldnCcalls = L->nCcalls;
   struct lua_longjmp lj;
   lj.status = LUA_OK;
   lj.previous = L->errorJmp;  /* chain new error handler */
@@ -149,7 +148,7 @@
     (*f)(L, ud);
   );
   L->errorJmp = lj.previous;  /* restore old error handler */
-  L->nCcalls = g->Cstacklimit - oldnCcalls - L->nci;
+  L->nCcalls = oldnCcalls;
   return lj.status;
 }
 
@@ -348,7 +347,7 @@
 
 /*
 ** Check whether 'func' has a '__call' metafield. If so, put it in the
-** stack, below original 'func', so that 'luaD_call' can call it. Raise
+** stack, below original 'func', so that 'luaD_precall' can call it. Raise
 ** an error if there is no '__call' metafield.
 */
 void luaD_tryfuncTM (lua_State *L, StkId func) {
@@ -454,7 +453,7 @@
 ** When returns, all the results are on the stack, starting at the original
 ** function position.
 */
-void luaD_call (lua_State *L, StkId func, int nresults) {
+int luaD_precall (lua_State *L, StkId func, int nresults) {
   lua_CFunction f;
  retry:
   switch (ttypetag(s2v(func))) {
@@ -482,7 +481,7 @@
       lua_lock(L);
       api_checknelems(L, n);
       luaD_poscall(L, ci, n);
-      break;
+      return 1;
     }
     case LUA_VLCL: {  /* Lua function */
       CallInfo *ci;
@@ -501,8 +500,7 @@
       for (; narg < nfixparams; narg++)
         setnilvalue(s2v(L->top++));  /* complete missing arguments */
       lua_assert(ci->top <= L->stack_last);
-      luaV_execute(L, ci);  /* run the function */
-      break;
+      return 0;
     }
     default: {  /* not a function */
       checkstackGCp(L, 1, func);  /* space for metamethod */
@@ -513,17 +511,32 @@
 }
 
 
+static void stackerror (lua_State *L) {
+  if (getCcalls(L) == LUAI_MAXCCALLS)  /* count just reached the limit? */
+    luaG_runerror(L, "C stack overflow");
+  else if (getCcalls(L) >= (LUAI_MAXCCALLS + (LUAI_MAXCCALLS>>3)))  /* limit + 1/8? */
+    luaD_throw(L, LUA_ERRERR);  /* error while handling stack error */
+}
+
+
+void luaD_call (lua_State *L, StkId func, int nResults) {
+  L->nCcalls++;  /* count this nested call */
+  if (getCcalls(L) >= LUAI_MAXCCALLS)  /* at or past the limit? */
+    stackerror(L);
+  if (!luaD_precall(L, func, nResults))  /* is a Lua function? */
+    luaV_execute(L, L->ci);  /* call it */
+  L->nCcalls--;  /* call finished; undo the increment above */
+}
+
+
+
 /*
 ** Similar to 'luaD_call', but does not allow yields during the call.
 */
 void luaD_callnoyield (lua_State *L, StkId func, int nResults) {
-  incXCcalls(L);
-  if (getCcalls(L) <= CSTACKERR) {  /* possible C stack overflow? */
-    luaE_exitCcall(L);  /* to compensate decrement in next call */
-    luaE_enterCcall(L);  /* check properly */
-  }
+  incnny(L);
   luaD_call(L, func, nResults);
-  decXCcalls(L);
+  decnny(L);
 }
 
 
@@ -638,7 +651,8 @@
   StkId firstArg = L->top - n;  /* first argument */
   CallInfo *ci = L->ci;
   if (L->status == LUA_OK) {  /* starting a coroutine? */
-    luaD_call(L, firstArg - 1, LUA_MULTRET);
+    if (!luaD_precall(L, firstArg - 1, LUA_MULTRET))  /* Lua function? */
+      luaV_execute(L, L->ci);  /* call it */
   }
   else {  /* resuming from previous yield */
     lua_assert(L->status == LUA_YIELD);
@@ -670,11 +684,8 @@
   }
   else if (L->status != LUA_YIELD)  /* ended with errors? */
     return resume_error(L, "cannot resume dead coroutine", nargs);
-  if (from == NULL)
-    L->nCcalls = CSTACKTHREAD;
-  else  /* correct 'nCcalls' for this thread */
-    L->nCcalls = getCcalls(from) - L->nci - CSTACKCF;
-  if (L->nCcalls <= CSTACKERR)
+  L->nCcalls = (from) ? getCcalls(from) + 1 : 1;
+  if (getCcalls(L) >= LUAI_MAXCCALLS)
     return resume_error(L, "C stack overflow", nargs);
   luai_userstateresume(L, nargs);
   api_checknelems(L, (L->status == LUA_OK) ? nargs + 1 : nargs);
diff --git a/ldo.h b/ldo.h
index 6c6cb28..7d03211 100644
--- a/ldo.h
+++ b/ldo.h
@@ -59,6 +59,7 @@
                                         int fTransfer, int nTransfer);
 LUAI_FUNC void luaD_hookcall (lua_State *L, CallInfo *ci);
 LUAI_FUNC void luaD_pretailcall (lua_State *L, CallInfo *ci, StkId func, int n);
+LUAI_FUNC int luaD_precall (lua_State *L, StkId func, int nResults);
 LUAI_FUNC void luaD_call (lua_State *L, StkId func, int nResults);
 LUAI_FUNC void luaD_callnoyield (lua_State *L, StkId func, int nResults);
 LUAI_FUNC void luaD_tryfuncTM (lua_State *L, StkId func);
diff --git a/lparser.c b/lparser.c
index bc7d9a4..502a9b2 100644
--- a/lparser.c
+++ b/lparser.c
@@ -489,12 +489,14 @@
 }
 
 
-/*
-** Macros to limit the maximum recursion depth while parsing
-*/
-#define enterlevel(ls)	luaE_enterCcall((ls)->L)
+static void enterlevel (LexState *ls) {
+  lua_State *L = ls->L;
+  L->nCcalls++;  /* one more level of recursion while parsing */
+  checklimit(ls->fs, getCcalls(L), LUAI_MAXCCALLS, "C levels");  /* presumably errors past the limit -- confirm */
+}
 
-#define leavelevel(ls)	luaE_exitCcall((ls)->L)
+
+#define leavelevel(ls) ((ls)->L->nCcalls--)
 
 
 /*
diff --git a/lstate.c b/lstate.c
index 86b3761..8cda307 100644
--- a/lstate.c
+++ b/lstate.c
@@ -119,44 +119,9 @@
 }
 
 
-/*
-** Decrement count of "C calls" and check for overflows. In case of
-** a stack overflow, check appropriate error ("regular" overflow or
-** overflow while handling stack overflow).  If 'nCcalls' is smaller
-** than CSTACKERR but larger than CSTACKMARK, it means it has just
-** entered the "overflow zone", so the function raises an overflow
-** error.  If 'nCcalls' is smaller than CSTACKMARK (which means it is
-** already handling an overflow) but larger than CSTACKERRMARK, does
-** not report an error (to allow message handling to work). Otherwise,
-** report a stack overflow while handling a stack overflow (probably
-** caused by a repeating error in the message handling function).
-*/
-
-void luaE_enterCcall (lua_State *L) {
-  int ncalls = getCcalls(L);
-  L->nCcalls--;
-  if (ncalls <= CSTACKERR) {  /* possible overflow? */
-    luaE_freeCI(L);  /* release unused CIs */
-    ncalls = getCcalls(L);  /* update call count */
-    if (ncalls <= CSTACKERR) {  /* still overflow? */
-      if (ncalls <= CSTACKERRMARK)  /* below error-handling zone? */
-        luaD_throw(L, LUA_ERRERR);  /* error while handling stack error */
-      else if (ncalls >= CSTACKMARK) {
-        /* not in error-handling zone; raise the error now */
-        L->nCcalls = (CSTACKMARK - 1);  /* enter error-handling zone */
-        luaG_runerror(L, "C stack overflow");
-      }
-      /* else stack is in the error-handling zone;
-         allow message handler to work */
-    }
-  }
-}
-
-
 CallInfo *luaE_extendCI (lua_State *L) {
   CallInfo *ci;
   lua_assert(L->ci->next == NULL);
-  luaE_enterCcall(L);
   ci = luaM_new(L, CallInfo);
   lua_assert(L->ci->next == NULL);
   L->ci->next = ci;
@@ -175,13 +140,11 @@
   CallInfo *ci = L->ci;
   CallInfo *next = ci->next;
   ci->next = NULL;
-  L->nCcalls += L->nci;  /* add removed elements back to 'nCcalls' */
   while ((ci = next) != NULL) {
     next = ci->next;
     luaM_free(L, ci);
     L->nci--;
   }
-  L->nCcalls -= L->nci;  /* adjust result */
 }
 
 
@@ -194,7 +157,6 @@
   CallInfo *next;
   if (ci == NULL)
     return;  /* no extra elements */
-  L->nCcalls += L->nci;  /* add removed elements back to 'nCcalls' */
   while ((next = ci->next) != NULL) {  /* two extra elements? */
     CallInfo *next2 = next->next;  /* next's next */
     ci->next = next2;  /* remove next from the list */
@@ -207,7 +169,6 @@
       ci = next2;  /* continue */
     }
   }
-  L->nCcalls -= L->nci;  /* adjust result */
 }
 
 
@@ -335,7 +296,7 @@
   setthvalue2s(L, L->top, L1);
   api_incr_top(L);
   preinit_thread(L1, g);
-  L1->nCcalls = getCcalls(L);
+  L1->nCcalls = 0;
   L1->hookmask = L->hookmask;
   L1->basehookcount = L->basehookcount;
   L1->hook = L->hook;
@@ -396,7 +357,7 @@
   preinit_thread(L, g);
   g->allgc = obj2gco(L);  /* by now, only object is the main thread */
   L->next = NULL;
-  g->Cstacklimit = L->nCcalls = LUAI_MAXCSTACK + CSTACKERR;
+  g->Cstacklimit = L->nCcalls = 0;
   incnny(L);  /* main thread is always non yieldable */
   g->frealloc = f;
   g->ud = ud;
diff --git a/lstate.h b/lstate.h
index c1c3820..983aa0d 100644
--- a/lstate.h
+++ b/lstate.h
@@ -144,12 +144,6 @@
 /* Decrement the number of non-yieldable calls */
 #define decnny(L)	((L)->nCcalls -= 0x10000)
 
-/* Increment the number of non-yieldable calls and decrement nCcalls */
-#define incXCcalls(L)	((L)->nCcalls += 0x10000 - CSTACKCF)
-
-/* Decrement the number of non-yieldable calls and increment nCcalls */
-#define decXCcalls(L)	((L)->nCcalls -= 0x10000 - CSTACKCF)
-
 
 
 
@@ -389,7 +383,6 @@
 LUAI_FUNC CallInfo *luaE_extendCI (lua_State *L);
 LUAI_FUNC void luaE_freeCI (lua_State *L);
 LUAI_FUNC void luaE_shrinkCI (lua_State *L);
-LUAI_FUNC void luaE_enterCcall (lua_State *L);
 LUAI_FUNC void luaE_warning (lua_State *L, const char *msg, int tocont);
 LUAI_FUNC void luaE_warnerror (lua_State *L, const char *where);
 
diff --git a/ltests.h b/ltests.h
index e9219e2..f8c4466 100644
--- a/ltests.h
+++ b/ltests.h
@@ -23,11 +23,6 @@
 #define LUAI_ASSERT
 
 
-
-/* compiled with -O0, Lua uses a lot of C stack space... */
-#undef LUAI_MAXCSTACK
-#define LUAI_MAXCSTACK		400
-
 /* to avoid warnings, and to make sure value is really unused */
 #define UNUSED(x)       (x=0, (void)(x))
 
diff --git a/luaconf.h b/luaconf.h
index bdf927e..229413d 100644
--- a/luaconf.h
+++ b/luaconf.h
@@ -36,8 +36,8 @@
 ** =====================================================================
 */
 
-/*
-@@ LUAI_MAXCSTACK defines the maximum depth for nested calls and
+/* >>> move back to llimits.h
+@@ LUAI_MAXCCALLS defines the maximum depth for nested calls and
 ** also limits the maximum depth of other recursive algorithms in
 ** the implementation, such as syntactic analysis. A value too
 ** large may allow the interpreter to crash (C-stack overflow).
@@ -46,8 +46,8 @@
 ** The test file 'cstack.lua' may help finding a good limit.
 ** (It will crash with a limit too high.)
 */
-#if !defined(LUAI_MAXCSTACK)
-#define LUAI_MAXCSTACK		2000
+#if !defined(LUAI_MAXCCALLS)
+#define LUAI_MAXCCALLS		200
 #endif
 
 
diff --git a/lvm.c b/lvm.c
index 08681af..a232e1e 100644
--- a/lvm.c
+++ b/lvm.c
@@ -229,7 +229,7 @@
         count /= l_castS2U(-(step + 1)) + 1u;
       }
       /* store the counter in place of the limit (which won't be
-         needed anymore */
+         needed anymore) */
       setivalue(plimit, l_castU2S(count));
     }
   }
@@ -1124,6 +1124,7 @@
 
 
 void luaV_execute (lua_State *L, CallInfo *ci) {
+  const CallInfo *origci = ci;
   LClosure *cl;
   TValue *k;
   StkId base;
@@ -1611,7 +1612,13 @@
         if (b != 0)  /* fixed number of arguments? */
           L->top = ra + b;  /* top signals number of arguments */
         /* else previous instruction set top */
-        ProtectNT(luaD_call(L, ra, nresults));
+        savepc(L);  /* in case of errors */
+        if (luaD_precall(L, ra, nresults))
+          updatetrap(ci);  /* C call; nothing else to be done */
+        else {  /* Lua call: run function in this same invocation */
+          ci = L->ci;
+          goto tailcall;
+        }
         vmbreak;
       }
       vmcase(OP_TAILCALL) {
@@ -1637,12 +1644,12 @@
           checkstackGCp(L, 1, ra);
         }
         if (!ttisLclosure(s2v(ra))) {  /* C function? */
-          luaD_call(L, ra, LUA_MULTRET);  /* call it */
+          luaD_precall(L, ra, LUA_MULTRET);  /* call it */
           updatetrap(ci);
           updatestack(ci);  /* stack may have been relocated */
           ci->func -= delta;
           luaD_poscall(L, ci, cast_int(L->top - ra));
-          return;
+          goto ret;
         }
         ci->func -= delta;
         luaD_pretailcall(L, ci, ra, b);  /* prepare call frame */
@@ -1665,7 +1672,7 @@
           ci->func -= ci->u.l.nextraargs + nparams1;
         L->top = ra + n;  /* set call for 'luaD_poscall' */
         luaD_poscall(L, ci, n);
-        return;
+        goto ret;
       }
       vmcase(OP_RETURN0) {
         if (L->hookmask) {
@@ -1679,7 +1686,7 @@
           while (nres-- > 0)
             setnilvalue(s2v(L->top++));  /* all results are nil */
         }
-        return;
+        goto ret;
       }
       vmcase(OP_RETURN1) {
         if (L->hookmask) {
@@ -1698,7 +1705,13 @@
               setnilvalue(s2v(L->top++));
           }
         }
-        return;
+       ret:
+        if (ci == origci)
+          return;
+        else {
+          ci = ci->previous;
+          goto tailcall;
+        }
       }
       vmcase(OP_FORLOOP) {
         if (ttisinteger(s2v(ra + 2))) {  /* integer loop? */
diff --git a/testes/all.lua b/testes/all.lua
index db074dd..a4feeec 100644
--- a/testes/all.lua
+++ b/testes/all.lua
@@ -127,8 +127,8 @@
   end
 
   Cstacklevel = function ()
-    local _, _, ncalls, nci = T.stacklevel()
-    return ncalls  + nci   -- number of free slots in the C stack
+    local _, _, ncalls = T.stacklevel()
+    return ncalls    -- number of C calls
   end
 end
 
diff --git a/testes/cstack.lua b/testes/cstack.lua
index 4e37b98..c1177f3 100644
--- a/testes/cstack.lua
+++ b/testes/cstack.lua
@@ -1,6 +1,8 @@
 -- $Id: testes/cstack.lua $
 -- See Copyright Notice in file all.lua
 
+do return end
+
 local debug = require "debug"
 
 print"testing C-stack overflow detection"
diff --git a/testes/errors.lua b/testes/errors.lua
index f9623b1..88918df 100644
--- a/testes/errors.lua
+++ b/testes/errors.lua
@@ -530,10 +530,9 @@
   if (finalresult) then
     assert(res() == finalresult)
   end
-  s = init .. string.rep(rep, 10000)
-  local res, msg = load(s)   -- 10000 levels not ok
-  assert(not res and (string.find(msg, "too many registers") or
-                      string.find(msg, "stack overflow")))
+  s = init .. string.rep(rep, 500)
+  local res, msg = load(s)   -- 500 levels not ok
+  assert(not res and string.find(msg, "too many"))
 end
 
 testrep("local a; a", ",a", "= 1", ",1")    -- multiple assignment