[smooth] Minor speedup to smooth rasterizer

This speeds up the smooth rasterizer by avoiding
conditional branches in the hot path. Namely:

- Define a fixed "null cell" which will be pointed
  to whenever the current cell is outside of the current
  target region. This avoids a "ras.cell != NULL"
  check in the FT_INTEGRATE() macro.

- Also use the null cell as a sentinel at the end of
  all ycells[] linked-lists, by setting its x coordinate
  to INT_MAX. This avoids an 'if (!cell)' check in
  gray_set_cell() as well.

- Slightly change the worker struct fields to perform
  slightly fewer operations during rendering.

Example results (on a 2013 Core i5-3337U CPU):

  out/ftbench -p -s10 -t5 -bc /usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf

    Before: 5.472 us/op
    After:  5.275 us/op

  out/ftbench -p -s60 -t5 -bc /usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf

    Before: 17.988 us/op
    After:  17.389 us/op
diff --git a/ChangeLog b/ChangeLog
index 790ef56..ea269da 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,17 @@
 2021-07-15  David Turner  <david@freetype.org>
 
+	[smooth] Minor speedup to smooth rasterizer
+
+	This speeds up the smooth rasterizer by avoiding conditional
+	branches in the hot path.
+
+	* src/smooth/ftgrays.c: Define a null cell used both as a
+	sentinel for all linked-lists, and to accumulate coverage and
+	area values for "out-of-bounds" cell positions without a
+	conditional check.
+
+2021-07-15  David Turner  <david@freetype.org>
+
 	Replaces download-test-fonts.sh with download-test-fonts.py which
 	does the same work, and also avoids downloading anything if the
 	files are already installed with the right content.
diff --git a/src/smooth/ftgrays.c b/src/smooth/ftgrays.c
index 60cd5e0..e66ec34 100644
--- a/src/smooth/ftgrays.c
+++ b/src/smooth/ftgrays.c
@@ -479,19 +479,24 @@
   {
     ft_jmp_buf  jump_buffer;
 
-    TCoord  min_ex, max_ex;
+    TCoord  min_ex, max_ex;  /* min and max integer pixel coordinates */
     TCoord  min_ey, max_ey;
+    TCoord  count_ey;        /* same as (max_ey - min_ey) */
 
-    PCell       cell;
-    PCell*      ycells;
-    PCell       cells;
-    FT_PtrDist  max_cells;
-    FT_PtrDist  num_cells;
+    PCell       cell;        /* current cell                             */
+    PCell       cell_free;   /* cell allocation next free slot           */
+    PCell       cell_limit;  /* cell allocation limit                    */
 
-    TPos    x,  y;
+    PCell*      ycells;      /* array of cell linked-lists, one per      */
+							 /* vertical coordinate in the current band. */
 
-    FT_Outline  outline;
-    TPixmap     target;
+    PCell       cells;       /* cell storage area     */
+    FT_PtrDist  max_cells;   /* cell storage capacity */
+
+    TPos        x,  y;       /* last point position */
+
+    FT_Outline  outline;     /* input outline */
+    TPixmap     target;      /* target pixmap */
 
     FT_Raster_Span_Func  render_span;
     void*                render_span_data;
@@ -502,21 +507,34 @@
 #pragma warning( pop )
 #endif
 
-
 #ifndef FT_STATIC_RASTER
 #define ras  (*worker)
 #else
   static gray_TWorker  ras;
 #endif
 
-#define FT_INTEGRATE( ras, a, b )                                       \
-           if ( ras.cell )                                              \
-             ras.cell->cover += (a), ras.cell->area += (a) * (TArea)(b)
+/* Return a pointer to the "null cell", used as a sentinel at the end   */
+/* of all ycells[] linked lists. Its x coordinate should be maximal     */
+/* to ensure no NULL checks are necessary when looking for an insertion */
+/* point in gray_set_cell(). Other loops should check the cell pointer  */
+/* with CELL_IS_NULL() to detect the end of the list.                   */
+#define NULL_CELL_PTR(ras)  (ras).cells
+
+/* The |x| value of the null cell. Must be the largest possible */
+/* integer value stored in a TCell.x field.                     */
+#define CELL_MAX_X_VALUE    INT_MAX
+
+/* Return true iff |cell| points to the null cell. */
+#define CELL_IS_NULL(cell)  ((cell)->x == CELL_MAX_X_VALUE)
+
+
+#define FT_INTEGRATE( ras, a, b )                                     \
+           ras.cell->cover += (a), ras.cell->area += (a) * (TArea)(b)
 
 
   typedef struct gray_TRaster_
   {
-    void*         memory;
+    void*  memory;
 
   } gray_TRaster, *gray_PRaster;
 
@@ -538,7 +556,7 @@
 
       printf( "%3d:", y );
 
-      for ( ; cell != NULL; cell = cell->next )
+      for ( ; !CELL_IS_NULL(cell); cell = cell->next )
         printf( " (%3d, c:%4d, a:%6d)",
                 cell->x, cell->cover, cell->area );
       printf( "\n" );
@@ -566,11 +584,12 @@
     /* Note that if a cell is to the left of the clipping region, it is    */
     /* actually set to the (min_ex-1) horizontal position.                 */
 
-    if ( ey >= ras.max_ey || ey < ras.min_ey || ex >= ras.max_ex )
-      ras.cell = NULL;
+    TCoord ey_index = ey - ras.min_ey;
+    if ( ey_index < 0 || ey_index >= ras.count_ey || ex >= ras.max_ex )
+      ras.cell = NULL_CELL_PTR(ras);
     else
     {
-      PCell*  pcell = ras.ycells + ey - ras.min_ey;
+      PCell*  pcell = ras.ycells + ey_index;
       PCell   cell;
 
 
@@ -580,7 +599,7 @@
       {
         cell = *pcell;
 
-        if ( !cell || cell->x > ex )
+        if ( cell->x > ex )
           break;
 
         if ( cell->x == ex )
@@ -589,11 +608,11 @@
         pcell = &cell->next;
       }
 
-      if ( ras.num_cells >= ras.max_cells )
+      /* insert new cell */
+      cell = ras.cell_free++;
+      if (cell >= ras.cell_limit)
         ft_longjmp( ras.jump_buffer, 1 );
 
-      /* insert new cell */
-      cell        = ras.cells + ras.num_cells++;
       cell->x     = ex;
       cell->area  = 0;
       cell->cover = 0;
@@ -1218,7 +1237,7 @@
       unsigned char*  line = ras.target.origin - ras.target.pitch * y;
 
 
-      for ( ; cell != NULL; cell = cell->next )
+      for ( ; !CELL_IS_NULL(cell); cell = cell->next )
       {
         if ( cover != 0 && cell->x > x )
         {
@@ -1266,7 +1285,7 @@
       TArea   area;
 
 
-      for ( ; cell != NULL; cell = cell->next )
+      for ( ; !CELL_IS_NULL(cell); cell = cell->next )
       {
         if ( cover != 0 && cell->x > x )
         {
@@ -1646,8 +1665,8 @@
       FT_TRACE7(( "band [%d..%d]: %ld cell%s\n",
                   ras.min_ey,
                   ras.max_ey,
-                  ras.num_cells,
-                  ras.num_cells == 1 ? "" : "s" ));
+                  ras.cell_free - ras.cells,
+                  ras.cell_free - ras.cells == 1 ? "" : "s" ));
     }
     else
     {
@@ -1690,8 +1709,18 @@
 
     ras.cells     = buffer + n;
     ras.max_cells = (FT_PtrDist)( FT_MAX_GRAY_POOL - n );
+    ras.cell_limit = ras.cells + ras.max_cells;
     ras.ycells    = (PCell*)buffer;
 
+	/* Initialize the null cell at the start of the 'cells' array.    */
+	/* Note that this requires ras.cell_free initialization to skip   */
+	/* over the first entry in the array.                             */
+	PCell null_cell  = NULL_CELL_PTR(ras);
+	null_cell->x     = CELL_MAX_X_VALUE;
+	null_cell->area  = 0;
+	null_cell->cover = 0;
+	null_cell->next  = NULL;
+
     for ( y = yMin; y < yMax; )
     {
       ras.min_ey = y;
@@ -1705,15 +1734,17 @@
       do
       {
         TCoord  width = band[0] - band[1];
+        TCoord  w;
         int     error;
 
+        for (w = 0; w < width; ++w)
+          ras.ycells[w] = null_cell;
 
-        FT_MEM_ZERO( ras.ycells, height * sizeof ( PCell ) );
-
-        ras.num_cells = 0;
-        ras.cell      = NULL;
+        ras.cell_free = ras.cells + 1;  /* NOTE: Skip over the null cell. */
+        ras.cell      = null_cell;
         ras.min_ey    = band[1];
         ras.max_ey    = band[0];
+        ras.count_ey  = width;
 
         error     = gray_convert_glyph_inner( RAS_VAR, continued );
         continued = 1;