[base, smooth] LCD filtering cleanups.

* src/base/ftlcdlil.c (ft_lcd_filter_fir, _ft_lcd_filter_legacy):
Clean up, start filtering from the bottom-left origin.

* src/smooth/ftsmooth.c (ft_smooth_render_generic): Updated.
diff --git a/ChangeLog b/ChangeLog
index 90247f9..b5069b1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2017-06-17  Alexei Podtelezhnikov  <apodtele@gmail.com>
+
+	[base, smooth] LCD filtering cleanups.
+
+	* src/base/ftlcdlil.c (ft_lcd_filter_fir, _ft_lcd_filter_legacy):
+	Clean up, start filtering from the bottom-left origin.
+
+	* src/smooth/ftsmooth.c (ft_smooth_render_generic): Updated.
+
 2017-06-16  Werner Lemberg  <wl@gnu.org>
 
 	[truetype] Integer overflows.
diff --git a/src/base/ftlcdfil.c b/src/base/ftlcdfil.c
index 65dbf34..6d35e35 100644
--- a/src/base/ftlcdfil.c
+++ b/src/base/ftlcdfil.c
@@ -35,135 +35,113 @@
                      FT_Render_Mode       mode,
                      FT_LcdFiveTapFilter  weights )
   {
-    FT_UInt  width   = (FT_UInt)bitmap->width;
-    FT_UInt  height  = (FT_UInt)bitmap->rows;
+    FT_UInt   width  = (FT_UInt)bitmap->width;
+    FT_UInt   height = (FT_UInt)bitmap->rows;
+    FT_Int    pitch  = bitmap->pitch;
+    FT_Byte*  origin = bitmap->buffer;
 
 
+    /* take care of bitmap flow */
+    if ( pitch > 0 )
+      origin += pitch * (FT_Int)( height - 1 );
+
     /* horizontal in-place FIR filter */
-    if ( mode == FT_RENDER_MODE_LCD && width >= 4 )
+    if ( mode == FT_RENDER_MODE_LCD && width >= 2 )
     {
-      FT_Byte*  line = bitmap->buffer;
+      FT_Byte*  line = origin;
 
 
-      /* take care of bitmap flow */
-      if ( bitmap->pitch < 0 )
-        line -= bitmap->pitch * (FT_Int)( bitmap->rows - 1 );
+      /* `fir' must be at least 32 bit wide, since the sum of */
+      /* the values in `weights' can exceed 0xFF              */
 
-      /* `fir' and `pix' must be at least 32 bit wide, since the sum of */
-      /* the values in `weights' can exceed 0xFF                        */
-
-      for ( ; height > 0; height--, line += bitmap->pitch )
+      for ( ; height > 0; height--, line -= pitch )
       {
-        FT_UInt  fir[4];        /* below, `pix' is used as the 5th element */
-        FT_UInt  val1, xx;
+        FT_UInt  fir[5];
+        FT_UInt  val, xx;
 
 
-        val1   = line[0];
-        fir[0] = weights[2] * val1;
-        fir[1] = weights[3] * val1;
-        fir[2] = weights[4] * val1;
-        fir[3] = 0;
+        val    = line[0];
+        fir[2] = weights[2] * val;
+        fir[3] = weights[3] * val;
+        fir[4] = weights[4] * val;
 
-        val1    = line[1];
-        fir[0] += weights[1] * val1;
-        fir[1] += weights[2] * val1;
-        fir[2] += weights[3] * val1;
-        fir[3] += weights[4] * val1;
+        val    = line[1];
+        fir[1] = fir[2] + weights[1] * val;
+        fir[2] = fir[3] + weights[2] * val;
+        fir[3] = fir[4] + weights[3] * val;
+        fir[4] =          weights[4] * val;
 
         for ( xx = 2; xx < width; xx++ )
         {
-          FT_UInt  val, pix;
-
-
           val    = line[xx];
-          pix    = fir[0] + weights[0] * val;
-          fir[0] = fir[1] + weights[1] * val;
-          fir[1] = fir[2] + weights[2] * val;
-          fir[2] = fir[3] + weights[3] * val;
-          fir[3] =          weights[4] * val;
+          fir[0] = fir[1] + weights[0] * val;
+          fir[1] = fir[2] + weights[1] * val;
+          fir[2] = fir[3] + weights[2] * val;
+          fir[3] = fir[4] + weights[3] * val;
+          fir[4] =          weights[4] * val;
 
-          pix        >>= 8;
-          pix         |= (FT_UInt)-(FT_Int)( pix >> 8 );
-          line[xx - 2] = (FT_Byte)pix;
+          fir[0]     >>= 8;
+          fir[0]      |= (FT_UInt)-(FT_Int)( fir[0] >> 8 );
+          line[xx - 2] = (FT_Byte)fir[0];
         }
 
-        {
-          FT_UInt  pix;
+        fir[1]     >>= 8;
+        fir[1]      |= (FT_UInt)-(FT_Int)( fir[1] >> 8 );
+        line[xx - 2] = (FT_Byte)fir[1];
 
-
-          pix          = fir[0] >> 8;
-          pix         |= (FT_UInt)-(FT_Int)( pix >> 8 );
-          line[xx - 2] = (FT_Byte)pix;
-
-          pix          = fir[1] >> 8;
-          pix         |= (FT_UInt)-(FT_Int)( pix >> 8 );
-          line[xx - 1] = (FT_Byte)pix;
-        }
+        fir[2]     >>= 8;
+        fir[2]      |= (FT_UInt)-(FT_Int)( fir[2] >> 8 );
+        line[xx - 1] = (FT_Byte)fir[2];
       }
     }
 
     /* vertical in-place FIR filter */
-    else if ( mode == FT_RENDER_MODE_LCD_V && height >= 4 )
+    else if ( mode == FT_RENDER_MODE_LCD_V && height >= 2 )
     {
-      FT_Byte*  column = bitmap->buffer;
-      FT_Int    pitch  = bitmap->pitch;
+      FT_Byte*  column = origin;
 
 
-      /* take care of bitmap flow */
-      if ( bitmap->pitch < 0 )
-        column -= bitmap->pitch * (FT_Int)( bitmap->rows - 1 );
-
       for ( ; width > 0; width--, column++ )
       {
         FT_Byte*  col = column;
-        FT_UInt   fir[4];       /* below, `pix' is used as the 5th element */
-        FT_UInt   val1, yy;
+        FT_UInt   fir[5];
+        FT_UInt   val, yy;
 
 
-        val1   = col[0];
-        fir[0] = weights[2] * val1;
-        fir[1] = weights[3] * val1;
-        fir[2] = weights[4] * val1;
-        fir[3] = 0;
-        col   += pitch;
+        val    = col[0];
+        fir[2] = weights[2] * val;
+        fir[3] = weights[3] * val;
+        fir[4] = weights[4] * val;
+        col   -= pitch;
 
-        val1    = col[0];
-        fir[0] += weights[1] * val1;
-        fir[1] += weights[2] * val1;
-        fir[2] += weights[3] * val1;
-        fir[3] += weights[4] * val1;
-        col    += pitch;
+        val    = col[0];
+        fir[1] = fir[2] + weights[1] * val;
+        fir[2] = fir[3] + weights[2] * val;
+        fir[3] = fir[4] + weights[3] * val;
+        fir[4] =          weights[4] * val;
+        col   -= pitch;
 
-        for ( yy = 2; yy < height; yy++ )
+        for ( yy = 2; yy < height; yy++, col -= pitch )
         {
-          FT_UInt  val, pix;
-
-
           val    = col[0];
-          pix    = fir[0] + weights[0] * val;
-          fir[0] = fir[1] + weights[1] * val;
-          fir[1] = fir[2] + weights[2] * val;
-          fir[2] = fir[3] + weights[3] * val;
-          fir[3] =          weights[4] * val;
+          fir[0] = fir[1] + weights[0] * val;
+          fir[1] = fir[2] + weights[1] * val;
+          fir[2] = fir[3] + weights[2] * val;
+          fir[3] = fir[4] + weights[3] * val;
+          fir[4] =          weights[4] * val;
 
-          pix           >>= 8;
-          pix            |= (FT_UInt)-(FT_Int)( pix >> 8 );
-          col[-2 * pitch] = (FT_Byte)pix;
-          col            += pitch;
+          fir[0]        >>= 8;
+          fir[0]         |= (FT_UInt)-(FT_Int)( fir[0] >> 8 );
+          col[pitch * 2]  = (FT_Byte)fir[0];
         }
 
-        {
-          FT_UInt  pix;
+        fir[1]        >>= 8;
+        fir[1]         |= (FT_UInt)-(FT_Int)( fir[1] >> 8 );
+        col[pitch * 2]  = (FT_Byte)fir[1];
 
-
-          pix             = fir[0] >> 8;
-          pix            |= (FT_UInt)-(FT_Int)( pix >> 8 );
-          col[-2 * pitch] = (FT_Byte)pix;
-
-          pix         = fir[1] >> 8;
-          pix        |= (FT_UInt)-(FT_Int)( pix >> 8 );
-          col[-pitch] = (FT_Byte)pix;
-        }
+        fir[2]        >>= 8;
+        fir[2]         |= (FT_UInt)-(FT_Int)( fir[2] >> 8 );
+        col[pitch]      = (FT_Byte)fir[2];
       }
     }
   }
@@ -177,9 +155,10 @@
                          FT_Render_Mode  mode,
                          FT_Byte*        weights )
   {
-    FT_UInt  width  = (FT_UInt)bitmap->width;
-    FT_UInt  height = (FT_UInt)bitmap->rows;
-    FT_Int   pitch  = bitmap->pitch;
+    FT_UInt   width  = (FT_UInt)bitmap->width;
+    FT_UInt   height = (FT_UInt)bitmap->rows;
+    FT_Int    pitch  = bitmap->pitch;
+    FT_Byte*  origin = bitmap->buffer;
 
     static const unsigned int  filters[3][3] =
     {
@@ -191,33 +170,31 @@
     FT_UNUSED( weights );
 
 
+    /* take care of bitmap flow */
+    if ( pitch > 0 )
+      origin += pitch * (FT_Int)( height - 1 );
+
     /* horizontal in-place intra-pixel filter */
     if ( mode == FT_RENDER_MODE_LCD && width >= 3 )
     {
-      FT_Byte*  line = bitmap->buffer;
+      FT_Byte*  line = origin;
 
 
-      /* take care of bitmap flow */
-      if ( bitmap->pitch < 0 )
-        line -= bitmap->pitch * (FT_Int)( bitmap->rows - 1 );
-
-      for ( ; height > 0; height--, line += pitch )
+      for ( ; height > 0; height--, line -= pitch )
       {
         FT_UInt  xx;
 
 
         for ( xx = 0; xx < width; xx += 3 )
         {
-          FT_UInt  r = 0;
-          FT_UInt  g = 0;
-          FT_UInt  b = 0;
+          FT_UInt  r, g, b;
           FT_UInt  p;
 
 
           p  = line[xx];
-          r += filters[0][0] * p;
-          g += filters[0][1] * p;
-          b += filters[0][2] * p;
+          r  = filters[0][0] * p;
+          g  = filters[0][1] * p;
+          b  = filters[0][2] * p;
 
           p  = line[xx + 1];
           r += filters[1][0] * p;
@@ -237,31 +214,24 @@
     }
     else if ( mode == FT_RENDER_MODE_LCD_V && height >= 3 )
     {
-      FT_Byte*  column = bitmap->buffer;
+      FT_Byte*  column = origin;
 
 
-      /* take care of bitmap flow */
-      if ( bitmap->pitch < 0 )
-        column -= bitmap->pitch * (FT_Int)( bitmap->rows - 1 );
-
       for ( ; width > 0; width--, column++ )
       {
-        FT_Byte*  col     = column;
-        FT_Byte*  col_end = col + (FT_Int)height * pitch;
+        FT_Byte*  col = column - 2 * pitch;
 
 
-        for ( ; col < col_end; col += 3 * pitch )
+        for ( ; height > 0; height -= 3, col -= 3 * pitch )
         {
-          FT_UInt  r = 0;
-          FT_UInt  g = 0;
-          FT_UInt  b = 0;
+          FT_UInt  r, g, b;
           FT_UInt  p;
 
 
           p  = col[0];
-          r += filters[0][0] * p;
-          g += filters[0][1] * p;
-          b += filters[0][2] * p;
+          r  = filters[0][0] * p;
+          g  = filters[0][1] * p;
+          b  = filters[0][2] * p;
 
           p  = col[pitch];
           r += filters[1][0] * p;
@@ -275,7 +245,7 @@
 
           col[0]         = (FT_Byte)( r / 65536 );
           col[pitch]     = (FT_Byte)( g / 65536 );
-          col[2 * pitch] = (FT_Byte)( b / 65536 );
+          col[pitch * 2] = (FT_Byte)( b / 65536 );
         }
       }
     }
diff --git a/src/smooth/ftsmooth.c b/src/smooth/ftsmooth.c
index 061d251..fd12e66 100644
--- a/src/smooth/ftsmooth.c
+++ b/src/smooth/ftsmooth.c
@@ -207,10 +207,10 @@
 
       if ( vmul )
       {
-        cbox.yMax += lcd_weights[0] ? 43
-                                    : lcd_weights[1] ? 22 : 0;
-        cbox.yMin -= lcd_weights[4] ? 43
+        cbox.yMax += lcd_weights[4] ? 43
                                     : lcd_weights[3] ? 22 : 0;
+        cbox.yMin -= lcd_weights[0] ? 43
+                                    : lcd_weights[1] ? 22 : 0;
       }
     }
 #endif