poppler/TextOutputDev.cc - third_party/poppler - Git at Google

 //========================================================================
 //
 // TextOutputDev.cc
 //
 // Copyright 1997-2003 Glyph & Cog, LLC
 //
 //========================================================================

 //========================================================================
 //
 // Modified under the Poppler project - http://poppler.freedesktop.org
 //
 // All changes made under the Poppler project to this file are licensed
 // under GPL version 2 or later
 //
 // Copyright (C) 2005-2007 Kristian Høgsberg <krh@redhat.com>
 // Copyright (C) 2005 Nickolay V. Shmyrev <nshmyrev@yandex.ru>
 // Copyright (C) 2006-2008, 2011-2013 Carlos Garcia Campos <carlosgc@gnome.org>
 // Copyright (C) 2006, 2007, 2013 Ed Catmur <ed@catmur.co.uk>
 // Copyright (C) 2006 Jeff Muizelaar <jeff@infidigm.net>
 // Copyright (C) 2007, 2008, 2012, 2017 Adrian Johnson <ajohnson@redneon.com>
 // Copyright (C) 2008 Koji Otani <sho@bbr.jp>
 // Copyright (C) 2008, 2010-2012, 2014-2022, 2024 Albert Astals Cid <aacid@kde.org>
 // Copyright (C) 2008 Pino Toscano <pino@kde.org>
 // Copyright (C) 2008, 2010 Hib Eris <hib@hiberis.nl>
 // Copyright (C) 2009 Ross Moore <ross@maths.mq.edu.au>
 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
 // Copyright (C) 2010 Brian Ewins <brian.ewins@gmail.com>
 // Copyright (C) 2010, 2021 Marek Kasik <mkasik@redhat.com>
 // Copyright (C) 2010, 2020 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
 // Copyright (C) 2011 Sam Liao <phyomh@gmail.com>
 // Copyright (C) 2012 Horst Prote <prote@fmi.uni-stuttgart.de>
 // Copyright (C) 2012, 2013-2018 Jason Crain <jason@aquaticape.us>
 // Copyright (C) 2012 Peter Breitenlohner <peb@mppmu.mpg.de>
 // Copyright (C) 2013 José Aliste <jaliste@src.gnome.org>
 // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
 // Copyright (C) 2013 Ed Catmur <ed@catmur.co.uk>
 // Copyright (C) 2016 Khaled Hosny <khaledhosny@eglug.org>
 // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
 // Copyright (C) 2018 Sanchit Anand <sanxchit@gmail.com>
 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
 // Copyright (C) 2018-2022, 2024 Nelson Benítez León <nbenitezl@gmail.com>
 // Copyright (C) 2019 Christian Persch <chpe@src.gnome.org>
 // Copyright (C) 2019, 2022 Oliver Sander <oliver.sander@tu-dresden.de>
 // Copyright (C) 2019 Dan Shea <dan.shea@logical-innovations.com>
 // Copyright (C) 2021 Peter Williams <peter@newton.cx>
 // Copyright (C) 2024 Adam Sampson <ats@offog.org>
 // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
 // Copyright (C) 2024 Stefan Brüns <stefan.bruens@rwth-aachen.de>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
 //
 //========================================================================

 #include <config.h>

 #include <cstdio>
 #include <cstdlib>
 #include <cstddef>
 #include <cmath>
 #include <cfloat>
 #include <cctype>
 #include <algorithm>
 #if defined(_WIN32) || defined(__CYGWIN__)
 #    include <fcntl.h> // for O_BINARY
 #    include <io.h> // for _setmode
 #endif
 #include "goo/gfile.h"
 #include "goo/gmem.h"
 #include "goo/GooString.h"
 #include "poppler-config.h"
 #include "Error.h"
 #include "GlobalParams.h"
 #include "UnicodeMap.h"
 #include "UnicodeTypeTable.h"
 #include "Link.h"
 #include "TextOutputDev.h"
 #include "Page.h"
 #include "Annot.h"
 #include "UTF.h"

 //------------------------------------------------------------------------
 // parameters
 //------------------------------------------------------------------------

 // The constants below are the tunable thresholds used by the text
 // layout heuristics in this file.  Unless a comment says otherwise a
 // value is either expressed in points or as a fraction / multiple of the
 // font size, as stated for each parameter.

 // Each bucket in a text pool includes baselines within a range of
 // this many points.  (TextPool derives a bucket index from a baseline
 // as base / textPoolStep.)
 #define textPoolStep 4

 // Inter-character space width which will cause addChar to start a new
 // word.
 #define minWordBreakSpace 0.1

 // Negative inter-character space width, i.e., overlap, which will
 // cause addChar to start a new word.
 #define minDupBreakOverlap 0.2

 // Max distance between baselines of two lines within a block, as a
 // fraction of the font size.
 #define maxLineSpacingDelta 1.5

 // Max difference in primary font sizes on two lines in the same
 // block.  Delta1 is used when examining new lines above and below the
 // current block; delta2 is used when examining text that overlaps the
 // current block; delta3 is used when examining text to the left and
 // right of the current block.
 #define maxBlockFontSizeDelta1 0.05
 #define maxBlockFontSizeDelta2 0.6
 #define maxBlockFontSizeDelta3 0.2

 // Max difference in font sizes inside a word.
 #define maxWordFontSizeDelta 0.05

 // Maximum distance between baselines of two words on the same line,
 // e.g., distance between subscript or superscript and the primary
 // baseline, as a fraction of the font size.
 #define maxIntraLineDelta 0.5

 // Minimum inter-word spacing, as a fraction of the font size.  (Only
 // used for raw ordering.)
 #define minWordSpacing 0.15

 // Maximum inter-word spacing, as a fraction of the font size.
 #define maxWordSpacing 1.5

 // Maximum horizontal spacing which will allow a word to be pulled
 // into a block, as a fraction of the font size.
 // This default value can be tweaked via API.
 // (Unlike the other parameters this is a static data member of
 // TextOutputDev rather than a macro.)
 double TextOutputDev::minColSpacing1_default = 0.7;

 // Minimum spacing between columns, as a fraction of the font size.
 #define minColSpacing2 1.0

 // Maximum vertical spacing between blocks within a flow, as a
 // multiple of the font size.
 #define maxBlockSpacing 2.5

 // Minimum spacing between characters within a word, as a fraction of
 // the font size.  The value is negative, i.e. characters may overlap
 // by up to half the font size.  Note that the macro expands to an
 // unparenthesized "-0.5".
 #define minCharSpacing -0.5

 // Maximum spacing between characters within a word, as a fraction of
 // the font size, when there is no obvious extra-wide character
 // spacing.
 #define maxCharSpacing 0.03

 // When extra-wide character spacing is detected, the inter-character
 // space threshold is set to the minimum inter-character space
 // multiplied by this constant.
 #define maxWideCharSpacingMul 1.3

 // Upper limit on spacing between characters in a word.
 #define maxWideCharSpacing 0.4

 // Max difference in primary,secondary coordinates (as a fraction of
 // the font size) allowed for duplicated text (fake boldface, drop
 // shadows) which is to be discarded.
 #define dupMaxPriDelta 0.1
 #define dupMaxSecDelta 0.2

 // Max width of underlines (in points).
 #define maxUnderlineWidth 3

 // Min distance between baseline and underline (in points).  Note that
 // the macro expands to an unparenthesized "-2".
 //~ this should be font-size-dependent
 #define minUnderlineGap -2

 // Max distance between baseline and underline (in points).
 //~ this should be font-size-dependent
 #define maxUnderlineGap 4

 // Max horizontal distance between edge of word and start of underline
 // (in points).
 //~ this should be font-size-dependent
 #define underlineSlack 1

 // Max distance between edge of text and edge of link border
 #define hyperlinkSlack 2

 // Max distance between characters when combining a base character and
 // combining character.  TextWord::addCombining() scales combMaxMidDelta
 // by the character width and combMaxBaseDelta by the word / font height.
 #define combMaxMidDelta 0.3
 #define combMaxBaseDelta 0.4

 // Text is considered diagonal if abs(tan(angle)) > diagonalThreshold.
 // (Or 1/tan(angle) for 90/270 degrees.)
 #define diagonalThreshold 0.1

 // How opaque a selection on a glyphless font should be. Since the font is
 // glyphless and overlaid over text in image form, this must enable users
 // to read the underlying image. Issue #157
 #define glyphlessSelectionOpacity 0.4

 // Evaluates to true when x lies inside the closed interval spanned by a
 // and b, whichever of the two is the larger one.  Every argument is
 // expanded more than once, so do not pass expressions with side effects.
 #define XBetweenAB(x, a, b) (!(((x) > (a) && (x) > (b)) || ((x) < (a) && (x) < (b))))

 namespace {

 // Tells whether the code point is below 128, i.e. representable in
 // 7-bit ASCII.
 inline bool isAscii7(Unicode uchar)
 {
     return uchar <= 0x7f;
 }

 } // namespace

 static int reorderText(const Unicode *text, int len, const UnicodeMap *uMap, bool primaryLR, GooString *s, Unicode *u)
 {
     char lre[8], rle[8], popdf[8], buf[8];
     int lreLen = 0, rleLen = 0, popdfLen = 0, n;
     int nCols, i, j, k;

     nCols = 0;

     if (s) {
         lreLen = uMap->mapUnicode(0x202a, lre, sizeof(lre));
         rleLen = uMap->mapUnicode(0x202b, rle, sizeof(rle));
         popdfLen = uMap->mapUnicode(0x202c, popdf, sizeof(popdf));
     }

     if (primaryLR) {
         i = 0;
         while (i < len) {
             // output a left-to-right section
             for (j = i; j < len && !unicodeTypeR(text[j]); ++j) {
                 ;
             }
             for (k = i; k < j; ++k) {
                 if (s) {
                     n = uMap->mapUnicode(text[k], buf, sizeof(buf));
                     s->append(buf, n);
                 }
                 if (u) {
                     u[nCols] = text[k];
                 }
                 ++nCols;
             }
             i = j;
             // output a right-to-left section
             for (j = i; j < len && !(unicodeTypeL(text[j]) || unicodeTypeNum(text[j])); ++j) {
                 ;
             }
             if (j > i) {
                 if (s) {
                     s->append(rle, rleLen);
                 }
                 for (k = j - 1; k >= i; --k) {
                     if (s) {
                         n = uMap->mapUnicode(text[k], buf, sizeof(buf));
                         s->append(buf, n);
                     }
                     if (u) {
                         u[nCols] = text[k];
                     }
                     ++nCols;
                 }
                 if (s) {
                     s->append(popdf, popdfLen);
                 }
                 i = j;
             }
         }
     } else {
         // Note: This code treats numeric characters (European and
         // Arabic/Indic) as left-to-right, which isn't strictly correct
         // (incurs extra LRE/POPDF pairs), but does produce correct
         // visual formatting.
         if (s) {
             s->append(rle, rleLen);
         }
         i = len - 1;
         while (i >= 0) {
             // output a right-to-left section
             for (j = i; j >= 0 && !(unicodeTypeL(text[j]) || unicodeTypeNum(text[j])); --j) {
                 ;
             }
             for (k = i; k > j; --k) {
                 if (s) {
                     n = uMap->mapUnicode(text[k], buf, sizeof(buf));
                     s->append(buf, n);
                 }
                 if (u) {
                     u[nCols] = text[k];
                 }
                 ++nCols;
             }
             i = j;
             // output a left-to-right section
             for (j = i; j >= 0 && !unicodeTypeR(text[j]); --j) {
                 ;
             }
             if (j < i) {
                 if (s) {
                     s->append(lre, lreLen);
                 }
                 for (k = j + 1; k <= i; ++k) {
                     if (s) {
                         n = uMap->mapUnicode(text[k], buf, sizeof(buf));
                         s->append(buf, n);
                     }
                     if (u) {
                         u[nCols] = text[k];
                     }
                     ++nCols;
                 }
                 if (s) {
                     s->append(popdf, popdfLen);
                 }
                 i = j;
             }
         }
         if (s) {
             s->append(popdf, popdfLen);
         }
     }

     return nCols;
 }

 //------------------------------------------------------------------------
 // TextUnderline
 //------------------------------------------------------------------------

 // A line segment that is a candidate underline, stored by its two end
 // points.
 class TextUnderline
 {
 public:
     // Stores the segment (x0A, y0A)-(x1A, y1A).  The segment is flagged
     // as horizontal when both end points have exactly the same y value.
     TextUnderline(double x0A, double y0A, double x1A, double y1A) : x0(x0A), y0(y0A), x1(x1A), y1(y1A), horiz(y0A == y1A) { }
     ~TextUnderline() = default;

     double x0, y0, x1, y1;
     bool horiz;
 };

 //------------------------------------------------------------------------
 // TextLink
 //------------------------------------------------------------------------

 class TextLink
 {
 public:
     TextLink(int xMinA, int yMinA, int xMaxA, int yMaxA, AnnotLink *linkA)
     {
         xMin = xMinA;
         yMin = yMinA;
         xMax = xMaxA;
         yMax = yMaxA;
         link = linkA;
     }
     ~TextLink() { }

     int xMin, yMin, xMax, yMax;
     AnnotLink *link;
 };

 //------------------------------------------------------------------------
 // TextFontInfo
 //------------------------------------------------------------------------

 // Captures the font currently selected in state.  When word lists are
 // compiled in, the font name (if any) is copied and the font flags are
 // recorded as well; without a font the name is null and the flags are 0.
 TextFontInfo::TextFontInfo(const GfxState *state)
 {
     gfxFont = state->getFont();
 #ifdef TEXTOUT_WORD_LIST
     if (gfxFont && gfxFont->getName()) {
         fontName = new GooString(*gfxFont->getName());
     } else {
         fontName = nullptr;
     }

     if (gfxFont) {
         flags = gfxFont->getFlags();
     } else {
         flags = 0;
     }
 #endif
 }

 // Releases the font name copy made by the constructor (word-list builds
 // only).
 TextFontInfo::~TextFontInfo()
 {
 #ifdef TEXTOUT_WORD_LIST
     // deleting a null pointer is a no-op, so no guard is required
     delete fontName;
 #endif
 }

 // True when the font currently selected in state is the one recorded in
 // this object.
 bool TextFontInfo::matches(const GfxState *state) const
 {
     const bool sameFont = (state->getFont() == gfxFont);
     return sameFont;
 }

 // True when both objects record the same font.
 bool TextFontInfo::matches(const TextFontInfo *fontInfo) const
 {
     const bool sameFont = (gfxFont == fontInfo->gfxFont);
     return sameFont;
 }

 // True when a font is recorded and its ID equals *ref; always false when
 // no font is recorded.
 bool TextFontInfo::matches(const Ref *ref) const
 {
     if (!gfxFont) {
         return false;
     }
     return *(gfxFont->getID()) == *ref;
 }

 // Ascent reported by the recorded font, or 0.95 when there is no font.
 double TextFontInfo::getAscent() const
 {
     if (!gfxFont) {
         return 0.95;
     }
     return gfxFont->getAscent();
 }

 // Descent reported by the recorded font, or -0.35 when there is no font.
 double TextFontInfo::getDescent() const
 {
     if (!gfxFont) {
         return -0.35;
     }
     return gfxFont->getDescent();
 }

 // Writing mode reported by the recorded font, or 0 when there is no
 // font.
 int TextFontInfo::getWMode() const
 {
     if (!gfxFont) {
         return 0;
     }
     return gfxFont->getWMode();
 }

 //------------------------------------------------------------------------
 // TextWord
 //------------------------------------------------------------------------

 // Creates an empty word with the given rotation and font size.  The word
 // is marked invisible when the text render mode of state is 3.  In
 // word-list builds the word colour is taken from the stroke colour when
 // the low two bits of the render mode equal 1, and from the fill colour
 // otherwise.
 TextWord::TextWord(const GfxState *state, int rotA, double fontSizeA)
 {
     rot = rotA;
     fontSize = fontSizeA;
     invisible = (state->getRender() == 3);

     // not linked into any list and not decorated yet
     next = nullptr;
     spaceAfter = false;

 #ifdef TEXTOUT_WORD_LIST
     GfxRGB wordColor;
     const bool useStrokeColor = ((state->getRender() & 3) == 1);

     if (useStrokeColor) {
         state->getStrokeRGB(&wordColor);
     } else {
         state->getFillRGB(&wordColor);
     }
     colorR = colToDbl(wordColor.r);
     colorG = colToDbl(wordColor.g);
     colorB = colToDbl(wordColor.b);
 #endif

     link = nullptr;
     underlined = false;
 }

 // Nothing to release explicitly; the members clean up after themselves.
 TextWord::~TextWord() = default;

 // Appends one character to the word.  The first character also fixes
 // the word's initial bounds and writing mode (see setInitialBounds()).
 // The new character's leading edge is recorded, and the word's trailing
 // edge (edgeEnd) together with the matching bounding-box side is moved
 // to the end of the character.
 void TextWord::addChar(const GfxState *state, TextFontInfo *fontA, double x, double y, double dx, double dy, int charPosA, int charLen, CharCode c, Unicode u, const Matrix &textMatA)
 {
     chars.push_back(CharInfo { u, c, charPosA, 0.0, fontA, textMatA });
     charPosEnd = charPosA + charLen;

     // must run before wMode is consulted below: it is what sets wMode
     if (len() == 1) {
         setInitialBounds(fontA, x, y);
     }

     auto &added = chars.back();

     if (wMode) { // vertical writing mode
         // NB: the rotation value has been incremented by 1 (in
         // TextPage::beginWord()) for vertical writing mode
         switch (rot) {
         case 0:
             added.edge = x - fontSize;
             edgeEnd = x;
             xMax = edgeEnd;
             break;
         case 1:
             added.edge = y - fontSize;
             edgeEnd = y;
             yMax = edgeEnd;
             break;
         case 2:
             added.edge = x + fontSize;
             edgeEnd = x;
             xMin = edgeEnd;
             break;
         case 3:
             added.edge = y + fontSize;
             edgeEnd = y;
             yMin = edgeEnd;
             break;
         }
     } else { // horizontal writing mode
         switch (rot) {
         case 0:
         case 2:
             // the character runs along x; rotation 0 grows the box at
             // xMax, rotation 2 at xMin
             added.edge = x;
             edgeEnd = x + dx;
             if (rot == 0) {
                 xMax = edgeEnd;
             } else {
                 xMin = edgeEnd;
             }
             break;
         case 1:
         case 3:
             // the character runs along y; rotation 1 grows the box at
             // yMax, rotation 3 at yMin
             added.edge = y;
             edgeEnd = y + dy;
             if (rot == 1) {
                 yMax = edgeEnd;
             } else {
                 yMin = edgeEnd;
             }
             break;
         }
     }
 }

 // Initialises the word from its first character at (x, y): takes the
 // writing mode from fontA, sets the baseline coordinate (base) and the
 // bounding-box sides that do not depend on the character's advance.
 // The side along the reading direction is filled in by the caller.
 void TextWord::setInitialBounds(TextFontInfo *fontA, double x, double y)
 {
     const double scaledAscent = fontA->getAscent() * fontSize;
     const double scaledDescent = fontA->getDescent() * fontSize;
     wMode = fontA->getWMode();

     // Sanity checks for a case that shouldn't happen: a box with zero
     // extent across the baseline.  If it does happen, give the box unit
     // extent so that later code never divides by zero.
     const auto widenFlatY = [this, y]() {
         if (yMin == yMax) {
             yMin = y;
             yMax = y + 1;
         }
     };
     const auto widenFlatX = [this, x]() {
         if (xMin == xMax) {
             xMin = x;
             xMax = x + 1;
         }
     };

     if (wMode) { // vertical writing mode
         // NB: the rotation value has been incremented by 1 (in
         // TextPage::beginWord()) for vertical writing mode
         switch (rot) {
         case 0:
             base = y;
             xMin = x - fontSize;
             yMin = y - fontSize;
             yMax = y;
             break;
         case 1:
             base = x;
             xMin = x;
             xMax = x + fontSize;
             yMin = y - fontSize;
             break;
         case 2:
             base = y;
             xMax = x + fontSize;
             yMin = y;
             yMax = y + fontSize;
             break;
         case 3:
             base = x;
             xMin = x - fontSize;
             xMax = x;
             yMax = y + fontSize;
             break;
         }
         return;
     }

     // horizontal writing mode
     switch (rot) {
     case 0:
         xMin = x;
         yMin = y - scaledAscent;
         yMax = y - scaledDescent;
         widenFlatY();
         base = y;
         break;
     case 1:
         yMin = y;
         xMin = x + scaledDescent;
         xMax = x + scaledAscent;
         widenFlatX();
         base = x;
         break;
     case 2:
         xMax = x;
         yMin = y + scaledDescent;
         yMax = y + scaledAscent;
         widenFlatY();
         base = y;
         break;
     case 3:
         yMax = y;
         xMin = x - scaledAscent;
         xMax = x - scaledDescent;
         widenFlatX();
         base = x;
         break;
     }
 }

 // One entry of the accent mapping table: a character (base) and the
 // combining character (comb) that getCombiningChar() returns for it.
 struct CombiningTable
 {
     Unicode base;
     Unicode comb;
 };

 // Accent characters (first column) and the combining character each one
 // is replaced with (second column).  The table is searched linearly by
 // getCombiningChar().
 static const struct CombiningTable combiningTable[] = {
     { 0x0060, 0x0300 }, // grave
     { 0x00a8, 0x0308 }, // dieresis
     { 0x00af, 0x0304 }, // macron
     { 0x00b4, 0x0301 }, // acute
     { 0x00b8, 0x0327 }, // cedilla
     { 0x02c6, 0x0302 }, // circumflex
     { 0x02c7, 0x030c }, // caron
     { 0x02d8, 0x0306 }, // breve
     { 0x02d9, 0x0307 }, // dotaccent
     { 0x02da, 0x030a }, // ring
     { 0x02dc, 0x0303 }, // tilde
     { 0x02dd, 0x030b } // hungarumlaut (double acute accent)
 };

 // Looks u up in the first column of combiningTable and returns the
 // combining character paired with it, or 0 when u is not listed.
 static Unicode getCombiningChar(Unicode u)
 {
     const CombiningTable *const tableEnd = combiningTable + sizeof(combiningTable) / sizeof(combiningTable[0]);
     const CombiningTable *const match = std::find_if(combiningTable, tableEnd, [u](const CombiningTable &entry) { return entry.base == u; });
     if (match == tableEnd) {
         return 0;
     }
     return match->comb;
 }

 // Tries to attach the character u to this word as part of a
 // base + combining-accent pair instead of adding it as a regular
 // character.  Returns true if the character was consumed, false if the
 // caller has to add it in the normal way.
 //
 // Two situations are handled, both only for non-empty words in
 // horizontal writing mode:
 //  - u is an accent listed in combiningTable and the word's last
 //    character is alphanumeric: the combining form of u is appended and
 //    the word's edges / bounding box are left untouched.
 //  - the word's last character is such an accent and u is alphanumeric:
 //    u takes over the accent's slot as base character, the combining
 //    form of the accent is appended after it, and the edges / bounding
 //    box are updated for the new base character.
 // In both cases the new character must overlap the previous one: its
 // midpoint and baseline must be within combMaxMidDelta / combMaxBaseDelta
 // (scaled by character width / height) of the previous character's.
 bool TextWord::addCombining(const GfxState *state, TextFontInfo *fontA, double fontSizeA, double x, double y, double dx, double dy, int charPosA, int charLen, CharCode c, Unicode u, const Matrix &textMatA)
 {
     // From here on both the word and the incoming font are known to use
     // horizontal writing mode.
     if (chars.empty() || wMode != 0 || fontA->getWMode() != 0) {
         return false;
     }

     Unicode cCurrent = getCombiningChar(u);
     if (cCurrent != 0 && unicodeTypeAlphaNum(chars.back().text)) {
         // Current is a combining character, previous is base character
         double maxScaledMidDelta = fabs(edgeEnd - chars.back().edge) * combMaxMidDelta;
         double charMid, charBase, maxScaledBaseDelta;

         // Test if characters overlap
         if (rot == 0 || rot == 2) {
             charMid = x + (dx / 2);
             charBase = y;
             maxScaledBaseDelta = (yMax - yMin) * combMaxBaseDelta;
         } else {
             charMid = y + (dy / 2);
             charBase = x;
             maxScaledBaseDelta = (xMax - xMin) * combMaxBaseDelta;
         }

         double edgeMid = (chars.back().edge + edgeEnd) / 2;
         if (fabs(charMid - edgeMid) >= maxScaledMidDelta || fabs(charBase - base) >= maxScaledBaseDelta) {
             return false;
         }

         // Add character, but don't adjust edge / bounding box because
         // combining character's positioning could be odd.
         chars.emplace_back(CharInfo { cCurrent, c, charPosA, edgeMid, fontA, textMatA });
         charPosEnd = charPosA + charLen;

         return true;
     }

     Unicode cPrev = getCombiningChar(chars.back().text);
     if (cPrev != 0 && unicodeTypeAlphaNum(u)) {
         // Previous is a combining character, current is base character
         double maxScaledBaseDelta = (fontA->getAscent() - fontA->getDescent()) * fontSizeA * combMaxBaseDelta;
         double charMid, charBase, maxScaledMidDelta;

         // Test if characters overlap
         if (rot == 0 || rot == 2) {
             charMid = x + (dx / 2);
             charBase = y;
             maxScaledMidDelta = fabs(dx * combMaxMidDelta);
         } else {
             charMid = y + (dy / 2);
             charBase = x;
             maxScaledMidDelta = fabs(dy * combMaxMidDelta);
         }

         double edgeMid = (chars.back().edge + edgeEnd) / 2;
         if (fabs(charMid - edgeMid) >= maxScaledMidDelta || fabs(charBase - base) >= maxScaledBaseDelta) {
             return false;
         }

         fontSize = fontSizeA;
         // move combining character to after base character
         chars.emplace_back(CharInfo { cPrev, chars.back().charcode, charPosA, edgeMid, chars.back().font, chars.back().textMat });

         // the slot that held the accent now receives the base character
         auto &lastChar = chars[chars.size() - 2];

         charPosEnd = charPosA + charLen;
         lastChar.text = u;
         lastChar.charcode = c;
         lastChar.font = fontA;
         lastChar.textMat = textMatA;

         // The word consisted of the accent only: its bounds have to be
         // recomputed from the real base character.
         if (len() == 2) {
             setInitialBounds(fontA, x, y);
         }

         // Updated edges / bounding box because we changed the base
         // character.  Only horizontal writing mode needs handling:
         // wMode was 0 on entry and setInitialBounds() can only reset it
         // to fontA->getWMode(), which was checked to be 0 as well.
         switch (rot) {
         case 0:
             lastChar.edge = x;
             xMax = edgeEnd = x + dx;
             break;
         case 1:
             lastChar.edge = y;
             yMax = edgeEnd = y + dy;
             break;
         case 2:
             lastChar.edge = x;
             xMin = edgeEnd = x + dx;
             break;
         case 3:
             lastChar.edge = y;
             yMin = edgeEnd = y + dy;
             break;
         }

         // the combining character sits in the middle of its base character
         chars.back().edge = (edgeEnd + lastChar.edge) / 2;
         return true;
     }
     return false;
 }

 // Absorbs word into this word: the bounding box becomes the union of
 // both boxes, word's characters are appended, and the trailing edge and
 // end position are taken over from word.  word itself is not modified.
 void TextWord::merge(TextWord *word)
 {
     xMin = std::min(xMin, word->xMin);
     yMin = std::min(yMin, word->yMin);
     xMax = std::max(xMax, word->xMax);
     yMax = std::max(yMax, word->yMax);

     chars.insert(chars.end(), word->chars.begin(), word->chars.end());

     edgeEnd = word->edgeEnd;
     charPosEnd = word->charPosEnd;
 }

 // Three-way comparison (-1 / 0 / 1) of the two words' starting positions
 // along the reading direction selected by this word's rotation.  Any
 // rotation other than 0..3 compares equal.
 inline int TextWord::primaryCmp(const TextWord *word) const
 {
     double diff = 0;

     if (rot == 0) {
         diff = xMin - word->xMin;
     } else if (rot == 1) {
         diff = yMin - word->yMin;
     } else if (rot == 2) {
         diff = word->xMax - xMax;
     } else if (rot == 3) {
         diff = word->yMax - yMax;
     }

     if (diff < 0) {
         return -1;
     }
     return diff > 0 ? 1 : 0;
 }

 // Gap between the end of this word and the start of word, measured
 // along the reading direction selected by this word's rotation.  Returns
 // 0 for any rotation other than 0..3.
 double TextWord::primaryDelta(const TextWord *word) const
 {
     switch (rot) {
     case 0:
         return word->xMin - xMax;
     case 1:
         return word->yMin - yMax;
     case 2:
         return xMin - word->xMax;
     case 3:
         return yMin - word->yMax;
     }
     return 0;
 }

 int TextWord::cmpYX(const void *p1, const void *p2)
 {
     TextWord *word1 = *(TextWord **)p1;
     TextWord *word2 = *(TextWord **)p2;
     double cmp;

     cmp = word1->yMin - word2->yMin;
     if (cmp == 0) {
         cmp = word1->xMin - word2->xMin;
     }
     return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
 }

 #ifdef TEXTOUT_WORD_LIST

 // Returns a newly allocated string holding the word's characters encoded
 // with the global text encoding.  The string is empty when no text
 // encoding is available.  The caller owns the returned object.
 GooString *TextWord::getText() const
 {
     GooString *result = new GooString();

     const UnicodeMap *encoding = globalParams->getTextEncoding();
     if (!encoding) {
         return result;
     }

     char encoded[8];
     for (size_t idx = 0; idx < len(); ++idx) {
         const auto nBytes = encoding->mapUnicode(chars[idx].text, encoded, sizeof(encoded));
         result->append(encoded, nBytes);
     }
     return result;
 }

 // Stores the bounding box of character charIdx in the four output
 // parameters.  Along the reading direction the box runs from the
 // character's own edge to the next character's edge (or to the word's
 // trailing edge for the last character); across it, the word's extent
 // is used.  The outputs are left untouched when charIdx is out of range
 // or the rotation is not 0..3.
 void TextWord::getCharBBox(int charIdx, double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) const
 {
     if (charIdx < 0 || static_cast<size_t>(charIdx) >= len()) {
         return;
     }
     const size_t idx = static_cast<size_t>(charIdx);
     const bool isLastChar = (idx + 1 == len());

     const auto edgeFrom = chars[idx].edge;
     const auto edgeTo = isLastChar ? edgeEnd : chars[idx + 1].edge;

     switch (rot) {
     case 0:
     case 2:
         // text runs along x; for rotation 2 the edges are in descending
         // order and therefore swapped
         *xMinA = (rot == 0) ? edgeFrom : edgeTo;
         *xMaxA = (rot == 0) ? edgeTo : edgeFrom;
         *yMinA = yMin;
         *yMaxA = yMax;
         break;
     case 1:
     case 3:
         // text runs along y; for rotation 3 the edges are in descending
         // order and therefore swapped
         *xMinA = xMin;
         *xMaxA = xMax;
         *yMinA = (rot == 1) ? edgeFrom : edgeTo;
         *yMaxA = (rot == 1) ? edgeTo : edgeFrom;
         break;
     }
 }

 #endif // TEXTOUT_WORD_LIST

 //------------------------------------------------------------------------
 // TextPool
 //------------------------------------------------------------------------

 // Creates an empty pool.  minBaseIdx > maxBaseIdx is the marker addWord()
 // uses to recognise that the bucket array has not been allocated yet.
 TextPool::TextPool()
 {
     pool = nullptr;
     minBaseIdx = 0;
     maxBaseIdx = -1;

     // no insertion has happened yet, so there is no cursor
     cursor = nullptr;
     cursorBaseIdx = -1;
 }

 // Deletes every word still held in any bucket, then frees the bucket
 // array itself.
 TextPool::~TextPool()
 {
     for (int idx = minBaseIdx; idx <= maxBaseIdx; ++idx) {
         TextWord *current = pool[idx - minBaseIdx];
         while (current) {
             // remember the successor before the node goes away
             TextWord *following = current->next;
             delete current;
             current = following;
         }
     }
     gfree(pool);
 }

 // Converts a baseline coordinate into a bucket index, clamped to
 // [minBaseIdx, maxBaseIdx].  A NaN coordinate maps to minBaseIdx.
 int TextPool::getBaseIdx(double base) const
 {
     const double scaled = base / textPoolStep;

     // NaN fails every ordered comparison, so it is handled explicitly
     if (std::isnan(scaled)) {
         return minBaseIdx;
     }
     if (scaled < minBaseIdx) {
         return minBaseIdx;
     }
     if (scaled > maxBaseIdx) {
         return maxBaseIdx;
     }
     return (int)scaled;
 }

 // Inserts word into the bucket that corresponds to its baseline, keeping
 // the bucket's list ordered according to TextWord::primaryCmp().  The
 // bucket array is allocated on first use and grown, with 128 spare
 // buckets, whenever the word falls outside [minBaseIdx, maxBaseIdx].
 // If the word's bucket index is unusable or the array cannot be grown, a
 // warning is reported and the word is deleted instead of being inserted.
 void TextPool::addWord(TextWord *word)
 {
     int wordBaseIdx, newMinBaseIdx, newMaxBaseIdx, baseIdx;
     TextWord *w0, *w1;

     // Validate the bucket index while it is still a double: converting a
     // NaN or an out-of-range double to int is undefined behaviour, so the
     // range check must come before the cast.  For representable values
     // this accepts and rejects exactly the same inputs as checking the
     // truncated integer would.
     const double wordBaseIdxDouble = word->base / textPoolStep;
     if (unlikely(std::isnan(wordBaseIdxDouble) || wordBaseIdxDouble <= INT_MIN + 128 || wordBaseIdxDouble >= INT_MAX - 128)) {
         error(errSyntaxWarning, -1, "wordBaseIdx out of range");
         delete word;
         return;
     }
     wordBaseIdx = (int)wordBaseIdxDouble;

     // expand the array if needed
     if (minBaseIdx > maxBaseIdx) {
         // first word: allocate buckets centred on its index
         minBaseIdx = wordBaseIdx - 128;
         maxBaseIdx = wordBaseIdx + 128;
         pool = (TextWord **)gmallocn(maxBaseIdx - minBaseIdx + 1, sizeof(TextWord *));
         for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) {
             pool[baseIdx - minBaseIdx] = nullptr;
         }
     } else if (wordBaseIdx < minBaseIdx) {
         // grow downwards: new empty buckets in front, old ones copied
         // behind them
         newMinBaseIdx = wordBaseIdx - 128;
         TextWord **newPool = (TextWord **)gmallocn_checkoverflow(maxBaseIdx - newMinBaseIdx + 1, sizeof(TextWord *));
         if (unlikely(!newPool)) {
             error(errSyntaxWarning, -1, "newPool would overflow");
             delete word;
             return;
         }
         for (baseIdx = newMinBaseIdx; baseIdx < minBaseIdx; ++baseIdx) {
             newPool[baseIdx - newMinBaseIdx] = nullptr;
         }
         memcpy(&newPool[minBaseIdx - newMinBaseIdx], pool, (maxBaseIdx - minBaseIdx + 1) * sizeof(TextWord *));
         gfree(pool);
         pool = newPool;
         minBaseIdx = newMinBaseIdx;
     } else if (wordBaseIdx > maxBaseIdx) {
         // grow upwards: extend the array and clear the new tail
         newMaxBaseIdx = wordBaseIdx + 128;
         TextWord **reallocatedPool = (TextWord **)greallocn(pool, newMaxBaseIdx - minBaseIdx + 1, sizeof(TextWord *), true /*checkoverflow*/, false /*free_pool*/);
         if (!reallocatedPool) {
             error(errSyntaxWarning, -1, "new pool size would overflow");
             delete word;
             return;
         }
         pool = reallocatedPool;
         for (baseIdx = maxBaseIdx + 1; baseIdx <= newMaxBaseIdx; ++baseIdx) {
             pool[baseIdx - minBaseIdx] = nullptr;
         }
         maxBaseIdx = newMaxBaseIdx;
     }

     // insert the new word; when it belongs to the same bucket as the
     // previously inserted word and does not sort before it, the search
     // starts at that word instead of at the head of the bucket
     if (cursor && wordBaseIdx == cursorBaseIdx && word->primaryCmp(cursor) >= 0) {
         w0 = cursor;
         w1 = cursor->next;
     } else {
         w0 = nullptr;
         w1 = pool[wordBaseIdx - minBaseIdx];
     }
     for (; w1 && word->primaryCmp(w1) > 0; w0 = w1, w1 = w1->next) {
         ;
     }
     word->next = w1;
     if (w0) {
         w0->next = word;
     } else {
         pool[wordBaseIdx - minBaseIdx] = word;
     }
     cursor = word;
     cursorBaseIdx = wordBaseIdx;
 }

 //------------------------------------------------------------------------
 // TextLine
 //------------------------------------------------------------------------

 // Creates an empty line belonging to block blkA, with the given rotation
 // and baseline.  The bounding box starts out inverted (xMin > xMax),
 // which addWord() uses to recognise the first word.
 TextLine::TextLine(TextBlock *blkA, int rotA, double baseA)
 {
     blk = blkA;
     rot = rotA;
     base = baseA;

     // no words yet, and not linked to a following line
     words = nullptr;
     lastWord = nullptr;
     next = nullptr;

     // inverted box marks the line as empty
     xMin = 0;
     yMin = 0;
     xMax = -1;
     yMax = -1;

     // per-character arrays
     text = nullptr;
     edge = nullptr;
     col = nullptr;
     len = 0;
     convertedLen = 0;
     hyphenated = false;

     // search caches
     normalized = nullptr;
     normalized_len = 0;
     normalized_idx = nullptr;
     ascii_translation = nullptr;
     ascii_len = 0;
     ascii_idx = nullptr;
 }

 // Deletes all words of the line and frees the per-character arrays.  The
 // normalized / ASCII index arrays are only freed together with their
 // companion text array.
 TextLine::~TextLine()
 {
     while (TextWord *doomed = words) {
         words = doomed->next;
         delete doomed;
     }

     gfree(text);
     gfree(edge);
     gfree(col);

     if (normalized) {
         gfree(normalized);
         gfree(normalized_idx);
     }
     if (ascii_translation) {
         gfree(ascii_translation);
         gfree(ascii_idx);
     }
 }

 // Appends word to the end of the line's word list and extends the line's
 // bounding box to include it.
 void TextLine::addWord(TextWord *word)
 {
     // link the word in at the tail
     if (!lastWord) {
         words = word;
     } else {
         lastWord->next = word;
     }
     lastWord = word;

     if (xMin > xMax) {
         // box is still inverted: this is the first word, adopt its box
         xMin = word->xMin;
         xMax = word->xMax;
         yMin = word->yMin;
         yMax = word->yMax;
         return;
     }

     // otherwise take the union of both boxes
     xMin = std::min(xMin, word->xMin);
     xMax = std::max(xMax, word->xMax);
     yMin = std::min(yMin, word->yMin);
     yMax = std::max(yMax, word->yMax);
 }

 // Returns the gap, along this line's primary direction, between the end of
 // this line and the start of <line>.  Which bounding-box edges are used
 // depends on this line's rotation; an unexpected rotation yields 0.
 double TextLine::primaryDelta(const TextLine *line) const
 {
     switch (rot) {
     case 0:
         return line->xMin - xMax;
     case 1:
         return line->yMin - yMax;
     case 2:
         return xMin - line->xMax;
     case 3:
         return yMin - line->yMax;
     default:
         return 0;
     }
 }

 // Three-way comparison (-1, 0, 1) of this line against <line> by their
 // starting edge along the primary direction of this line's rotation.
 int TextLine::primaryCmp(const TextLine *line) const
 {
     double diff;

     switch (rot) {
     case 0:
         diff = xMin - line->xMin;
         break;
     case 1:
         diff = yMin - line->yMin;
         break;
     case 2:
         diff = line->xMax - xMax;
         break;
     case 3:
         diff = line->yMax - yMax;
         break;
     default:
         diff = 0;
         break;
     }
     // sign of diff as an int
     return (diff > 0) - (diff < 0);
 }

 // Three-way comparison (-1, 0, 1) of the two baselines.  For rotations 0
 // and 3 a larger base sorts later; for rotations 1 and 2 the order is
 // reversed.
 int TextLine::secondaryCmp(const TextLine *line) const
 {
     double diff;

     if (rot == 0 || rot == 3) {
         diff = base - line->base;
     } else {
         diff = line->base - base;
     }
     return (diff > 0) - (diff < 0);
 }

 // Orders lines by secondaryCmp() first; primaryCmp() is only consulted to
 // break a tie.
 int TextLine::cmpYX(const TextLine *line) const
 {
     const int bySecondary = secondaryCmp(line);

     return bySecondary != 0 ? bySecondary : primaryCmp(line);
 }

 int TextLine::cmpXY(const void *p1, const void *p2)
 {
     TextLine *line1 = *(TextLine **)p1;
     TextLine *line2 = *(TextLine **)p2;
     int cmp;

     if ((cmp = line1->primaryCmp(line2))) {
         return cmp;
     }
     return line1->secondaryCmp(line2);
 }

 // Finalizes the line once all of its words have been added:
 //   1. picks an inter-word spacing threshold,
 //   2. flags gaps at or above the threshold as spaces and merges adjacent
 //      words that are closer than that and share font, underline state,
 //      (nearly) font size and contiguous character positions,
 //   3. builds the text[] / edge[] arrays (one extra edge entry at the end),
 //   4. builds col[], the converted-output column of every character, using
 //      uMap (may be null, in which case every column is 0),
 //   5. records whether the line ends in '-'.
 //
 // A line with no words (or whose words carry no characters) is tolerated:
 // the arrays are allocated for length 0 and hyphenated is set to false.
 void TextLine::coalesce(const UnicodeMap *uMap)
 {
     double space, delta, minSpace;
     bool isUnicode;
     char buf[8];

     // nothing to merge unless the line holds at least two words
     if (words && words->next) {

         // compute the inter-word space threshold
         if (words->len() > 1 || words->next->len() > 1) {
             minSpace = 0;
         } else {
             // the first two words are single characters: take the smallest
             // gap between consecutive words, giving up (minSpace = 0) as
             // soon as a multi-character word or a non-positive gap shows up
             minSpace = words->primaryDelta(words->next);
             for (auto word0 = words->next, word1 = word0->next; word1 && minSpace > 0; word0 = word1, word1 = word0->next) {
                 if (word1->len() > 1) {
                     minSpace = 0;
                 }
                 delta = word0->primaryDelta(word1);
                 if (delta < minSpace) {
                     minSpace = delta;
                 }
             }
         }
         if (minSpace <= 0) {
             space = maxCharSpacing * words->fontSize;
         } else {
             // scale the observed gap, capped relative to the font size
             space = maxWideCharSpacingMul * minSpace;
             if (space > maxWideCharSpacing * words->fontSize) {
                 space = maxWideCharSpacing * words->fontSize;
             }
         }

         // merge words
         auto word0 = words;
         auto word1 = words->next;
         while (word1) {
             if (word0->primaryDelta(word1) >= space) {
                 // wide gap: keep both words and remember the space
                 word0->spaceAfter = true;
                 word0 = word1;
                 word1 = word1->next;
             } else if (word0->chars.back().font == word1->chars.front().font //
                        && word0->underlined == word1->underlined //
                        && fabs(word0->fontSize - word1->fontSize) < maxWordFontSizeDelta * words->fontSize //
                        && word1->chars.front().charPos == word0->charPosEnd) {
                 // narrow gap and compatible attributes: fold word1 into
                 // word0, then unlink and delete it
                 word0->merge(word1);
                 word0->next = word1->next;
                 delete word1;
                 word1 = word0->next;
             } else {
                 word0 = word1;
                 word1 = word1->next;
             }
         }
     }

     // build the line text
     isUnicode = uMap ? uMap->isUnicode() : false;
     len = 0;
     for (auto word1 = words; word1; word1 = word1->next) {
         len += word1->len();
         if (word1->spaceAfter) {
             ++len;
         }
     }
     text = (Unicode *)gmallocn(len, sizeof(Unicode));
     edge = (double *)gmallocn(len + 1, sizeof(double));
     size_t i = 0;
     for (auto word1 = words; word1; word1 = word1->next) {
         for (size_t j = 0; j < word1->len(); ++j) {
             text[i] = word1->chars[j].text;
             edge[i] = word1->chars[j].edge;
             ++i;
         }
         // closing edge of the word; it doubles as the edge of the
         // synthetic space character when one follows
         edge[i] = word1->edgeEnd;
         if (word1->spaceAfter) {
             text[i] = (Unicode)0x0020;
             ++i;
         }
     }

     // compute convertedLen and set up the col array
     col = (int *)gmallocn(len + 1, sizeof(int));
     convertedLen = 0;
     for (int ci = 0; ci < len; ++ci) {
         col[ci] = convertedLen;
         if (isUnicode) {
             ++convertedLen;
         } else if (uMap) {
             convertedLen += uMap->mapUnicode(text[ci], buf, sizeof(buf));
         }
     }
     col[len] = convertedLen;

     // check for hyphen at end of line
     //~ need to check for other chars used as hyphens
     // (guarded so that an empty line never reads text[-1])
     hyphenated = len > 0 && text[len - 1] == (Unicode)'-';
 }

 //------------------------------------------------------------------------
 // TextLineFrag
 //------------------------------------------------------------------------

 // A fragment of a TextLine: a run of <len> Unicode chars starting at
 // offset <start>, together with its bounding box, baseline and first
 // column.
 class TextLineFrag
 {
 public:
     // the line object
     TextLine *line;
     // offset of this fragment (in Unicode chars)
     int start;
     // length of this fragment (in Unicode chars)
     int len;
     // bounding box coordinates
     double xMin;
     double xMax;
     double yMin;
     double yMax;
     // baseline virtual coordinate
     double base;
     // first column
     int col;

     // set line / start / len and look up the first column
     void init(TextLine *lineA, int startA, int lenA);
     // fill in xMin..yMax and base
     void computeCoords(bool oneRot);

     // comparators taking pointers to TextLineFrag objects
     static int cmpYXPrimaryRot(const void *p1, const void *p2);
     static int cmpYXLineRot(const void *p1, const void *p2);
     static int cmpXYLineRot(const void *p1, const void *p2);
     static int cmpXYColumnPrimaryRot(const void *p1, const void *p2);
     static int cmpXYColumnLineRot(const void *p1, const void *p2);
 };

 // Binds this fragment to chars [startA, startA + lenA) of lineA and copies
 // the column of the first char from the line's col array.
 void TextLineFrag::init(TextLine *lineA, int startA, int lenA)
 {
     line = lineA;
     start = startA;
     len = lenA;

     col = lineA->col[startA];
 }

 void TextLineFrag::computeCoords(bool oneRot)
 {
     TextBlock *blk;
     double d0, d1, d2, d3, d4;

     if (oneRot) {

         switch (line->rot) {
         case 0:
             xMin = line->edge[start];
             xMax = line->edge[start + len];
             yMin = line->yMin;
             yMax = line->yMax;
             break;
         case 1:
             xMin = line->xMin;
             xMax = line->xMax;
             yMin = line->edge[start];
             yMax = line->edge[start + len];
             break;
         case 2:
             xMin = line->edge[start + len];
             xMax = line->edge[start];
             yMin = line->yMin;
             yMax = line->yMax;
             break;
         case 3:
             xMin = line->xMin;
             xMax = line->xMax;
             yMin = line->edge[start + len];
             yMax = line->edge[start];
             break;
         }
         base = line->base;

     } else {

         if (line->rot == 0 && line->blk->page->primaryRot == 0) {

             xMin = line->edge[start];
             xMax = line->edge[start + len];
             yMin = line->yMin;
             yMax = line->yMax;
             base = line->base;

         } else {

             blk = line->blk;
             d0 = line->edge[start];
             d1 = line->edge[start + len];
             d2 = d3 = d4 = 0; // make gcc happy

             switch (line->rot) {
             case 0:
                 d2 = line->yMin;
                 d3 = line->yMax;
                 d4 = line->base;
                 d0 = (d0 - blk->xMin) / (blk->xMax - blk->xMin);
                 d1 = (d1 - blk->xMin) / (blk->xMax - blk->xMin);
                 d2 = (d2 - blk->yMin) / (blk->yMax - blk->yMin);
                 d3 = (d3 - blk->yMin) / (blk->yMax - blk->yMin);
                 d4 = (d4 - blk->yMin) / (blk->yMax - blk->yMin);
                 break;
             case 1:
                 d2 = line->xMax;
                 d3 = line->xMin;
                 d4 = line->base;
                 d0 = (d0 - blk->yMin) / (blk->yMax - blk->yMin);
                 d1 = (d1 - blk->yMin) / (blk->yMax - blk->yMin);
                 d2 = (blk->xMax - d2) / (blk->xMax - blk->xMin);
                 d3 = (blk->xMax - d3) / (blk->xMax - blk->xMin);
                 d4 = (blk->xMax - d4) / (blk->xMax - blk->xMin);
                 break;
             case 2:
                 d2 = line->yMax;
                 d3 = line->yMin;
                 d4 = line->base;
                 d0 = (blk->xMax - d0) / (blk->xMax - blk->xMin);
                 d1 = (blk->xMax - d1) / (blk->xMax - blk->xMin);
                 d2 = (blk->yMax - d2) / (blk->yMax - blk->yMin);
                 d3 = (blk->yMax - d3) / (blk->yMax - blk->yMin);
                 d4 = (blk->yMax - d4) / (blk->yMax - blk->yMin);
                 break;
             case 3:
                 d2 = line->xMin;
                 d3 = line->xMax;
                 d4 = line->base;
                 d0 = (blk->yMax - d0) / (blk->yMax - blk->yMin);
                 d1 = (blk->yMax - d1) / (blk->yMax - blk->yMin);
                 d2 = (d2 - blk->xMin) / (blk->xMax - blk->xMin);
                 d3 = (d3 - blk->xMin) / (blk->xMax - blk->xMin);
                 d4 = (d4 - blk->xMin) / (blk->xMax - blk->xMin);
                 break;
             }

             switch (line->blk->page->primaryRot) {
             case 0:
                 xMin = blk->xMin + d0 * (blk->xMax - blk->xMin);
                 xMax = blk->xMin + d1 * (blk->xMax - blk->xMin);
                 yMin = blk->yMin + d2 * (blk->yMax - blk->yMin);
                 yMax = blk->yMin + d3 * (blk->yMax - blk->yMin);
                 base = blk->yMin + d4 * (blk->yMax - blk->yMin);
                 break;
             case 1:
                 xMin = blk->xMax - d3 * (blk->xMax - blk->xMin);
                 xMax = blk->xMax - d2 * (blk->xMax - blk->xMin);
                 yMin = blk->yMin + d0 * (blk->yMax - blk->yMin);
                 yMax = blk->yMin + d1 * (blk->yMax - blk->yMin);
                 base = blk->xMax - d4 * (blk->xMax - blk->xMin);
                 break;
             case 2:
                 xMin = blk->xMax - d1 * (blk->xMax - blk->xMin);
                 xMax = blk->xMax - d0 * (blk->xMax - blk->xMin);
                 yMin = blk->yMax - d3 * (blk->yMax - blk->yMin);
                 yMax = blk->yMax - d2 * (blk->yMax - blk->yMin);
                 base = blk->yMax - d4 * (blk->yMax - blk->yMin);
                 break;
             case 3:
                 xMin = blk->xMin + d2 * (blk->xMax - blk->xMin);
                 xMax = blk->xMin + d3 * (blk->xMax - blk->xMin);
                 yMin = blk->yMax - d1 * (blk->yMax - blk->yMin);
                 yMax = blk->yMax - d0 * (blk->yMax - blk->yMin);
                 base = blk->xMin + d4 * (blk->xMax - blk->xMin);
                 break;
             }
         }
     }
 }

 int TextLineFrag::cmpYXPrimaryRot(const void *p1, const void *p2)
 {
     TextLineFrag *frag1 = (TextLineFrag *)p1;
     TextLineFrag *frag2 = (TextLineFrag *)p2;
     double cmp;

     cmp = 0; // make gcc happy
     switch (frag1->line->blk->page->primaryRot) {
     case 0:
         if (fabs(cmp = frag1->yMin - frag2->yMin) < 0.01) {
             cmp = frag1->xMin - frag2->xMin;
         }
         break;
     case 1:
         if (fabs(cmp = frag2->xMax - frag1->xMax) < 0.01) {
             cmp = frag1->yMin - frag2->yMin;
         }
         break;
     case 2:
         if (fabs(cmp = frag2->yMin - frag1->yMin) < 0.01) {
             cmp = frag2->xMax - frag1->xMax;
         }
         break;
     case 3:
         if (fabs(cmp = frag1->xMax - frag2->xMax) < 0.01) {
             cmp = frag2->yMax - frag1->yMax;
         }
         break;
     }
     return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
 }

 int TextLineFrag::cmpYXLineRot(const void *p1, const void *p2)
 {
     TextLineFrag *frag1 = (TextLineFrag *)p1;
     TextLineFrag *frag2 = (TextLineFrag *)p2;
     double cmp;

     cmp = 0; // make gcc happy
     switch (frag1->line->rot) {
     case 0:
         if ((cmp = frag1->yMin - frag2->yMin) == 0) {
             cmp = frag1->xMin - frag2->xMin;
         }
         break;
     case 1:
         if ((cmp = frag2->xMax - frag1->xMax) == 0) {
             cmp = frag1->yMin - frag2->yMin;
         }
         break;
     case 2:
         if ((cmp = frag2->yMin - frag1->yMin) == 0) {
             cmp = frag2->xMax - frag1->xMax;
         }
         break;
     case 3:
         if ((cmp = frag1->xMax - frag2->xMax) == 0) {
             cmp = frag2->yMax - frag1->yMax;
         }
         break;
     }
     return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
 }

 int TextLineFrag::cmpXYLineRot(const void *p1, const void *p2)
 {
     TextLineFrag *frag1 = (TextLineFrag *)p1;
     TextLineFrag *frag2 = (TextLineFrag *)p2;
     double cmp;

     cmp = 0; // make gcc happy
     switch (frag1->line->rot) {
     case 0:
         if ((cmp = frag1->xMin - frag2->xMin) == 0) {
             cmp = frag1->yMin - frag2->yMin;
         }
         break;
     case 1:
         if ((cmp = frag1->yMin - frag2->yMin) == 0) {
             cmp = frag2->xMax - frag1->xMax;
         }
         break;
     case 2:
         if ((cmp = frag2->xMax - frag1->xMax) == 0) {
             cmp = frag2->yMin - frag1->yMin;
         }
         break;
     case 3:
         if ((cmp = frag2->yMax - frag1->yMax) == 0) {
             cmp = frag1->xMax - frag2->xMax;
         }
         break;
     }
     return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
 }

 int TextLineFrag::cmpXYColumnPrimaryRot(const void *p1, const void *p2)
 {
     TextLineFrag *frag1 = (TextLineFrag *)p1;
     TextLineFrag *frag2 = (TextLineFrag *)p2;
     double cmp;

     // if columns overlap, compare y values
     if (frag1->col < frag2->col + (frag2->line->col[frag2->start + frag2->len] - frag2->line->col[frag2->start]) && frag2->col < frag1->col + (frag1->line->col[frag1->start + frag1->len] - frag1->line->col[frag1->start])) {
         cmp = 0; // make gcc happy
         switch (frag1->line->blk->page->primaryRot) {
         case 0:
             cmp = frag1->yMin - frag2->yMin;
             break;
         case 1:
             cmp = frag2->xMax - frag1->xMax;
             break;
         case 2:
             cmp = frag2->yMin - frag1->yMin;
             break;
         case 3:
             cmp = frag1->xMax - frag2->xMax;
             break;
         }
         return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
     }

     // otherwise, compare starting column
     return frag1->col - frag2->col;
 }

 int TextLineFrag::cmpXYColumnLineRot(const void *p1, const void *p2)
 {
     TextLineFrag *frag1 = (TextLineFrag *)p1;
     TextLineFrag *frag2 = (TextLineFrag *)p2;
     double cmp;

     // if columns overlap, compare y values
     if (frag1->col < frag2->col + (frag2->line->col[frag2->start + frag2->len] - frag2->line->col[frag2->start]) && frag2->col < frag1->col + (frag1->line->col[frag1->start + frag1->len] - frag1->line->col[frag1->start])) {
         cmp = 0; // make gcc happy
         switch (frag1->line->rot) {
         case 0:
             cmp = frag1->yMin - frag2->yMin;
             break;
         case 1:
             cmp = frag2->xMax - frag1->xMax;
             break;
         case 2:
             cmp = frag2->yMin - frag1->yMin;
             break;
         case 3:
             cmp = frag1->xMax - frag2->xMax;
             break;
         }
         return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
     }

     // otherwise, compare starting column
     return frag1->col - frag2->col;
 }

 //------------------------------------------------------------------------
 // TextBlock
 //------------------------------------------------------------------------

 // Creates an empty block on pageA with rotation rotA and a fresh word pool.
 TextBlock::TextBlock(TextPage *pageA, int rotA)
 {
     page = pageA;
     rot = rotA;

     // xMin > xMax marks the bounding box as empty; addWord() tests for this
     xMin = 0;
     yMin = 0;
     xMax = -1;
     yMax = -1;

     // the primary-direction limits start out as [0, page width]
     priMin = 0;
     priMax = pageA->pageWidth;

     // words are collected in the pool; lines are built later by coalesce()
     pool = new TextPool();
     lines = nullptr;
     curLine = nullptr;

     next = nullptr;
     stackNext = nullptr;

     tableId = -1;
     tableEnd = false;
 }

 // Deletes the word pool and every line of the block.
 TextBlock::~TextBlock()
 {
     delete pool;

     // unlink each line from the head of the list before deleting it
     while (TextLine *doomed = lines) {
         lines = doomed->next;
         delete doomed;
     }
 }

 // Hands word to the block's pool and extends the block's bounding box to
 // cover it.
 void TextBlock::addWord(TextWord *word)
 {
     pool->addWord(word);

     // an inverted box (xMin > xMax) means no word has been added yet:
     // adopt the word's box unchanged
     if (xMin > xMax) {
         xMin = word->xMin;
         xMax = word->xMax;
         yMin = word->yMin;
         yMax = word->yMax;
         return;
     }

     // otherwise grow the box just enough to include the new word
     xMin = (word->xMin < xMin) ? word->xMin : xMin;
     xMax = (word->xMax > xMax) ? word->xMax : xMax;
     yMin = (word->yMin < yMin) ? word->yMin : yMin;
     yMax = (word->yMax > yMax) ? word->yMax : yMax;
 }

 // Turns the words collected in this block's pool into lines.
 //
 // The work happens in three phases:
 //   1. words that duplicate an earlier word (same text, nearly the same
 //      bounding box) are removed from the pool and deleted;
 //   2. the remaining words are pulled out of the pool one line at a time;
 //      each finished line is inserted into <lines> in cmpYX() order and
 //      coalesced with uMap;
 //   3. every line gets a starting column, which is added to all entries of
 //      its col array; nColumns ends up as the largest column seen.
 //
 // fixedPitch, when non-zero, replaces the font-size based word spacing
 // limit in phase 2 and is used as the column width in phase 3.
 void TextBlock::coalesce(const UnicodeMap *uMap, double fixedPitch)
 {
     // discard duplicated text (fake boldface, drop shadows)
     for (int idx0 = pool->minBaseIdx; idx0 <= pool->maxBaseIdx; ++idx0) {
         // Get the first LHS word from the pool
         TextWord *word0 = pool->getPool(idx0);

         while (word0) {
             // tolerances scale with the font size of the LHS word; which one
             // applies to x and which to y depends on the block rotation
             double priDelta = dupMaxPriDelta * word0->fontSize;
             double secDelta = dupMaxSecDelta * word0->fontSize;
             double xDelta = ((rot == 0) || (rot == 2)) ? priDelta : secDelta;
             double yDelta = ((rot == 0) || (rot == 2)) ? secDelta : priDelta;

             // last pool slot whose baseline can still be within secDelta
             int maxBaseIdx = pool->getBaseIdx(word0->base + secDelta);

             for (int idx1 = idx0; idx1 <= maxBaseIdx; idx1++) {
                 TextWord *prevWord;
                 /* In case the RHS word is from the same pool as the LHS word,
                  * start the inner loop with the word following the LHS word.
                  * Otherwise, start with the second word from the subsequent pools
                  * - the first word is compared at the end.
                  */
                 if (idx0 == idx1) {
                     prevWord = word0;
                 } else {
                     prevWord = pool->getPool(idx1);
                     if (!prevWord) {
                         continue;
                     }
                 }
                 TextWord *word1 = prevWord->next;

                 // true when both words hold the same sequence of Unicode values
                 auto equalText = [](const TextWord &w1, const TextWord &w2) -> bool { //
                     return std::equal(w1.chars.begin(), w1.chars.end(), w2.chars.begin(), w2.chars.end(), //
                                       [](auto c1, auto c2) { return c1.text == c2.text; });
                 };
                 // true when the text matches and all four bounding-box edges
                 // agree within the tolerances computed above
                 auto match = [&equalText, xDelta, yDelta](const TextWord &w1, const TextWord &w2) -> bool {
                     if (!equalText(w1, w2)) {
                         return false;
                     }
                     return fabs(w1.xMin - w2.xMin) < xDelta && fabs(w1.xMax - w2.xMax) < xDelta //
                             && fabs(w1.yMin - w2.yMin) < yDelta && fabs(w1.yMax - w2.yMax) < yDelta;
                 };

                 // unlink and delete every duplicate after prevWord
                 while (word1) {
                     if (match(*word0, *word1)) {
                         prevWord->next = word1->next;
                         delete word1;
                         word1 = prevWord->next;
                     } else {
                         prevWord = word1;
                         word1 = word1->next;
                     }
                 }

                 // Check the first word from each subsequent pool
                 // (when idx0 == idx1, word1 is null here and nothing happens)
                 if (idx0 != idx1) {
                     word1 = pool->getPool(idx1);
                 }
                 if (word1 && match(*word0, *word1)) {
                     pool->setPool(idx1, word1->next);
                     delete word1;
                 }
             }

             word0 = word0->next;
         }
     }

     TextWord *word0, *word1;
     TextWord *bestWord0, *bestWord1, *lastWord;
     TextLine *line, *line0, *line1;
     TextLine **lineArray;
     int poolMinBaseIdx, startBaseIdx, minBaseIdx, maxBaseIdx;
     int baseIdx, bestWordBaseIdx;
     double minBase, maxBase;
     double fontSize, wordSpacing, delta;
     bool overlap;
     int col1, col2;
     int i, j, k;

     // build the lines
     curLine = nullptr;
     poolMinBaseIdx = pool->minBaseIdx;
     charCount = 0;
     nLines = 0;
     while (true) {

         // find the first non-empty line in the pool
         for (; poolMinBaseIdx <= pool->maxBaseIdx && !pool->getPool(poolMinBaseIdx); ++poolMinBaseIdx) {
             ;
         }
         // the pool is exhausted: all words have been assigned to lines
         if (poolMinBaseIdx > pool->maxBaseIdx) {
             break;
         }

         // look for the left-most word in the first four lines of the
         // pool -- this avoids starting with a superscript word
         startBaseIdx = poolMinBaseIdx;
         for (baseIdx = poolMinBaseIdx + 1; baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx; ++baseIdx) {
             if (!pool->getPool(baseIdx)) {
                 continue;
             }
             if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx)) < 0) {
                 startBaseIdx = baseIdx;
             }
         }

         // create a new line
         // (the seed word is detached from the head of its pool slot first)
         word0 = pool->getPool(startBaseIdx);
         pool->setPool(startBaseIdx, word0->next);
         word0->next = nullptr;
         line = new TextLine(this, word0->rot, word0->base);
         line->addWord(word0);
         lastWord = word0;

         // compute the search range
         fontSize = word0->fontSize;
         minBase = word0->base - maxIntraLineDelta * fontSize;
         maxBase = word0->base + maxIntraLineDelta * fontSize;
         minBaseIdx = pool->getBaseIdx(minBase);
         maxBaseIdx = pool->getBaseIdx(maxBase);
         wordSpacing = fixedPitch ? fixedPitch : maxWordSpacing * fontSize;

         // find the rest of the words in this line
         while (true) {

             // find the left-most word whose baseline is in the range for
             // this line
             bestWordBaseIdx = 0;
             bestWord0 = bestWord1 = nullptr;
             overlap = false;
             for (baseIdx = minBaseIdx; !overlap && baseIdx <= maxBaseIdx; ++baseIdx) {
                 // word0 trails word1 so that the candidate can be unlinked later
                 for (word0 = nullptr, word1 = pool->getPool(baseIdx); word1; word0 = word1, word1 = word1->next) {
                     if (word1->base >= minBase && word1->base <= maxBase) {
                         delta = lastWord->primaryDelta(word1);
                         if (delta < minCharSpacing * fontSize) {
                             // the candidate overlaps the last word: the line ends here
                             overlap = true;
                             break;
                         } else {
                             if (delta < wordSpacing && (!bestWord1 || word1->primaryCmp(bestWord1) < 0)) {
                                 bestWordBaseIdx = baseIdx;
                                 bestWord0 = word0;
                                 bestWord1 = word1;
                             }
                             // only the first in-range word of each pool slot is considered
                             break;
                         }
                     }
                 }
             }
             if (overlap || !bestWord1) {
                 break;
             }

             // remove it from the pool, and add it to the line
             if (bestWord0) {
                 bestWord0->next = bestWord1->next;
             } else {
                 pool->setPool(bestWordBaseIdx, bestWord1->next);
             }
             bestWord1->next = nullptr;
             line->addWord(bestWord1);
             lastWord = bestWord1;
         }

         // add the line
         // (the search for the insertion point starts at the previously added
         // line when the new line sorts after it, otherwise at the list head)
         if (curLine && line->cmpYX(curLine) > 0) {
             line0 = curLine;
             line1 = curLine->next;
         } else {
             line0 = nullptr;
             line1 = lines;
         }
         for (; line1 && line->cmpYX(line1) > 0; line0 = line1, line1 = line1->next) {
             ;
         }
         if (line0) {
             line0->next = line;
         } else {
             lines = line;
         }
         line->next = line1;
         curLine = line;
         line->coalesce(uMap);
         charCount += line->len;
         ++nLines;
     }

     // sort lines into xy order for column assignment
     lineArray = (TextLine **)gmallocn(nLines, sizeof(TextLine *));
     for (line = lines, i = 0; line; line = line->next, ++i) {
         lineArray[i] = line;
     }
     qsort(lineArray, nLines, sizeof(TextLine *), &TextLine::cmpXY);

     // column assignment
     nColumns = 0;
     if (fixedPitch) {
         // fixed pitch: the starting column is the line's offset from the
         // block edge divided by the pitch, rounded to nearest
         for (i = 0; i < nLines; ++i) {
             line0 = lineArray[i];
             col1 = 0; // make gcc happy
             switch (rot) {
             case 0:
                 col1 = (int)((line0->xMin - xMin) / fixedPitch + 0.5);
                 break;
             case 1:
                 col1 = (int)((line0->yMin - yMin) / fixedPitch + 0.5);
                 break;
             case 2:
                 col1 = (int)((xMax - line0->xMax) / fixedPitch + 0.5);
                 break;
             case 3:
                 col1 = (int)((yMax - line0->yMax) / fixedPitch + 0.5);
                 break;
             }
             for (k = 0; k <= line0->len; ++k) {
                 line0->col[k] += col1;
             }
             if (line0->col[line0->len] > nColumns) {
                 nColumns = line0->col[line0->len];
             }
         }
     } else {
         // variable pitch: the starting column of a line is the largest
         // column implied by any line that precedes it in the sorted array
         for (i = 0; i < nLines; ++i) {
             line0 = lineArray[i];
             col1 = 0;
             for (j = 0; j < i; ++j) {
                 line1 = lineArray[j];
                 if (line1->primaryDelta(line0) >= 0) {
                     // line1 ends before line0 starts: go one past its last column
                     col2 = line1->col[line1->len] + 1;
                 } else {
                     // otherwise take the column of the first char of line1 whose
                     // midpoint lies beyond the start of line0
                     k = 0; // make gcc happy
                     switch (rot) {
                     case 0:
                         for (k = 0; k < line1->len && line0->xMin >= 0.5 * (line1->edge[k] + line1->edge[k + 1]); ++k) {
                             ;
                         }
                         break;
                     case 1:
                         for (k = 0; k < line1->len && line0->yMin >= 0.5 * (line1->edge[k] + line1->edge[k + 1]); ++k) {
                             ;
                         }
                         break;
                     case 2:
                         for (k = 0; k < line1->len && line0->xMax <= 0.5 * (line1->edge[k] + line1->edge[k + 1]); ++k) {
                             ;
                         }
                         break;
                     case 3:
                         for (k = 0; k < line1->len && line0->yMax <= 0.5 * (line1->edge[k] + line1->edge[k + 1]); ++k) {
                             ;
                         }
                         break;
                     }
                     col2 = line1->col[k];
                 }
                 if (col2 > col1) {
                     col1 = col2;
                 }
             }
             for (k = 0; k <= line0->len; ++k) {
                 line0->col[k] += col1;
             }
             if (line0->col[line0->len] > nColumns) {
                 nColumns = line0->col[line0->len];
             }
         }
     }
     gfree(lineArray);
 }

 void TextBlock::updatePriMinMax(const TextBlock *blk)
 {
     double newPriMin, newPriMax;
     bool gotPriMin, gotPriMax;

     gotPriMin = gotPriMax = false;
     newPriMin = newPriMax = 0; // make gcc happy
     switch (page->primaryRot) {
     case 0:
     case 2:
         if (blk->yMin < yMax && blk->yMax > yMin) {
             if (blk->xMin < xMin) {
                 newPriMin = blk->xMax;
                 gotPriMin = true;
             }
             if (blk->xMax > xMax) {
                 newPriMax = blk->xMin;
                 gotPriMax = true;
             }
         }
         break;
     case 1:
     case 3:
         if (blk->xMin < xMax && blk->xMax > xMin) {
             if (blk->yMin < yMin) {
                 newPriMin = blk->yMax;
                 gotPriMin = true;
             }
             if (blk->yMax > yMax) {
                 newPriMax = blk->yMin;
                 gotPriMax = true;
             }
         }
         break;
     }
     if (gotPriMin) {
         if (newPriMin > xMin) {
             newPriMin = xMin;
         }
         if (newPriMin > priMin) {
             priMin = newPriMin;
         }
     }
     if (gotPriMax) {
         if (newPriMax < xMax) {
             newPriMax = xMax;
         }
         if (newPriMax < priMax) {
             priMax = newPriMax;
         }
     }
 }

 int TextBlock::cmpXYPrimaryRot(const void *p1, const void *p2)
 {
     TextBlock *blk1 = *(TextBlock **)p1;
     TextBlock *blk2 = *(TextBlock **)p2;
     double cmp;

     cmp = 0; // make gcc happy
     switch (blk1->page->primaryRot) {
     case 0:
         if ((cmp = blk1->xMin - blk2->xMin) == 0) {
             cmp = blk1->yMin - blk2->yMin;
         }
         break;
     case 1:
         if ((cmp = blk1->yMin - blk2->yMin) == 0) {
             cmp = blk2->xMax - blk1->xMax;
         }
         break;
     case 2:
         if ((cmp = blk2->xMax - blk1->xMax) == 0) {
             cmp = blk2->yMin - blk1->yMin;
         }
         break;
     case 3:
         if ((cmp = blk2->yMax - blk1->yMax) == 0) {
             cmp = blk1->xMax - blk2->xMax;
         }
         break;
     }
     return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
 }

 int TextBlock::cmpYXPrimaryRot(const void *p1, const void *p2)
 {
     TextBlock *blk1 = *(TextBlock **)p1;
     TextBlock *blk2 = *(TextBlock **)p2;
     double cmp;

     cmp = 0; // make gcc happy
     switch (blk1->page->primaryRot) {
     case 0:
         if ((cmp = blk1->yMin - blk2->yMin) == 0) {
             cmp = blk1->xMin - blk2->xMin;
         }
         break;
     case 1:
         if ((cmp = blk2->xMax - blk1->xMax) == 0) {
             cmp = blk1->yMin - blk2->yMin;
         }
         break;
     case 2:
         if ((cmp = blk2->yMin - blk1->yMin) == 0) {
             cmp = blk2->xMax - blk1->xMax;
         }
         break;
     case 3:
         if ((cmp = blk1->xMax - blk2->xMax) == 0) {
             cmp = blk2->yMax - blk1->yMax;
         }
         break;
     }
     return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
 }

 // Three-way comparison (-1, 0, 1) of this block against <blk> by their
 // starting edge along the primary direction of this block's rotation.
 int TextBlock::primaryCmp(const TextBlock *blk) const
 {
     double diff;

     switch (rot) {
     case 0:
         diff = xMin - blk->xMin;
         break;
     case 1:
         diff = yMin - blk->yMin;
         break;
     case 2:
         diff = blk->xMax - xMax;
         break;
     case 3:
         diff = blk->yMax - yMax;
         break;
     default:
         diff = 0;
         break;
     }
     // sign of diff as an int
     return (diff > 0) - (diff < 0);
 }

 // Returns the gap between this block and <blk> across the primary
 // direction.  Which bounding-box edges are used depends on this block's
 // rotation; an unexpected rotation yields 0.
 double TextBlock::secondaryDelta(const TextBlock *blk) const
 {
     switch (rot) {
     case 0:
         return blk->yMin - yMax;
     case 1:
         return xMin - blk->xMax;
     case 2:
         return yMin - blk->yMax;
     case 3:
         return blk->xMin - xMax;
     default:
         return 0;
     }
 }

 // Reports whether this block lies "below" <blk> for the page's primary
 // rotation: its extent along the primary direction must fit inside
 // [blk->priMin, blk->priMax], and it must lie further along the secondary
 // direction than blk.
 bool TextBlock::isBelow(const TextBlock *blk) const
 {
     switch (page->primaryRot) {
     case 0:
         return xMin >= blk->priMin && xMax <= blk->priMax && yMin > blk->yMin;
     case 1:
         return yMin >= blk->priMin && yMax <= blk->priMax && xMax < blk->xMax;
     case 2:
         return xMin >= blk->priMin && xMax <= blk->priMax && yMax < blk->yMax;
     case 3:
         return yMin >= blk->priMin && yMax <= blk->priMax && xMin > blk->xMin;
     default:
         return false;
     }
 }

 bool TextBlock::isBeforeByRule1(const TextBlock *blk1)
 {
     bool before = false;
     bool overlap = false;

     switch (this->page->primaryRot) {
     case 0:
     case 2:
         overlap = ((this->ExMin <= blk1->ExMin) && (blk1->ExMin <= this->ExMax)) || ((blk1->ExMin <= this->ExMin) && (this->ExMin <= blk1->ExMax));
         break;
     case 1:
     case 3:
         overlap = ((this->EyMin <= blk1->EyMin) && (blk1->EyMin <= this->EyMax)) || ((blk1->EyMin <= this->EyMin) && (this->EyMin <= blk1->EyMax));
         break;
     }
     switch (this->page->primaryRot) {
     case 0:
         before = overlap && this->EyMin < blk1->EyMin;
         break;
     case 1:
         before = overlap && this->ExMax > blk1->ExMax;
         break;
     case 2:
         before = overlap && this->EyMax > blk1->EyMax;
         break;
     case 3:
         before = overlap && this->ExMin < blk1->ExMin;
         break;
     }
     return before;
 }

 bool TextBlock::isBeforeByRule2(const TextBlock *blk1)
 {
     double cmp = 0;
     int rotLR = rot;

     if (!page->primaryLR) {
         rotLR = (rotLR + 2) % 4;
     }

     switch (rotLR) {
     case 0:
         cmp = ExMax - blk1->ExMin;
         break;
     case 1:
         cmp = EyMin - blk1->EyMax;
         break;
     case 2:
         cmp = blk1->ExMax - ExMin;
         break;
     case 3:
         cmp = blk1->EyMin - EyMax;
         break;
     }
     return cmp <= 0;
 }

 // Sort into reading order by performing a topological sort using the rules
 // given in "High Performance Document Layout Analysis", T.M. Breuel, 2003.
 // See http://pubs.iupr.org/#2003-breuel-sdiut
 // Topological sort is done by depth first search, see
 // http://en.wikipedia.org/wiki/Topological_sorting
 //
 // Visits this block (blk1): every not-yet-visited block of blkList that is
 // found to come before it is visited first (recursively), and then this
 // block is appended to sorted[].
 //   blkList   - head of the list of candidate blocks, walked via ->next
 //   pos1      - index of this block in visited[] (expected to be its
 //               position in blkList, as that is how pos2 is derived below)
 //   sorted    - output array receiving the blocks in sorted order
 //   sortPos   - index of the next free slot in sorted[]
 //   visited   - one flag per block, indexed by position in blkList
 //   cache     - array of cacheSize block pointers (a null entry ends the
 //               used part) holding blocks that previously vetoed rule 2,
 //               most recently used first
 //   cacheSize - number of slots in cache
 // Returns the updated sortPos.
 int TextBlock::visitDepthFirst(TextBlock *blkList, int pos1, TextBlock **sorted, int sortPos, bool *visited, TextBlock **cache, int cacheSize)
 {
     int pos2;
     TextBlock *blk1, *blk2, *blk3;
     bool before;

     // already handled (or currently being handled further up the recursion)
     if (visited[pos1]) {
         return sortPos;
     }

     blk1 = this;

 #if 0 // for debugging
   printf("visited: %d %.2f..%.2f %.2f..%.2f\n",
 	 sortPos, blk1->ExMin, blk1->ExMax, blk1->EyMin, blk1->EyMax);
 #endif
     // mark before recursing so that this block is skipped by nested visits
     visited[pos1] = true;
     pos2 = -1;
     for (blk2 = blkList; blk2; blk2 = blk2->next) {
         // pos2 tracks blk2's position in blkList
         pos2++;
         if (visited[pos2]) {
             // skip visited nodes
             continue;
         }
         before = false;

         // is blk2 before blk1? (for table entries)
         if (blk1->tableId >= 0 && blk1->tableId == blk2->tableId) {
             // same table: blk2 comes first if it is on the leading side of
             // blk1 (depending on primaryLR) while sharing part of its y range
             if (page->primaryLR) {
                 if (blk2->xMax <= blk1->xMin && blk2->yMin <= blk1->yMax && blk2->yMax >= blk1->yMin) {
                     before = true;
                 }
             } else {
                 if (blk2->xMin >= blk1->xMax && blk2->yMin <= blk1->yMax && blk2->yMax >= blk1->yMin) {
                     before = true;
                 }
             }

             // ... or if it ends at or above the top edge (yMin) of blk1
             if (blk2->yMax <= blk1->yMin) {
                 before = true;
             }
         } else {
             if (blk2->isBeforeByRule1(blk1)) {
                 // Rule (1) blk1 and blk2 overlap, and blk2 is above blk1.
                 before = true;
 #if 0 // for debugging
         printf("rule1: %.2f..%.2f %.2f..%.2f %.2f..%.2f %.2f..%.2f\n",
 	       blk2->ExMin, blk2->ExMax, blk2->EyMin, blk2->EyMax,
 	       blk1->ExMin, blk1->ExMax, blk1->EyMin, blk1->EyMax);
 #endif
             } else if (blk2->isBeforeByRule2(blk1)) {
                 // Rule (2) blk2 left of blk1, and no intervening blk3
                 //          such that blk1 is before blk3 by rule 1,
                 //          and blk3 is before blk2 by rule 1.
                 before = true;
                 // first try the cached blocks as the intervening blk3; on a
                 // hit, entry i is moved to the front of the cache
                 for (int i = 0; i < cacheSize && cache[i]; ++i) {
                     if (blk1->isBeforeByRule1(cache[i]) && cache[i]->isBeforeByRule1(blk2)) {
                         before = false;
                         std::rotate(cache, cache + i, cache + i + 1);
                         break;
                     }
                 }

                 // no cached block intervened: scan the whole list
                 if (before) {
                     for (blk3 = blkList; blk3; blk3 = blk3->next) {
                         if (blk3 == blk2 || blk3 == blk1) {
                             continue;
                         }
                         if (blk1->isBeforeByRule1(blk3) && blk3->isBeforeByRule1(blk2)) {
                             before = false;
                             // shift the cache entries one slot towards the
                             // end (the last one is dropped) and store blk3
                             // at the front
                             std::copy_backward(cache, cache + cacheSize - 1, cache + cacheSize);
                             cache[0] = blk3;
                             break;
                         }
                     }
                 }
 #if 0 // for debugging
         if (before) {
 	  printf("rule2: %.2f..%.2f %.2f..%.2f %.2f..%.2f %.2f..%.2f\n",
 	         blk1->ExMin, blk1->ExMax, blk1->EyMin, blk1->EyMax,
 	         blk2->ExMin, blk2->ExMax, blk2->EyMin, blk2->EyMax);
         }
 #endif
             }
         }
         if (before) {
             // blk2 is before blk1, so it needs to be visited
             // before we can add blk1 to the sorted list.
             sortPos = blk2->visitDepthFirst(blkList, pos2, sorted, sortPos, visited, cache, cacheSize);
         }
     }
 #if 0 // for debugging
   printf("sorted: %d %.2f..%.2f %.2f..%.2f\n",
 	 sortPos, blk1->ExMin, blk1->ExMax, blk1->EyMin, blk1->EyMax);
 #endif
     // all predecessors have been emitted; emit this block
     sorted[sortPos++] = blk1;
     return sortPos;
 }

 // Convenience overload: runs the depth-first visit with a fresh, empty
 // four-slot block cache.
 int TextBlock::visitDepthFirst(TextBlock *blkList, int pos1, TextBlock **sorted, int sortPos, bool *visited)
 {
     const int slotCount = 4;
     // value-initialisation leaves every slot null
     TextBlock *recentBlocks[slotCount] = {};
     return visitDepthFirst(blkList, pos1, sorted, sortPos, visited, recentBlocks, slotCount);
 }

 //------------------------------------------------------------------------
 // TextFlow
 //------------------------------------------------------------------------

 // Creates a flow on pageA that consists of the single block blk.
 TextFlow::TextFlow(TextPage *pageA, TextBlock *blk)
 {
     // list bookkeeping: blk is both the first and the last block, and the
     // flow is not linked to a successor yet
     page = pageA;
     next = nullptr;
     blocks = blk;
     lastBlk = blk;

     // the flow's extents are copied from its first block
     xMin = blk->xMin;
     yMin = blk->yMin;
     xMax = blk->xMax;
     yMax = blk->yMax;
     priMin = blk->priMin;
     priMax = blk->priMax;
 }

 // Deletes every block in the flow's block list.
 TextFlow::~TextFlow()
 {
     while (TextBlock *head = blocks) {
         // unlink the head before destroying it
         blocks = head->next;
         delete head;
     }
 }

 // Appends blk to the end of the flow's block list and enlarges the flow's
 // bounding box to include it.
 void TextFlow::addBlock(TextBlock *blk)
 {
     // the slot to fill is the tail's next pointer, or the list head when
     // the flow has no block yet
     TextBlock *&tailSlot = lastBlk ? lastBlk->next : blocks;
     tailSlot = blk;
     lastBlk = blk;

     // grow the bounding box
     if (xMin > blk->xMin) {
         xMin = blk->xMin;
     }
     if (xMax < blk->xMax) {
         xMax = blk->xMax;
     }
     if (yMin > blk->yMin) {
         yMin = blk->yMin;
     }
     if (yMax < blk->yMax) {
         yMax = blk->yMax;
     }
 }

 // Returns true if blk may be added to this flow: the font size of its
 // first word must not exceed that of the flow's last block, and its extent
 // must lie within the flow's [priMin, priMax] range.  prevBlk is not used.
 bool TextFlow::blockFits(const TextBlock *blk, const TextBlock *prevBlk) const
 {
     // lower blocks must use smaller fonts
     const bool fontGrows = blk->lines->words->fontSize > lastBlk->lines->words->fontSize;
     if (fontGrows) {
         return false;
     }

     switch (page->primaryRot) {
     case 0:
     case 2:
         // rotations 0 and 2 compare the x extent
         return blk->xMin >= priMin && blk->xMax <= priMax;
     case 1:
     case 3:
         // rotations 1 and 3 compare the y extent
         return blk->yMin >= priMin && blk->yMax <= priMax;
     }
     return false;
 }

 #ifdef TEXTOUT_WORD_LIST

 //------------------------------------------------------------------------
 // TextWordList
 //------------------------------------------------------------------------

 // Builds the list of words of a page.
 //   - raw-order page: the words in rawWords order
 //   - physLayout:     all words of all flows, sorted with TextWord::cmpYX
 //   - otherwise:      all words in flow / block / line / word order
 TextWordList::TextWordList(const TextPage *text, bool physLayout)
 {
     if (text->rawOrder) {
         for (TextWord *w = text->rawWords; w; w = w->next) {
             words.push_back(w);
         }
         return;
     }

     // collect the words in flow order
     for (TextFlow *flow = text->flows; flow; flow = flow->next) {
         for (TextBlock *blk = flow->blocks; blk; blk = blk->next) {
             for (TextLine *line = blk->lines; line; line = line->next) {
                 for (TextWord *w = line->words; w; w = w->next) {
                     words.push_back(w);
                 }
             }
         }
     }
     if (!physLayout) {
         return;
     }

     // this is inefficient, but it's also the least useful of these
     // three cases: sort a scratch copy, then write the order back
     const int total = static_cast<int>(words.size());
     TextWord **scratch = static_cast<TextWord **>(gmallocn(total, sizeof(TextWord *)));
     for (int k = 0; k < total; ++k) {
         scratch[k] = words[k];
     }
     qsort(scratch, total, sizeof(TextWord *), &TextWord::cmpYX);
     for (int k = 0; k < total; ++k) {
         words[k] = scratch[k];
     }
     gfree(scratch);
 }

 // Nothing to release explicitly: the destructor does not delete the words
 // the list points to.
 TextWordList::~TextWordList() = default;

 // Number of words in the list.
 int TextWordList::getLength() const
 {
     return static_cast<int>(words.size());
 }

 // Returns the word at index idx, or nullptr when idx is out of range.
 TextWord *TextWordList::get(int idx)
 {
     const bool inRange = idx >= 0 && idx < static_cast<int>(words.size());
     return inRange ? words[idx] : nullptr;
 }

 #endif // TEXTOUT_WORD_LIST

 //------------------------------------------------------------------------
 // TextPage
 //------------------------------------------------------------------------

 // Creates an empty text page with a reference count of one.
 //   rawOrderA    - stored in rawOrder; when false, the four per-rotation
 //                  word pools are created
 //   discardDiagA - stored in discardDiag
 TextPage::TextPage(bool rawOrderA, bool discardDiagA)
 {
     refCnt = 1;

     // configuration
     rawOrder = rawOrderA;
     discardDiag = discardDiagA;
     mergeCombining = true;

     // word-building state
     curWord = nullptr;
     curFont = nullptr;
     curFontSize = 0;
     charPos = 0;
     nest = 0;
     nTinyChars = 0;
     lastCharOverlap = false;
     diagonal = false;

     // one pool per rotation value, only when words are not kept in raw order
     if (!rawOrder) {
         for (int r = 0; r != 4; ++r) {
             pools[r] = std::make_unique<TextPool>();
         }
     }

     // containers for the words and the layout built from them
     rawWords = nullptr;
     rawLastWord = nullptr;
     flows = nullptr;
     blocks = nullptr;

     // search state
     haveLastFind = false;
     lastFindXMin = 0;
     lastFindYMin = 0;
 }

 // Releases everything the page holds by delegating to clear().
 TextPage::~TextPage()
 {
     this->clear();
 }

 // Adds one reference to the page.
 void TextPage::incRefCnt()
 {
     ++refCnt;
 }

 // Drops one reference; the page deletes itself when the count reaches
 // zero, so the object must not be touched afterwards by the caller.
 void TextPage::decRefCnt()
 {
     const bool wasLastReference = (--refCnt == 0);
     if (wasLastReference) {
         delete this;
     }
 }

 // Resets the page and records the page size taken from state; without a
 // state both dimensions are set to zero.
 void TextPage::startPage(const GfxState *state)
 {
     clear();
     pageWidth = state ? state->getPageWidth() : 0;
     pageHeight = state ? state->getPageHeight() : 0;
 }

 // Finishes the page by ending the word that is still open, if any.
 void TextPage::endPage()
 {
     if (!curWord) {
         return;
     }
     endWord();
 }

 void TextPage::clear()
 {
     int rot;
     TextFlow *flow;
     TextWord *word;

     if (curWord) {
         delete curWord;
         curWord = nullptr;
     }
     if (rawOrder) {
         while (rawWords) {
             word = rawWords;
             rawWords = rawWords->next;
             delete word;
         }
     } else {
         for (rot = 0; rot < 4; ++rot) {
             pools[rot] = std::make_unique<TextPool>();
         }
         while (flows) {
             flow = flows;
             flows = flows->next;
             delete flow;
         }
         gfree(blocks);
     }
     fonts.clear();
     underlines.clear();
     links.clear();

     diagonal = false;
     curWord = nullptr;
     charPos = 0;
     curFont = nullptr;
     curFontSize = 0;
     nest = 0;
     nTinyChars = 0;
     flows = nullptr;
     blocks = nullptr;
     rawWords = nullptr;
     rawLastWord = nullptr;
 }

 void TextPage::updateFont(const GfxState *state)
 {
     const double *fm;
     const char *name;
     int code, mCode, letterCode, anyCode;
     double w;

     // get the font info object
     curFont = nullptr;
     for (const std::unique_ptr<TextFontInfo> &f : fonts) {
         if (f->matches(state)) {
             curFont = f.get();
             break;
         }
     }
     if (!curFont) {
         fonts.emplace_back(std::make_unique<TextFontInfo>(state));
         curFont = fonts.back().get();
     }

     // adjust the font size
     GfxFont *const gfxFont = state->getFont().get();
     curFontSize = state->getTransformedFontSize();
     if (gfxFont && gfxFont->getType() == fontType3) {
         // This is a hack which makes it possible to deal with some Type 3
         // fonts.  The problem is that it's impossible to know what the
         // base coordinate system used in the font is without actually
         // rendering the font.  This code tries to guess by looking at the
         // width of the character 'm' (which breaks if the font is a
         // subset that doesn't contain 'm').
         mCode = letterCode = anyCode = -1;
         for (code = 0; code < 256; ++code) {
             name = ((Gfx8BitFont *)gfxFont)->getCharName(code);
             int nameLen = name ? strlen(name) : 0;
             bool nameOneChar = nameLen == 1 || (nameLen > 1 && name[1] == '\0');
             if (nameOneChar && name[0] == 'm') {
                 mCode = code;
             }
             if (letterCode < 0 && nameOneChar && ((name[0] >= 'A' && name[0] <= 'Z') || (name[0] >= 'a' && name[0] <= 'z'))) {
                 letterCode = code;
             }
             if (anyCode < 0 && name && ((Gfx8BitFont *)gfxFont)->getWidth(code) > 0) {
                 anyCode = code;
             }
         }
         if (mCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(mCode)) > 0) {
             // 0.6 is a generic average 'm' width -- yes, this is a hack
             curFontSize *= w / 0.6;
         } else if (letterCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(letterCode)) > 0) {
             // even more of a hack: 0.5 is a generic letter width
             curFontSize *= w / 0.5;
         } else if (anyCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(anyCode)) > 0) {
             // better than nothing: 0.5 is a generic character width
             curFontSize *= w / 0.5;
         }
         fm = gfxFont->getFontMatrix();
         if (fm[0] != 0) {
             curFontSize *= fabs(fm[3] / fm[0]);
         }
     }
 }

 // Starts a new word (curWord) whose rotation is derived from the font
 // transform matrix of state.  If a word is already open, only the nesting
 // counter is incremented.  Also updates the diagonal flag.
 void TextPage::beginWord(const GfxState *state)
 {
     // This check is needed because Type 3 characters can contain
     // text-drawing operations (when TextPage is being used via
     // {X,Win}SplashOutputDev rather than TextOutputDev).
     if (curWord) {
         ++nest;
         return;
     }

     // compute the rotation
     double a, b, c, d;
     state->getFontTransMat(&a, &b, &c, &d);
     std::shared_ptr<GfxFont> gfxFont = state->getFont();
     if (gfxFont && gfxFont->getType() == fontType3) {
         // Type 3 fonts: pre-multiply by the 2x2 part of the font matrix
         const double *fm = gfxFont->getFontMatrix();
         const double a2 = fm[0] * a + fm[1] * c;
         const double b2 = fm[0] * b + fm[1] * d;
         const double c2 = fm[2] * a + fm[3] * c;
         const double d2 = fm[2] * b + fm[3] * d;
         a = a2;
         b = b2;
         c = c2;
         d = d2;
     }

     int rot;
     if (fabs(a * d) > fabs(b * c)) {
         rot = (a > 0 || d < 0) ? 0 : 2;
     } else {
         rot = (c > 0) ? 1 : 3;
     }

     // the text counts as diagonal when the smaller of |a| and |b| exceeds
     // diagonalThreshold times the larger one
     const double absA = fabs(a);
     const double absB = fabs(b);
     diagonal = (absA >= absB) ? absB > diagonalThreshold * absA : absA > diagonalThreshold * absB;

     // for vertical writing mode, the lines are effectively rotated 90
     // degrees
     if (gfxFont && gfxFont->getWMode()) {
         rot = (rot + 1) & 3;
     }

     curWord = new TextWord(state, rot, curFontSize);
 }

 // Adds one character (which may map to several Unicode values) to the
 // page, starting or ending words as needed.
 //   state   - graphics state used for the coordinate transforms, the
 //             char/word spacing, the horizontal scaling and the font matrix
 //   x, y    - character position; passed through state->transform()
 //   dx, dy  - character displacement; char/word spacing is subtracted and
 //             the result passed through state->transformDelta()
 //   c       - character code (code 0x20 also picks up the word spacing)
 //   nBytes  - amount added to charPos; this happens on every exit path
 //   u, uLen - Unicode values for the character and their count
 void TextPage::addChar(const GfxState *state, double x, double y, double dx, double dy, CharCode c, int nBytes, const Unicode *u, int uLen)
 {
     double x1, y1, w1, h1, dx2, dy2, base, sp, delta;
     bool overlap;
     int i;
     int wMode;
     Matrix mat;

     // subtract char and word spacing from the dx,dy values
     sp = state->getCharSpace();
     if (c == (CharCode)0x20) {
         sp += state->getWordSpace();
     }
     state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2);
     dx -= dx2;
     dy -= dy2;
     state->transformDelta(dx, dy, &w1, &h1);

     // throw away chars that aren't inside the page bounds
     // (and also do a sanity check on the character size)
     state->transform(x, y, &x1, &y1);
     if (x1 + w1 < 0 || x1 > pageWidth || y1 + h1 < 0 || y1 > pageHeight || std::isnan(x1) || std::isnan(y1) || std::isnan(w1) || std::isnan(h1)) {
         charPos += nBytes;
         return;
     }

     // check the tiny chars limit
     // (chars smaller than 3 in both directions are dropped once more than
     // 50000 of them have been seen)
     if (fabs(w1) < 3 && fabs(h1) < 3) {
         if (++nTinyChars > 50000) {
             charPos += nBytes;
             return;
         }
     }

     // break words at space character
     if (uLen == 1 && UnicodeIsWhitespace(u[0])) {
         charPos += nBytes;
         endWord();
         return;
     } else if (uLen == 1 && u[0] == (Unicode)0x0) {
         // ignore null characters
         charPos += nBytes;
         return;
     }

     // per-character matrix: the font transform (first row scaled by the
     // horizontal scaling) with the transformed position as translation
     state->getFontTransMat(&mat.m[0], &mat.m[1], &mat.m[2], &mat.m[3]);
     mat.m[0] *= state->getHorizScaling();
     mat.m[1] *= state->getHorizScaling();
     mat.m[4] = x1;
     mat.m[5] = y1;

     // give the current word the chance to absorb the character via
     // addCombining(); if it reports success nothing more is to be done
     if (mergeCombining && curWord && uLen == 1 && curWord->addCombining(state, curFont, curFontSize, x1, y1, w1, h1, charPos, nBytes, c, u[0], mat)) {
         charPos += nBytes;
         return;
     }

     // start a new word if:
     // (1) this character doesn't fall in the right place relative to
     //     the end of the previous word (this places upper and lower
     //     constraints on the position deltas along both the primary
     //     and secondary axes), or
     // (2) this character overlaps the previous one (duplicated text), or
     // (3) the previous character was an overlap (we want each duplicated
     //     character to be in a word by itself at this stage),
     // (4) the font size has changed
     // (5) the WMode changed
     if (curWord && curWord->len() > 0) {
         base = sp = delta = 0; // make gcc happy
         // base:  position on the secondary axis
         // sp:    gap between the end of the word and this character
         // delta: offset from the last character's edge
         switch (curWord->rot) {
         case 0:
             base = y1;
             sp = x1 - curWord->xMax;
             delta = x1 - curWord->chars.back().edge;
             break;
         case 1:
             base = x1;
             sp = y1 - curWord->yMax;
             delta = y1 - curWord->chars.back().edge;
             break;
         case 2:
             base = y1;
             sp = curWord->xMin - x1;
             delta = curWord->chars.back().edge - x1;
             break;
         case 3:
             base = x1;
             sp = curWord->yMin - y1;
             delta = curWord->chars.back().edge - y1;
             break;
         }
         overlap = fabs(delta) < dupMaxPriDelta * curWord->fontSize && fabs(base - curWord->base) < dupMaxSecDelta * curWord->fontSize;
         wMode = curFont->getWMode();
         if (overlap || lastCharOverlap || sp < -minDupBreakOverlap * curWord->fontSize || sp > minWordBreakSpace * curWord->fontSize || fabs(base - curWord->base) > 0.5 || curFontSize != curWord->fontSize || wMode != curWord->wMode) {
             endWord();
         }
         lastCharOverlap = overlap;
     } else {
         lastCharOverlap = false;
     }

     if (uLen != 0) {
         // start a new word if needed
         if (!curWord) {
             beginWord(state);
         }

         // throw away diagonal chars
         if (discardDiag && diagonal) {
             charPos += nBytes;
             return;
         }

         // page rotation and/or transform matrices can cause text to be
         // drawn in reverse order -- in this case, swap the begin/end
         // coordinates and break text into individual chars
         if ((curWord->rot == 0 && w1 < 0) || (curWord->rot == 1 && h1 < 0) || (curWord->rot == 2 && w1 > 0) || (curWord->rot == 3 && h1 > 0)) {
             endWord();
             beginWord(state);

             // throw away diagonal chars
             if (discardDiag && diagonal) {
                 charPos += nBytes;
                 return;
             }

             x1 += w1;
             y1 += h1;
             w1 = -w1;
             h1 = -h1;
         }

         // add the characters to the current word
         // (the extent is split evenly between the uLen Unicode values)
         w1 /= uLen;
         h1 /= uLen;
         for (i = 0; i < uLen; ++i) {
             curWord->addChar(state, curFont, x1 + i * w1, y1 + i * h1, w1, h1, charPos, nBytes, c, u[i], mat);
         }
     }
     charPos += nBytes;
 }

 // Advances the character position counter by nChars.
 void TextPage::incCharCount(int nChars)
 {
     charPos = charPos + nChars;
 }

 // Ends the current word and hands it to addWord().  A call that matches a
 // nested beginWord() only decrements the nesting counter.
 void TextPage::endWord()
 {
     // This check is needed because Type 3 characters can contain
     // text-drawing operations (when TextPage is being used via
     // {X,Win}SplashOutputDev rather than TextOutputDev).
     if (nest > 0) {
         nest--;
         return;
     }

     if (!curWord) {
         return;
     }
     addWord(curWord);
     curWord = nullptr;
 }

 // Takes over word: empty words are deleted, other words are appended to
 // the raw word list (raw order) or added to the pool for their rotation.
 void TextPage::addWord(TextWord *word)
 {
     // throw away zero-length words -- they don't have valid xMin/xMax
     // values, and they're useless anyway
     if (word->len() == 0) {
         delete word;
         return;
     }

     if (!rawOrder) {
         pools[word->rot]->addWord(word);
         return;
     }

     // append to the raw list: either after the current tail or, for the
     // first word, as the list head
     TextWord *&tailSlot = rawLastWord ? rawLastWord->next : rawWords;
     tailSlot = word;
     rawLastWord = word;
 }

 // Records an underline built from the two given points.
 void TextPage::addUnderline(double x0, double y0, double x1, double y1)
 {
     auto underline = std::make_unique<TextUnderline>(x0, y0, x1, y1);
     underlines.push_back(std::move(underline));
 }

 void TextPage::addLink(int xMin, int yMin, int xMax, int yMax, AnnotLink *link)
 {
     links.emplace_back(std::make_unique<TextLink>(xMin, yMin, xMax, yMax, link));
 }

 void TextPage::coalesce(bool physLayout, double fixedPitch, bool doHTML)
 {
     coalesce(physLayout, fixedPitch, doHTML, TextOutputDev::minColSpacing1_default);
 }

 void TextPage::coalesce(bool physLayout, double fixedPitch, bool doHTML, double minColSpacing1)
 {
     TextWord *word0, *word1, *word2;
     TextLine *line;
     TextBlock *blkList, *blk, *lastBlk, *blk0, *blk1, *blk2;
     TextFlow *flow, *lastFlow;
     int rot, poolMinBaseIdx, baseIdx, startBaseIdx, endBaseIdx;
     double minBase, maxBase, newMinBase, newMaxBase;
     double fontSize, colSpace1, colSpace2, lineSpace, intraLineSpace, blkSpace;
     bool found;
     int count[4];
     int lrCount;
     int col1, col2;
     int j, n;

     if (rawOrder) {
         primaryRot = 0;
         primaryLR = true;
         return;
     }

     const UnicodeMap *uMap = globalParams->getTextEncoding();
     blkList = nullptr;
     lastBlk = nullptr;
     nBlocks = 0;
     primaryRot = 0;

 #if 0 // for debugging
   printf("*** initial words ***\n");
   for (rot = 0; rot < 4; ++rot) {
     pool = pools[rot];
     for (baseIdx = pool->minBaseIdx; baseIdx <= pool->maxBaseIdx; ++baseIdx) {
       for (word0 = pool->getPool(baseIdx); word0; word0 = word0->next) {
 	printf("    word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f rot=%d link=%p '",
 	       word0->xMin, word0->xMax, word0->yMin, word0->yMax,
 	       word0->base, word0->fontSize, rot*90, word0->link);
 	for (i = 0; i < word0->len; ++i) {
 	  fputc(word0->text[i] & 0xff, stdout);
 	}
 	printf("'\n");
       }
     }
   }
   printf("\n");
 #endif

 #if 0 //~ for debugging
   for (i = 0; i < underlines->getLength(); ++i) {
     underline = (TextUnderline *)underlines->get(i);
     printf("underline: x=%g..%g y=%g..%g horiz=%d\n",
 	   underline->x0, underline->x1, underline->y0, underline->y1,
 	   underline->horiz);
   }
 #endif

     if (doHTML) {

         //----- handle underlining
         for (const std::unique_ptr<TextUnderline> &underline : underlines) {
             if (underline->horiz) {
                 // rot = 0
                 if (pools[0]->minBaseIdx <= pools[0]->maxBaseIdx) {
                     startBaseIdx = pools[0]->getBaseIdx(underline->y0 + minUnderlineGap);
                     endBaseIdx = pools[0]->getBaseIdx(underline->y0 + maxUnderlineGap);
                     for (j = startBaseIdx; j <= endBaseIdx; ++j) {
                         for (word0 = pools[0]->getPool(j); word0; word0 = word0->next) {
                             //~ need to check the y value against the word baseline
                             if (underline->x0 < word0->xMin + underlineSlack && word0->xMax - underlineSlack < underline->x1) {
                                 word0->underlined = true;
                             }
                         }
                     }
                 }

                 // rot = 2
                 if (pools[2]->minBaseIdx <= pools[2]->maxBaseIdx) {
                     startBaseIdx = pools[2]->getBaseIdx(underline->y0 - maxUnderlineGap);
                     endBaseIdx = pools[2]->getBaseIdx(underline->y0 - minUnderlineGap);
                     for (j = startBaseIdx; j <= endBaseIdx; ++j) {
                         for (word0 = pools[2]->getPool(j); word0; word0 = word0->next) {
                             if (underline->x0 < word0->xMin + underlineSlack && word0->xMax - underlineSlack < underline->x1) {
                                 word0->underlined = true;
                             }
                         }
                     }
                 }
             } else {
                 // rot = 1
                 if (pools[1]->minBaseIdx <= pools[1]->maxBaseIdx) {
                     startBaseIdx = pools[1]->getBaseIdx(underline->x0 - maxUnderlineGap);
                     endBaseIdx = pools[1]->getBaseIdx(underline->x0 - minUnderlineGap);
                     for (j = startBaseIdx; j <= endBaseIdx; ++j) {
                         for (word0 = pools[1]->getPool(j); word0; word0 = word0->next) {
                             if (underline->y0 < word0->yMin + underlineSlack && word0->yMax - underlineSlack < underline->y1) {
                                 word0->underlined = true;
                             }
                         }
                     }
                 }

                 // rot = 3
                 if (pools[3]->minBaseIdx <= pools[3]->maxBaseIdx) {
                     startBaseIdx = pools[3]->getBaseIdx(underline->x0 + minUnderlineGap);
                     endBaseIdx = pools[3]->getBaseIdx(underline->x0 + maxUnderlineGap);
                     for (j = startBaseIdx; j <= endBaseIdx; ++j) {
                         for (word0 = pools[3]->getPool(j); word0; word0 = word0->next) {
                             if (underline->y0 < word0->yMin + underlineSlack && word0->yMax - underlineSlack < underline->y1) {
                                 word0->underlined = true;
                             }
                         }
                     }
                 }
             }
         }

         //----- handle links
         for (const std::unique_ptr<TextLink> &link : links) {
             // rot = 0
             if (pools[0]->minBaseIdx <= pools[0]->maxBaseIdx) {
                 startBaseIdx = pools[0]->getBaseIdx(link->yMin);
                 endBaseIdx = pools[0]->getBaseIdx(link->yMax);
                 for (j = startBaseIdx; j <= endBaseIdx; ++j) {
                     for (word0 = pools[0]->getPool(j); word0; word0 = word0->next) {
                         if (link->xMin < word0->xMin + hyperlinkSlack && word0->xMax - hyperlinkSlack < link->xMax && link->yMin < word0->yMin + hyperlinkSlack && word0->yMax - hyperlinkSlack < link->yMax) {
                             word0->link = link->link;
                         }
                     }
                 }
             }

             // rot = 2
             if (pools[2]->minBaseIdx <= pools[2]->maxBaseIdx) {
                 startBaseIdx = pools[2]->getBaseIdx(link->yMin);
                 endBaseIdx = pools[2]->getBaseIdx(link->yMax);
                 for (j = startBaseIdx; j <= endBaseIdx; ++j) {
                     for (word0 = pools[2]->getPool(j); word0; word0 = word0->next) {
                         if (link->xMin < word0->xMin + hyperlinkSlack && word0->xMax - hyperlinkSlack < link->xMax && link->yMin < word0->yMin + hyperlinkSlack && word0->yMax - hyperlinkSlack < link->yMax) {
                             word0->link = link->link;
                         }
                     }
                 }
             }

             // rot = 1
             if (pools[1]->minBaseIdx <= pools[1]->maxBaseIdx) {
                 startBaseIdx = pools[1]->getBaseIdx(link->xMin);
                 endBaseIdx = pools[1]->getBaseIdx(link->xMax);
                 for (j = startBaseIdx; j <= endBaseIdx; ++j) {
                     for (word0 = pools[1]->getPool(j); word0; word0 = word0->next) {
                         if (link->yMin < word0->yMin + hyperlinkSlack && word0->yMax - hyperlinkSlack < link->yMax && link->xMin < word0->xMin + hyperlinkSlack && word0->xMax - hyperlinkSlack < link->xMax) {
                             word0->link = link->link;
                         }
                     }
                 }
             }

             // rot = 3
             if (pools[3]->minBaseIdx <= pools[3]->maxBaseIdx) {
                 startBaseIdx = pools[3]->getBaseIdx(link->xMin);
                 endBaseIdx = pools[3]->getBaseIdx(link->xMax);
                 for (j = startBaseIdx; j <= endBaseIdx; ++j) {
                     for (word0 = pools[3]->getPool(j); word0; word0 = word0->next) {
                         if (link->yMin < word0->yMin + hyperlinkSlack && word0->yMax - hyperlinkSlack < link->yMax && link->xMin < word0->xMin + hyperlinkSlack && word0->xMax - hyperlinkSlack < link->xMax) {
                             word0->link = link->link;
                         }
                     }
                 }
             }
         }
     }

     //----- assemble the blocks

     //~ add an outer loop for writing mode (vertical text)

     // build blocks for each rotation value
     for (rot = 0; rot < 4; ++rot) {
         std::unique_ptr<TextPool> &pool = pools[rot];
         poolMinBaseIdx = pool->minBaseIdx;
         count[rot] = 0;

         // add blocks until no more words are left
         while (true) {

             // find the first non-empty line in the pool
             for (; poolMinBaseIdx <= pool->maxBaseIdx && !pool->getPool(poolMinBaseIdx); ++poolMinBaseIdx) {
                 ;
             }
             if (poolMinBaseIdx > pool->maxBaseIdx) {
                 break;
             }

             // look for the left-most word in the first four lines of the
             // pool -- this avoids starting with a superscript word
             startBaseIdx = poolMinBaseIdx;
             for (baseIdx = poolMinBaseIdx + 1; baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx; ++baseIdx) {
                 if (!pool->getPool(baseIdx)) {
                     continue;
                 }
                 if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx)) < 0) {
                     startBaseIdx = baseIdx;
                 }
             }

             // create a new block
             word0 = pool->getPool(startBaseIdx);
             pool->setPool(startBaseIdx, word0->next);
             word0->next = nullptr;
             blk = new TextBlock(this, rot);
             blk->addWord(word0);

             fontSize = word0->fontSize;
             minBase = maxBase = word0->base;
             colSpace1 = minColSpacing1 * fontSize;
             colSpace2 = minColSpacing2 * fontSize;
             lineSpace = maxLineSpacingDelta * fontSize;
             intraLineSpace = maxIntraLineDelta * fontSize;

             // add words to the block
             do {
                 found = false;

                 // look for words on the line above the current top edge of
                 // the block
                 newMinBase = minBase;
                 for (baseIdx = pool->getBaseIdx(minBase); baseIdx >= pool->getBaseIdx(minBase - lineSpace); --baseIdx) {
                     word0 = nullptr;
                     word1 = pool->getPool(baseIdx);
                     while (word1) {
                         if (word1->base < minBase && word1->base >= minBase - lineSpace && ((rot == 0 || rot == 2) ? (word1->xMin < blk->xMax && word1->xMax > blk->xMin) : (word1->yMin < blk->yMax && word1->yMax > blk->yMin))
                             && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta1 * fontSize) {
                             word2 = word1;
                             if (word0) {
                                 word0->next = word1->next;
                             } else {
                                 pool->setPool(baseIdx, word1->next);
                             }
                             word1 = word1->next;
                             word2->next = nullptr;
                             blk->addWord(word2);
                             found = true;
                             newMinBase = word2->base;
                         } else {
                             word0 = word1;
                             word1 = word1->next;
                         }
                     }
                 }
                 minBase = newMinBase;

                 // look for words on the line below the current bottom edge of
                 // the block
                 newMaxBase = maxBase;
                 for (baseIdx = pool->getBaseIdx(maxBase); baseIdx <= pool->getBaseIdx(maxBase + lineSpace); ++baseIdx) {
                     word0 = nullptr;
                     word1 = pool->getPool(baseIdx);
                     while (word1) {
                         if (word1->base > maxBase && word1->base <= maxBase + lineSpace && ((rot == 0 || rot == 2) ? (word1->xMin < blk->xMax && word1->xMax > blk->xMin) : (word1->yMin < blk->yMax && word1->yMax > blk->yMin))
                             && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta1 * fontSize) {
                             word2 = word1;
                             if (word0) {
                                 word0->next = word1->next;
                             } else {
                                 pool->setPool(baseIdx, word1->next);
                             }
                             word1 = word1->next;
                             word2->next = nullptr;
                             blk->addWord(word2);
                             found = true;
                             newMaxBase = word2->base;
                         } else {
                             word0 = word1;
                             word1 = word1->next;
                         }
                     }
                 }
                 maxBase = newMaxBase;

                 // look for words that are on lines already in the block, and
                 // that overlap the block horizontally
                 for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); ++baseIdx) {
                     word0 = nullptr;
                     word1 = pool->getPool(baseIdx);
                     while (word1) {
                         if (word1->base >= minBase - intraLineSpace && word1->base <= maxBase + intraLineSpace
                             && ((rot == 0 || rot == 2) ? (word1->xMin < blk->xMax + colSpace1 && word1->xMax > blk->xMin - colSpace1) : (word1->yMin < blk->yMax + colSpace1 && word1->yMax > blk->yMin - colSpace1))
                             && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta2 * fontSize) {
                             word2 = word1;
                             if (word0) {
                                 word0->next = word1->next;
                             } else {
                                 pool->setPool(baseIdx, word1->next);
                             }
                             word1 = word1->next;
                             word2->next = nullptr;
                             blk->addWord(word2);
                             found = true;
                         } else {
                             word0 = word1;
                             word1 = word1->next;
                         }
                     }
                 }

                 // only check for outlying words (the next two chunks of code)
                 // if we didn't find anything else
                 if (found) {
                     continue;
                 }

                 // scan down the left side of the block, looking for words
                 // that are near (but not overlapping) the block; if there are
                 // three or fewer, add them to the block
                 n = 0;
                 for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); ++baseIdx) {
                     word1 = pool->getPool(baseIdx);
                     while (word1) {
                         if (word1->base >= minBase - intraLineSpace && word1->base <= maxBase + intraLineSpace
                             && ((rot == 0 || rot == 2) ? (word1->xMax <= blk->xMin && word1->xMax > blk->xMin - colSpace2) : (word1->yMax <= blk->yMin && word1->yMax > blk->yMin - colSpace2))
                             && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta3 * fontSize) {
                             ++n;
                             break;
                         }
                         word1 = word1->next;
                     }
                 }
                 if (n > 0 && n <= 3) {
                     for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); ++baseIdx) {
                         word0 = nullptr;
                         word1 = pool->getPool(baseIdx);
                         while (word1) {
                             if (word1->base >= minBase - intraLineSpace && word1->base <= maxBase + intraLineSpace
                                 && ((rot == 0 || rot == 2) ? (word1->xMax <= blk->xMin && word1->xMax > blk->xMin - colSpace2) : (word1->yMax <= blk->yMin && word1->yMax > blk->yMin - colSpace2))
                                 && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta3 * fontSize) {
                                 word2 = word1;
                                 if (word0) {
                                     word0->next = word1->next;
                                 } else {
                                     pool->setPool(baseIdx, word1->next);
                                 }
                                 word1 = word1->next;
                                 word2->next = nullptr;
                                 blk->addWord(word2);
                                 if (word2->base < minBase) {
                                     minBase = word2->base;
                                 } else if (word2->base > maxBase) {
                                     maxBase = word2->base;
                                 }
                                 found = true;
                                 break;
                             } else {
                                 word0 = word1;
                                 word1 = word1->next;
                             }
                         }
                     }
                 }

                 // scan down the right side of the block, looking for words
                 // that are near (but not overlapping) the block; if there are
                 // three or fewer, add them to the block
                 n = 0;
                 for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); ++baseIdx) {
                     word1 = pool->getPool(baseIdx);
                     while (word1) {
                         if (word1->base >= minBase - intraLineSpace && word1->base <= maxBase + intraLineSpace
                             && ((rot == 0 || rot == 2) ? (word1->xMin >= blk->xMax && word1->xMin < blk->xMax + colSpace2) : (word1->yMin >= blk->yMax && word1->yMin < blk->yMax + colSpace2))
                             && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta3 * fontSize) {
                             ++n;
                             break;
                         }
                         word1 = word1->next;
                     }
                 }
                 if (n > 0 && n <= 3) {
                     for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); ++baseIdx) {
                         word0 = nullptr;
                         word1 = pool->getPool(baseIdx);
                         while (word1) {
                             if (word1->base >= minBase - intraLineSpace && word1->base <= maxBase + intraLineSpace
                                 && ((rot == 0 || rot == 2) ? (word1->xMin >= blk->xMax && word1->xMin < blk->xMax + colSpace2) : (word1->yMin >= blk->yMax && word1->yMin < blk->yMax + colSpace2))
                                 && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta3 * fontSize) {
                                 word2 = word1;
                                 if (word0) {
                                     word0->next = word1->next;
                                 } else {
                                     pool->setPool(baseIdx, word1->next);
                                 }
                                 word1 = word1->next;
                                 word2->next = nullptr;
                                 blk->addWord(word2);
                                 if (word2->base < minBase) {
                                     minBase = word2->base;
                                 } else if (word2->base > maxBase) {
                                     maxBase = word2->base;
                                 }
                                 found = true;
                                 break;
                             } else {
                                 word0 = word1;
                                 word1 = word1->next;
                             }
                         }
                     }
                 }

             } while (found);

             //~ need to compute the primary writing mode (horiz/vert) in
             //~ addition to primary rotation

             // coalesce the block, and add it to the list
             blk->coalesce(uMap, fixedPitch);
             if (lastBlk) {
                 lastBlk->next = blk;
             } else {
                 blkList = blk;
             }
             lastBlk = blk;
             count[rot] += blk->charCount;
             ++nBlocks;
         }

         if (count[rot] > count[primaryRot]) {
             primaryRot = rot;
         }
     }

 #if 0 // for debugging
   printf("*** rotation ***\n");
   for (rot = 0; rot < 4; ++rot) {
     printf("  %d: %6d\n", rot, count[rot]);
   }
   printf("  primary rot = %d\n", primaryRot);
   printf("\n");
 #endif

 #if 0 // for debugging
   printf("*** blocks ***\n");
   for (blk = blkList; blk; blk = blk->next) {
     printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f\n",
 	   blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax);
     for (line = blk->lines; line; line = line->next) {
       printf("  line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f\n",
 	     line->xMin, line->xMax, line->yMin, line->yMax, line->base);
       for (word0 = line->words; word0; word0 = word0->next) {
 	printf("    word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '",
 	       word0->xMin, word0->xMax, word0->yMin, word0->yMax,
 	       word0->base, word0->fontSize, word0->spaceAfter);
 	for (i = 0; i < word0->len; ++i) {
 	  fputc(word0->text[i] & 0xff, stdout);
 	}
 	printf("'\n");
       }
     }
   }
   printf("\n");
 #endif

     // determine the primary direction
     lrCount = 0;
     for (blk = blkList; blk; blk = blk->next) {
         for (line = blk->lines; line; line = line->next) {
             for (word0 = line->words; word0; word0 = word0->next) {
                 for (size_t i = 0; i < word0->len(); ++i) {
                     if (unicodeTypeL(word0->chars[i].text)) {
                         ++lrCount;
                     } else if (unicodeTypeR(word0->chars[i].text)) {
                         --lrCount;
                     }
                 }
             }
         }
     }
     primaryLR = lrCount >= 0;

 #if 0 // for debugging
   printf("*** direction ***\n");
   printf("lrCount = %d\n", lrCount);
   printf("primaryLR = %d\n", primaryLR);
 #endif

     //----- column assignment

     // sort blocks into xy order for column assignment
     if (blocks) {
         gfree(blocks);
     }
     if (physLayout && fixedPitch) {

         blocks = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *));
         int i;
         for (blk = blkList, i = 0; blk; blk = blk->next, ++i) {
             blocks[i] = blk;
             col1 = 0; // make gcc happy
             switch (primaryRot) {
             case 0:
                 col1 = (int)(blk->xMin / fixedPitch + 0.5);
                 break;
             case 1:
                 col1 = (int)(blk->yMin / fixedPitch + 0.5);
                 break;
             case 2:
                 col1 = (int)((pageWidth - blk->xMax) / fixedPitch + 0.5);
                 break;
             case 3:
                 col1 = (int)((pageHeight - blk->yMax) / fixedPitch + 0.5);
                 break;
             }
             blk->col = col1;
             for (line = blk->lines; line; line = line->next) {
                 for (j = 0; j <= line->len; ++j) {
                     line->col[j] += col1;
                 }
             }
         }

     } else {

         // sort blocks into xy order for column assignment
         blocks = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *));
         int i;
         for (blk = blkList, i = 0; blk; blk = blk->next, ++i) {
             blocks[i] = blk;
         }
         if (blocks) {
             qsort(blocks, nBlocks, sizeof(TextBlock *), &TextBlock::cmpXYPrimaryRot);
         }

         // column assignment
         for (i = 0; i < nBlocks; ++i) {
             blk0 = blocks[i];
             col1 = 0;
             for (j = 0; j < i; ++j) {
                 blk1 = blocks[j];
                 col2 = 0; // make gcc happy
                 switch (primaryRot) {
                 case 0:
                     if (blk0->xMin > blk1->xMax) {
                         col2 = blk1->col + blk1->nColumns + 3;
                     } else if (blk1->xMax == blk1->xMin) {
                         col2 = blk1->col;
                     } else {
                         col2 = blk1->col + (int)(((blk0->xMin - blk1->xMin) / (blk1->xMax - blk1->xMin)) * blk1->nColumns);
                     }
                     break;
                 case 1:
                     if (blk0->yMin > blk1->yMax) {
                         col2 = blk1->col + blk1->nColumns + 3;
                     } else if (blk1->yMax == blk1->yMin) {
                         col2 = blk1->col;
                     } else {
                         col2 = blk1->col + (int)(((blk0->yMin - blk1->yMin) / (blk1->yMax - blk1->yMin)) * blk1->nColumns);
                     }
                     break;
                 case 2:
                     if (blk0->xMax < blk1->xMin) {
                         col2 = blk1->col + blk1->nColumns + 3;
                     } else if (blk1->xMin == blk1->xMax) {
                         col2 = blk1->col;
                     } else {
                         col2 = blk1->col + (int)(((blk0->xMax - blk1->xMax) / (blk1->xMin - blk1->xMax)) * blk1->nColumns);
                     }
                     break;
                 case 3:
                     if (blk0->yMax < blk1->yMin) {
                         col2 = blk1->col + blk1->nColumns + 3;
                     } else if (blk1->yMin == blk1->yMax) {
                         col2 = blk1->col;
                     } else {
                         col2 = blk1->col + (int)(((blk0->yMax - blk1->yMax) / (blk1->yMin - blk1->yMax)) * blk1->nColumns);
                     }
                     break;
                 }
                 if (col2 > col1) {
                     col1 = col2;
                 }
             }
             blk0->col = col1;
             for (line = blk0->lines; line; line = line->next) {
                 for (j = 0; j <= line->len; ++j) {
                     line->col[j] += col1;
                 }
             }
         }
     }

 #if 0 // for debugging
   printf("*** blocks, after column assignment ***\n");
   for (blk = blkList; blk; blk = blk->next) {
     printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f col=%d nCols=%d\n",
 	   blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax, blk->col,
 	   blk->nColumns);
     for (line = blk->lines; line; line = line->next) {
       printf("  line: col[0]=%d\n", line->col[0]);
       for (word0 = line->words; word0; word0 = word0->next) {
 	printf("    word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '",
 	       word0->xMin, word0->xMax, word0->yMin, word0->yMax,
 	       word0->base, word0->fontSize, word0->spaceAfter);
 	for (i = 0; i < word0->len; ++i) {
 	  fputc(word0->text[i] & 0xff, stdout);
 	}
 	printf("'\n");
       }
     }
   }
   printf("\n");
 #endif

     //----- reading order sort

     // compute space on left and right sides of each block
     for (int i = 0; i < nBlocks; ++i) {
         blk0 = blocks[i];
         for (j = 0; j < nBlocks; ++j) {
             blk1 = blocks[j];
             if (blk1 != blk0) {
                 blk0->updatePriMinMax(blk1);
             }
         }
     }

 #if 0 // for debugging
   printf("PAGE\n");
 #endif

     int sortPos = 0;
     bool *visited = (bool *)gmallocn(nBlocks, sizeof(bool));
     for (int i = 0; i < nBlocks; i++) {
         visited[i] = false;
     }

     double bxMin0, byMin0, bxMin1, byMin1;
     int numTables = 0;
     int tableId = -1;
     int correspondenceX, correspondenceY;
     double xCentre1, yCentre1, xCentre2, yCentre2;
     double xCentre3, yCentre3, xCentre4, yCentre4;
     double deltaX, deltaY;
     TextBlock *fblk2 = nullptr, *fblk3 = nullptr, *fblk4 = nullptr;

     for (blk1 = blkList; blk1; blk1 = blk1->next) {
         blk1->ExMin = blk1->xMin;
         blk1->ExMax = blk1->xMax;
         blk1->EyMin = blk1->yMin;
         blk1->EyMax = blk1->yMax;

         bxMin0 = DBL_MAX;
         byMin0 = DBL_MAX;
         bxMin1 = DBL_MAX;
         byMin1 = DBL_MAX;

         fblk2 = nullptr;
         fblk3 = nullptr;
         fblk4 = nullptr;

         /*  find fblk2, fblk3 and fblk4 so that
          *  fblk2 is on the right of blk1 and overlap with blk1 in y axis
          *  fblk3 is under blk1 and overlap with blk1 in x axis
          *  fblk4 is under blk1 and on the right of blk1
          *  and they are closest to blk1
          */
         for (blk2 = blkList; blk2; blk2 = blk2->next) {
             if (blk2 != blk1) {
                 if (blk2->yMin <= blk1->yMax && blk2->yMax >= blk1->yMin && blk2->xMin > blk1->xMax && blk2->xMin < bxMin0) {
                     bxMin0 = blk2->xMin;
                     fblk2 = blk2;
                 } else if (blk2->xMin <= blk1->xMax && blk2->xMax >= blk1->xMin && blk2->yMin > blk1->yMax && blk2->yMin < byMin0) {
                     byMin0 = blk2->yMin;
                     fblk3 = blk2;
                 } else if (blk2->xMin > blk1->xMax && blk2->xMin < bxMin1 && blk2->yMin > blk1->yMax && blk2->yMin < byMin1) {
                     bxMin1 = blk2->xMin;
                     byMin1 = blk2->yMin;
                     fblk4 = blk2;
                 }
             }
         }

         /*  fblk4 can not overlap with fblk3 in x and with fblk2 in y
          *  fblk2 can not overlap with fblk3 in x and y
          *  fblk4 has to overlap with fblk3 in y and with fblk2 in x
          */
         if (fblk2 != nullptr && fblk3 != nullptr && fblk4 != nullptr) {
             if (((fblk3->xMin <= fblk4->xMax && fblk3->xMax >= fblk4->xMin) || (fblk2->yMin <= fblk4->yMax && fblk2->yMax >= fblk4->yMin) || (fblk2->xMin <= fblk3->xMax && fblk2->xMax >= fblk3->xMin)
                  || (fblk2->yMin <= fblk3->yMax && fblk2->yMax >= fblk3->yMin))
                 || !(fblk4->xMin <= fblk2->xMax && fblk4->xMax >= fblk2->xMin && fblk4->yMin <= fblk3->yMax && fblk4->yMax >= fblk3->yMin)) {
                 fblk2 = nullptr;
                 fblk3 = nullptr;
                 fblk4 = nullptr;
             }
         }

         // if we found any then look whether they form a table
         if (fblk2 != nullptr && fblk3 != nullptr && fblk4 != nullptr) {
             tableId = -1;
             correspondenceX = 0;
             correspondenceY = 0;
             deltaX = 0.0;
             deltaY = 0.0;

             if (blk1->lines && blk1->lines->words) {
                 deltaX = blk1->lines->words->getFontSize();
             }
             if (fblk2->lines && fblk2->lines->words) {
                 deltaX = deltaX < fblk2->lines->words->getFontSize() ? deltaX : fblk2->lines->words->getFontSize();
             }
             if (fblk3->lines && fblk3->lines->words) {
                 deltaX = deltaX < fblk3->lines->words->getFontSize() ? deltaX : fblk3->lines->words->getFontSize();
             }
             if (fblk4->lines && fblk4->lines->words) {
                 deltaX = deltaX < fblk4->lines->words->getFontSize() ? deltaX : fblk4->lines->words->getFontSize();
             }

             deltaY = deltaX;

             deltaX *= minColSpacing1;
             deltaY *= maxIntraLineDelta;

             xCentre1 = (blk1->xMax + blk1->xMin) / 2.0;
             yCentre1 = (blk1->yMax + blk1->yMin) / 2.0;
             xCentre2 = (fblk2->xMax + fblk2->xMin) / 2.0;
             yCentre2 = (fblk2->yMax + fblk2->yMin) / 2.0;
             xCentre3 = (fblk3->xMax + fblk3->xMin) / 2.0;
             yCentre3 = (fblk3->yMax + fblk3->yMin) / 2.0;
             xCentre4 = (fblk4->xMax + fblk4->xMin) / 2.0;
             yCentre4 = (fblk4->yMax + fblk4->yMin) / 2.0;

             // are blocks centrally aligned in x ?
             if (fabs(xCentre1 - xCentre3) <= deltaX && fabs(xCentre2 - xCentre4) <= deltaX) {
                 correspondenceX++;
             }

             // are blocks centrally aligned in y ?
             if (fabs(yCentre1 - yCentre2) <= deltaY && fabs(yCentre3 - yCentre4) <= deltaY) {
                 correspondenceY++;
             }

             // are blocks aligned to the left ?
             if (fabs(blk1->xMin - fblk3->xMin) <= deltaX && fabs(fblk2->xMin - fblk4->xMin) <= deltaX) {
                 correspondenceX++;
             }

             // are blocks aligned to the right ?
             if (fabs(blk1->xMax - fblk3->xMax) <= deltaX && fabs(fblk2->xMax - fblk4->xMax) <= deltaX) {
                 correspondenceX++;
             }

             // are blocks aligned to the top ?
             if (fabs(blk1->yMin - fblk2->yMin) <= deltaY && fabs(fblk3->yMin - fblk4->yMin) <= deltaY) {
                 correspondenceY++;
             }

             // are blocks aligned to the bottom ?
             if (fabs(blk1->yMax - fblk2->yMax) <= deltaY && fabs(fblk3->yMax - fblk4->yMax) <= deltaY) {
                 correspondenceY++;
             }

             // are blocks aligned in x and y ?
             if (correspondenceX > 0 && correspondenceY > 0) {

                 // find maximal tableId
                 tableId = tableId < fblk4->tableId ? fblk4->tableId : tableId;
                 tableId = tableId < fblk3->tableId ? fblk3->tableId : tableId;
                 tableId = tableId < fblk2->tableId ? fblk2->tableId : tableId;
                 tableId = tableId < blk1->tableId ? blk1->tableId : tableId;

                 // if the tableId is -1, then we found new table
                 if (tableId < 0) {
                     tableId = numTables;
                     numTables++;
                 }

                 blk1->tableId = tableId;
                 fblk2->tableId = tableId;
                 fblk3->tableId = tableId;
                 fblk4->tableId = tableId;
             }
         }
     }

     /*  set extended bounding boxes of all table entries
      *  so that they contain whole table
      *  (we need to process whole table size when comparing it
      *   with regular text blocks)
      */
     PDFRectangle *envelopes = new PDFRectangle[numTables];
     TextBlock **ending_blocks = new TextBlock *[numTables];

     for (int i = 0; i < numTables; i++) {
         envelopes[i].x1 = DBL_MAX;
         envelopes[i].x2 = DBL_MIN;
         envelopes[i].y1 = DBL_MAX;
         envelopes[i].y2 = DBL_MIN;
         ending_blocks[i] = nullptr;
     }

     for (blk1 = blkList; blk1; blk1 = blk1->next) {
         if (blk1->tableId >= 0) {
             if (blk1->ExMin < envelopes[blk1->tableId].x1) {
                 envelopes[blk1->tableId].x1 = blk1->ExMin;
                 if (!blk1->page->primaryLR) {
                     ending_blocks[blk1->tableId] = blk1;
                 }
             }

             if (blk1->ExMax > envelopes[blk1->tableId].x2) {
                 envelopes[blk1->tableId].x2 = blk1->ExMax;
                 if (blk1->page->primaryLR) {
                     ending_blocks[blk1->tableId] = blk1;
                 }
             }

             envelopes[blk1->tableId].y1 = blk1->EyMin < envelopes[blk1->tableId].y1 ? blk1->EyMin : envelopes[blk1->tableId].y1;
             envelopes[blk1->tableId].y2 = blk1->EyMax > envelopes[blk1->tableId].y2 ? blk1->EyMax : envelopes[blk1->tableId].y2;
         }
     }

     for (blk1 = blkList; blk1; blk1 = blk1->next) {
         if (blk1->tableId >= 0 && ending_blocks[blk1->tableId] && blk1->xMin <= ending_blocks[blk1->tableId]->xMax && blk1->xMax >= ending_blocks[blk1->tableId]->xMin) {
             blk1->tableEnd = true;
         }
     }

     for (blk1 = blkList; blk1; blk1 = blk1->next) {
         if (blk1->tableId >= 0) {
             blk1->ExMin = envelopes[blk1->tableId].x1;
             blk1->ExMax = envelopes[blk1->tableId].x2;
             blk1->EyMin = envelopes[blk1->tableId].y1;
             blk1->EyMax = envelopes[blk1->tableId].y2;
         }
     }
     delete[] envelopes;
     delete[] ending_blocks;

     /*  set extended bounding boxes of all other blocks
      *  so that they extend in x without hitting neighbours
      */
     for (blk1 = blkList; blk1; blk1 = blk1->next) {
         if (!(blk1->tableId >= 0)) {
             double xMax = DBL_MAX;
             double xMin = DBL_MIN;

             for (blk2 = blkList; blk2; blk2 = blk2->next) {
                 if (blk2 == blk1) {
                     continue;
                 }

                 if (blk1->yMin <= blk2->yMax && blk1->yMax >= blk2->yMin) {
                     if (blk2->xMin < xMax && blk2->xMin > blk1->xMax) {
                         xMax = blk2->xMin;
                     }

                     if (blk2->xMax > xMin && blk2->xMax < blk1->xMin) {
                         xMin = blk2->xMax;
                     }
                 }
             }

             for (blk2 = blkList; blk2; blk2 = blk2->next) {
                 if (blk2 == blk1) {
                     continue;
                 }

                 if (blk2->xMax > blk1->ExMax && blk2->xMax <= xMax && blk2->yMin >= blk1->yMax) {
                     blk1->ExMax = blk2->xMax;
                 }

                 if (blk2->xMin < blk1->ExMin && blk2->xMin >= xMin && blk2->yMin >= blk1->yMax) {
                     blk1->ExMin = blk2->xMin;
                 }
             }
         }
     }

     int i = -1;
     for (blk1 = blkList; blk1; blk1 = blk1->next) {
         i++;
         sortPos = blk1->visitDepthFirst(blkList, i, blocks, sortPos, visited);
     }
     if (visited) {
         gfree(visited);
     }

 #if 0 // for debugging
   printf("*** blocks, after ro sort ***\n");
   for (i = 0; i < nBlocks; ++i) {
     blk = blocks[i];
     printf("block: rot=%d x=%.2f..%.2f y=%.2f..%.2f space=%.2f..%.2f\n",
 	   blk->rot, blk->xMin, blk->xMax, blk->yMin, blk->yMax,
 	   blk->priMin, blk->priMax);
     for (line = blk->lines; line; line = line->next) {
       printf("  line:\n");
       for (word0 = line->words; word0; word0 = word0->next) {
 	printf("    word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '",
 	       word0->xMin, word0->xMax, word0->yMin, word0->yMax,
 	       word0->base, word0->fontSize, word0->spaceAfter);
 	for (j = 0; j < word0->len; ++j) {
 	  fputc(word0->text[j] & 0xff, stdout);
 	}
 	printf("'\n");
       }
     }
   }
   printf("\n");
   fflush(stdout);
 #endif

     // build the flows
     //~ this needs to be adjusted for writing mode (vertical text)
     //~ this also needs to account for right-to-left column ordering
     while (flows) {
         flow = flows;
         flows = flows->next;
         delete flow;
     }
     flow = nullptr;
     flows = lastFlow = nullptr;
     // assume blocks are already in reading order,
     // and construct flows accordingly.
     for (i = 0; i < nBlocks; i++) {
         blk = blocks[i];
         blk->next = nullptr;
         if (flow) {
             blk1 = blocks[i - 1];
             blkSpace = maxBlockSpacing * blk1->lines->words->fontSize;
             if (blk1->secondaryDelta(blk) <= blkSpace && blk->isBelow(blk1) && flow->blockFits(blk, blk1)) {
                 flow->addBlock(blk);
                 continue;
             }
         }
         flow = new TextFlow(this, blk);
         if (lastFlow) {
             lastFlow->next = flow;
         } else {
             flows = flow;
         }
         lastFlow = flow;
     }

 #if 0 // for debugging
   printf("*** flows ***\n");
   for (flow = flows; flow; flow = flow->next) {
     printf("flow: x=%.2f..%.2f y=%.2f..%.2f pri:%.2f..%.2f\n",
 	   flow->xMin, flow->xMax, flow->yMin, flow->yMax,
 	   flow->priMin, flow->priMax);
     for (blk = flow->blocks; blk; blk = blk->next) {
       printf("  block: rot=%d x=%.2f..%.2f y=%.2f..%.2f pri=%.2f..%.2f\n",
 	     blk->rot, blk->ExMin, blk->ExMax, blk->EyMin, blk->EyMax,
 	     blk->priMin, blk->priMax);
       for (line = blk->lines; line; line = line->next) {
 	printf("    line:\n");
 	for (word0 = line->words; word0; word0 = word0->next) {
 	  printf("      word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f space=%d: '",
 		 word0->xMin, word0->xMax, word0->yMin, word0->yMax,
 		 word0->base, word0->fontSize, word0->spaceAfter);
 	  for (i = 0; i < word0->len; ++i) {
 	    fputc(word0->text[i] & 0xff, stdout);
 	  }
 	  printf("'\n");
 	}
       }
     }
   }
   printf("\n");
 #endif
 }

 // Writes the bounding box for the span of `line` delimited by the edge
 // indices `start` and `end` into *xMin/*xMax/*yMin/*yMax.
 //
 // For rotations 0 and 2 the x range is taken from line->edge[] and the y
 // range from the line's own yMin/yMax; for rotations 1 and 3 the roles of
 // the two axes are swapped.  For rotations 2 and 3 the minimum is read from
 // edge[end] and the maximum from edge[start] (the reverse of rotations 0
 // and 1).  Any other rotation value leaves all four outputs untouched.
 void TextPage::adjustRotation(TextLine *line, int start, int end, double *xMin, double *xMax, double *yMin, double *yMax)
 {
     const auto rotation = line->rot;

     const bool edgesAlongX = (rotation == 0 || rotation == 2);
     const bool edgesAlongY = (rotation == 1 || rotation == 3);
     if (!edgesAlongX && !edgesAlongY) {
         // unknown rotation: nothing is written
         return;
     }

     // rotations 2 and 3 take the lower bound from `end` and the upper
     // bound from `start`
     const bool swapEnds = (rotation == 2 || rotation == 3);
     const int lowIdx = swapEnds ? end : start;
     const int highIdx = swapEnds ? start : end;

     if (edgesAlongX) {
         *xMin = line->edge[lowIdx];
         *xMax = line->edge[highIdx];
         *yMin = line->yMin;
         *yMax = line->yMax;
     } else {
         *xMin = line->xMin;
         *xMax = line->xMax;
         *yMin = line->edge[lowIdx];
         *yMax = line->edge[highIdx];
     }
 }

 // Convenience overload without the extended search options.  Forwards to
 // the full findText() with diacritics-ignoring and cross-line matching both
 // disabled, and with no continueMatch rectangle or ignoredHyphen flag
 // requested (both passed as nullptr).
 bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool backward, bool wholeWord, double *xMin, double *yMin, double *xMax, double *yMax)
 {
     const bool ignoreDiacritics = false;
     const bool matchAcrossLines = false;

     const bool matched = findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast, caseSensitive, ignoreDiacritics, matchAcrossLines, backward, wholeWord, xMin, yMin, xMax, yMax, nullptr, nullptr);
     return matched;
 }

 bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool ignoreDiacritics, bool backward, bool wholeWord, double *xMin, double *yMin, double *xMax,
                         double *yMax)
 {
     return findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast, caseSensitive, ignoreDiacritics, false, backward, wholeWord, xMin, yMin, xMax, yMax, nullptr, nullptr);
 }

 // Search the page for the string s (len Unicode characters).
 //
 // Search window:
 //  - start position: the previous find position if startAtLast is set and
 //    there was a previous find; otherwise, unless startAtTop is set,
 //    (*xMin, *yMin).
 //  - stop position: the previous find position if stopAtLast is set and
 //    there was a previous find; otherwise, unless stopAtBottom is set,
 //    (*xMax, *yMax).
 //
 // Options:
 //  - caseSensitive: when false, both the search string and each line are
 //    upper-cased before comparison.
 //  - ignoreDiacritics: compare against the ASCII-7 translation of each line.
 //    It is switched off if the normalized search string contains a
 //    non-ASCII-7 character, and for the rest of the search once a line with
 //    an empty ASCII translation is encountered.
 //  - matchAcrossLines: allow a match to continue on the following line; this
 //    is silently disabled for backward searches.
 //  - backward: iterate blocks and in-line positions in reverse and prefer the
 //    match with the greatest (yMin, xMin) instead of the smallest.
 //  - wholeWord: the match must not be adjacent to an alphanumeric character.
 //
 // Output: on success the bounding box of the chosen match (its part on the
 // first line) is stored in *xMin, *yMin, *xMax, *yMax, the position is
 // remembered as the last find, and true is returned.  For a forward match
 // that continues on the next line, continueMatch (if non-null) receives the
 // box of the continuation and *ignoredHyphen (if non-null) tells whether an
 // end-of-line hyphen was skipped; otherwise continueMatch->x1 is set to
 // std::numeric_limits<double>::max().
 //
 // Returns false if len is 0, if the page is in raw order, or if nothing
 // matches.
 bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool ignoreDiacritics, bool matchAcrossLines, bool backward, bool wholeWord, double *xMin,
                         double *yMin, double *xMax, double *yMax, PDFRectangle *continueMatch, bool *ignoredHyphen)
 {
     TextBlock *blk;
     TextLine *line;
     Unicode *s2, *txt, *reordered;
     Unicode *p;
     TextLine *nextline;
     Unicode *nextline_txt;
     int nextline_len;
     bool nextlineAfterHyphen = false;
     int txtSize, m, i, j, k;
     double xStart, yStart, xStop, yStop;
     double xMin0, yMin0, xMax0, yMax0;
     double xMin1, yMin1, xMax1, yMax1;
     double xMin2, yMin2, xMax2, yMax2;
     bool found;

     // an empty search string never matches
     if (len == 0) {
         return false;
     }

     // no search is performed on a raw-order page
     if (rawOrder) {
         return false;
     }

     if (matchAcrossLines && backward) {
         // matchAcrossLines is unimplemented for backward search
         matchAcrossLines = false;
     }

     // handle right-to-left text
     reordered = (Unicode *)gmallocn(len, sizeof(Unicode));
     reorderText(s, len, nullptr, primaryLR, nullptr, reordered);

     // normalize the search string
     // (note: this updates len to the length of the normalized string)
     s2 = unicodeNormalizeNFKC(reordered, len, &len, nullptr);

     // if search string is not pure ascii then don't
     // use ignoreDiacritics (as they won't match)
     if (!caseSensitive) {
         // convert the search string to uppercase
         for (i = 0; i < len; ++i) {
             s2[i] = unicodeToUpper(s2[i]);
             if (ignoreDiacritics && !isAscii7(s2[i])) {
                 ignoreDiacritics = false;
             }
         }
     } else if (ignoreDiacritics) {
         for (i = 0; i < len; ++i) {
             if (!isAscii7(s2[i])) {
                 ignoreDiacritics = false;
                 break;
             }
         }
     }

     // txt is the text of the line being searched; for a case-insensitive
     // search it is a buffer owned by this function (capacity txtSize),
     // otherwise it points at data cached on the line
     txt = nullptr;
     txtSize = 0;

     // establish the start and stop positions of the search window
     xStart = yStart = xStop = yStop = 0;
     if (startAtLast && haveLastFind) {
         xStart = lastFindXMin;
         yStart = lastFindYMin;
     } else if (!startAtTop) {
         xStart = *xMin;
         yStart = *yMin;
     }
     if (stopAtLast && haveLastFind) {
         xStop = lastFindXMin;
         yStop = lastFindYMin;
     } else if (!stopAtBottom) {
         xStop = *xMax;
         yStop = *yMax;
     }

     found = false;
     xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy
     xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy

     // walk the blocks in index order, or in reverse for a backward search
     for (i = backward ? nBlocks - 1 : 0; backward ? i >= 0 : i < nBlocks; i += backward ? -1 : 1) {
         blk = blocks[i];

         // check: is the block above the top limit?
         // (this only works if the page's primary rotation is zero --
         // otherwise the blocks won't be sorted in the useful order)
         if (!startAtTop && primaryRot == 0 && (backward ? blk->yMin > yStart : blk->yMax < yStart)) {
             continue;
         }

         // check: is the block below the bottom limit?
         // (this only works if the page's primary rotation is zero --
         // otherwise the blocks won't be sorted in the useful order)
         if (!stopAtBottom && primaryRot == 0 && (backward ? blk->yMax < yStop : blk->yMin > yStop)) {
             break;
         }

         for (line = blk->lines; line; line = line->next) {

             // check: is the line above the top limit?
             // (this only works if the page's primary rotation is zero --
             // otherwise the lines won't be sorted in the useful order)
             if (!startAtTop && primaryRot == 0 && (backward ? line->yMin > yStart : line->yMin < yStart)) {
                 continue;
             }

             // check: is the line below the bottom limit?
             // (this only works if the page's primary rotation is zero --
             // otherwise the lines won't be sorted in the useful order)
             if (!stopAtBottom && primaryRot == 0 && (backward ? line->yMin < yStop : line->yMin > yStop)) {
                 continue;
             }

             // compute the NFKC-normalized form of the line on first use and
             // cache it on the line
             if (!line->normalized) {
                 line->normalized = unicodeNormalizeNFKC(line->text, line->len, &line->normalized_len, &line->normalized_idx, true);
             }

             // determine the line that follows this one; it is only consulted
             // when matchAcrossLines is set
             nextline = nullptr;
             nextline_txt = nullptr;
             nextline_len = 0;
             if (line->next) {
                 nextline = line->next;
             } else {
                 // set nextline to first line of next block
                 int ind = i + (backward ? -1 : 1);
                 if ((backward && ind >= 0) || (!backward && ind < nBlocks)) {
                     nextline = blocks[ind]->lines;
                 }
             }

             if (matchAcrossLines && nextline && !nextline->normalized) {
                 nextline->normalized = unicodeNormalizeNFKC(nextline->text, nextline->len, &nextline->normalized_len, &nextline->normalized_idx, true);
             }

             // m is the number of characters of this line that will be searched
             m = line->normalized_len;

             if (ignoreDiacritics) {
                 // compute and cache the ASCII-7 translation of the line
                 if (!line->ascii_translation) {
                     unicodeToAscii7(std::span(line->normalized, line->normalized_len), &line->ascii_translation, &line->ascii_len, line->normalized_idx, &line->ascii_idx);
                 }
                 if (line->ascii_len) {
                     m = line->ascii_len;
                 } else {
                     // no ASCII translation available: stop ignoring
                     // diacritics (this also affects all following lines)
                     ignoreDiacritics = false;
                 }

                 if (matchAcrossLines && nextline && !nextline->ascii_translation) {
                     unicodeToAscii7(std::span(nextline->normalized, nextline->normalized_len), &nextline->ascii_translation, &nextline->ascii_len, nextline->normalized_idx, &nextline->ascii_idx);
                 }
             }
             if (!caseSensitive) {
                 // convert the line to uppercase, into our own buffer
                 if (m > txtSize) {
                     txt = (Unicode *)greallocn(txt, m, sizeof(Unicode));
                     txtSize = m;
                 }
                 for (k = 0; k < m; ++k) {
                     if (ignoreDiacritics) {
                         txt[k] = unicodeToUpper(line->ascii_translation[k]);
                     } else {
                         txt[k] = unicodeToUpper(line->normalized[k]);
                     }
                 }
                 // the upper-cased copy of the next line is allocated here and
                 // released at the end of this loop iteration
                 if (matchAcrossLines && nextline) {
                     nextline_len = ignoreDiacritics ? nextline->ascii_len : nextline->normalized_len;
                     nextline_txt = (Unicode *)gmallocn(nextline_len, sizeof(Unicode));
                     for (k = 0; k < nextline_len; ++k) {
                         nextline_txt[k] = ignoreDiacritics ? unicodeToUpper(nextline->ascii_translation[k]) : unicodeToUpper(nextline->normalized[k]);
                     }
                 }
             } else {
                 // case-sensitive: search directly in the data cached on the line
                 if (ignoreDiacritics) {
                     txt = line->ascii_translation;
                 } else {
                     txt = line->normalized;
                 }

                 if (matchAcrossLines && nextline) {
                     nextline_len = ignoreDiacritics ? nextline->ascii_len : nextline->normalized_len;
                     nextline_txt = ignoreDiacritics ? nextline->ascii_translation : nextline->normalized;
                 }
             }

             // search each position in this line
             // (when a next line is available a match may start at any
             // character of this line; otherwise it has to fit in this line)
             j = backward ? m - len : 0;
             p = txt + j;
             while (backward ? j >= 0 : j <= m - (nextline_txt ? 1 : len)) {
                 bool wholeWordStartIsOk, wholeWordEndIsOk;
                 if (wholeWord) {
                     wholeWordStartIsOk = j == 0 || !unicodeTypeAlphaNum(txt[j - 1]);
                     if (nextline_txt) {
                         wholeWordEndIsOk = true; // word end may be in next line, so we'll check it later
                     } else {
                         wholeWordEndIsOk = j + len == m || !unicodeTypeAlphaNum(txt[j + len]);
                     }
                 }
                 if (!wholeWord || (wholeWordStartIsOk && wholeWordEndIsOk)) {
                     // n counts the characters of the next line consumed by the match
                     int n = 0;
                     bool spaceConsumedByNewline = false;
                     bool found_it;

                     // compare the strings
                     for (k = 0; k < len; ++k) {
                         bool last_char_of_line = j + k == m - 1;
                         bool last_char_of_search_term = k == len - 1;
                         bool match_started = (bool)k;

                         if (p[k] != s2[k] || (nextline_txt && last_char_of_line && !last_char_of_search_term)) {
                             // now check if the comparison failed at the end-of-line hyphen,
                             // and if so, keep on comparing at the next line
                             nextlineAfterHyphen = false;

                             if (s2[k] == p[k]) {
                                 // the last character of the line matched but the search
                                 // string goes on: continue on the next line only if that
                                 // character is a hyphen or the search string continues
                                 // with whitespace
                                 if (p[k] != (Unicode)'-' && !UnicodeIsWhitespace(s2[k + 1])) {
                                     break;
                                 }
                                 k++;
                             } else if (!match_started || p[k] != (Unicode)'-' || !last_char_of_line || UnicodeIsWhitespace(s2[k])) {
                                 break;
                             } else {
                                 // mismatch on an end-of-line hyphen: skip the hyphen
                                 nextlineAfterHyphen = true;
                             }

                             // compare the remainder of the search string with the
                             // beginning of the next line; one whitespace character in
                             // the search string may be absorbed by the line break
                             for (; n < nextline_len && k < len; ++k, ++n) {
                                 if (nextline_txt[n] != s2[k]) {
                                     if (!spaceConsumedByNewline && !n && UnicodeIsWhitespace(s2[k])) {
                                         n = -1;
                                         spaceConsumedByNewline = true;
                                         continue;
                                     }
                                     break;
                                 }
                             }
                             break;
                         }
                     }

                     found_it = k == len;
                     if (found_it && nextline_txt && wholeWord) { // check word end for nextline case
                         if (n) { // Match ended at next line
                             wholeWordEndIsOk = n == nextline_len || !unicodeTypeAlphaNum(nextline_txt[n]);
                         } else { // Match ended on same line
                             wholeWordEndIsOk = j + len == m || !unicodeTypeAlphaNum(txt[j + len]);
                         }

                         if (!wholeWordEndIsOk) {
                             found_it = false;
                         }
                     }
                     // found it
                     if (found_it) {
                         bool nextLineMatch = (bool)n;
                         // the whitespace absorbed by the line break advanced k
                         // without advancing n; undo that so that j + k - n below
                         // refers to a position in this line
                         if (spaceConsumedByNewline) {
                             k--;
                         }
                         // where s2 matches a subsequence of a compatibility equivalence
                         // decomposition, highlight the entire glyph, since we don't know
                         // the internal layout of subglyph components
                         int normStart, normAfterEnd;
                         if (ignoreDiacritics) {
                             normStart = line->ascii_idx[j];
                             if (nextline_txt) {
                                 normAfterEnd = line->ascii_idx[j + k - n];
                             } else {
                                 normAfterEnd = line->ascii_idx[j + len - 1] + 1;
                             }
                         } else {
                             normStart = line->normalized_idx[j];
                             if (nextline_txt) {
                                 normAfterEnd = line->normalized_idx[j + k - n];
                             } else {
                                 normAfterEnd = line->normalized_idx[j + len - 1] + 1;
                             }
                         }

                         // bounding box of the part of the match on this line
                         adjustRotation(line, normStart, normAfterEnd, &xMin1, &xMax1, &yMin1, &yMax1);

                         // accept the candidate only if it lies inside the search
                         // window, and keep the one closest to the start position
                         if (backward) {
                             if ((startAtTop || yMin1 < yStart || (yMin1 == yStart && xMin1 < xStart)) && (stopAtBottom || yMin1 > yStop || (yMin1 == yStop && xMin1 > xStop))) {
                                 if (!found || yMin1 > yMin0 || (yMin1 == yMin0 && xMin1 > xMin0)) {
                                     xMin0 = xMin1;
                                     xMax0 = xMax1;
                                     yMin0 = yMin1;
                                     yMax0 = yMax1;
                                     found = true;
                                 }
                             }
                         } else {
                             if ((startAtTop || yMin1 > yStart || (yMin1 == yStart && xMin1 > xStart)) && (stopAtBottom || yMin1 < yStop || (yMin1 == yStop && xMin1 < xStop))) {
                                 if (!found || yMin1 < yMin0 || (yMin1 == yMin0 && xMin1 < xMin0)) {
                                     xMin0 = xMin1;
                                     xMax0 = xMax1;
                                     yMin0 = yMin1;
                                     yMax0 = yMax1;
                                     found = true;
                                     if (nextLineMatch) { // set the out parameters
                                         if (ignoredHyphen) {
                                             *ignoredHyphen = nextlineAfterHyphen;
                                         }

                                         if (continueMatch) {
                                             adjustRotation(nextline, 0, n, &xMin2, &xMax2, &yMin2, &yMax2);
                                             continueMatch->x1 = xMin2;
                                             continueMatch->y1 = yMax2;
                                             continueMatch->x2 = xMax2;
                                             continueMatch->y2 = yMin2;
                                         }
                                     } else if (continueMatch && continueMatch->x1 != std::numeric_limits<double>::max()) {
                                         // the new best match does not continue on the next
                                         // line: reset what an earlier candidate may have set
                                         if (ignoredHyphen) {
                                             *ignoredHyphen = false;
                                         }

                                         continueMatch->x1 = std::numeric_limits<double>::max();
                                     }
                                 }
                             }
                         }
                     }
                 }
                 if (backward) {
                     --j;
                     --p;
                 } else {
                     ++j;
                     ++p;
                 }
             }

             // free the next-line buffer only if it is our own upper-cased copy
             if (nextline_txt && nextline_txt != nextline->ascii_translation && nextline_txt != nextline->normalized) {
                 gfree(nextline_txt);
             }
         }
     }

     gfree(s2);
     gfree(reordered);
     // txt is only owned by this function in the case-insensitive path
     if (!caseSensitive) {
         gfree(txt);
     }

     if (found) {
         *xMin = xMin0;
         *xMax = xMax0;
         *yMin = yMin0;
         *yMax = yMax0;
         lastFindXMin = xMin0;
         lastFindYMin = yMin0;
         haveLastFind = true;
         return true;
     }

     return false;
 }

 // Return the text inside the rectangle (xMin, yMin)-(xMax, yMax) as a newly
 // allocated GooString, encoded with the global text encoding.  textEOL
 // selects the end-of-line sequence.  If no text encoding is available the
 // returned string is empty.  In raw-order mode the characters whose bounding
 // box lies inside the rectangle are emitted word by word without any layout.
 GooString *TextPage::getText(double xMin, double yMin, double xMax, double yMax, EndOfLineKind textEOL) const
 {
     GooString *const out = new GooString();

     // get the output encoding
     const UnicodeMap *const uMap = globalParams->getTextEncoding();
     if (!uMap) {
         return out;
     }

     if (rawOrder) {
         char mbc[16];

         for (TextWord *word = rawWords; word && word <= rawLastWord; word = word->next) {
             for (int c = 0; c < word->getLength(); ++c) {
                 double gXMin, gXMax, gYMin, gYMax;
                 word->getCharBBox(c, &gXMin, &gYMin, &gXMax, &gYMax);

                 // keep only characters completely inside the rectangle
                 const bool inside = xMin <= gXMin && gXMax <= xMax && yMin <= gYMin && gYMax <= yMax;
                 if (!inside) {
                     continue;
                 }
                 const int mbcLen = uMap->mapUnicode(*(word->getChar(c)), mbc, sizeof(mbc));
                 out->append(mbc, mbcLen);
             }
         }
         return out;
     }

     // encoded forms of the space and of the requested end-of-line sequence
     char space[8];
     char eol[16];
     const int spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
     int eolLen = 0;
     if (textEOL == eolUnix) {
         eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
     } else if (textEOL == eolDOS) {
         eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
         eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
     } else if (textEOL == eolMac) {
         eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
     }

     //~ writing mode (horiz/vert)

     // Index of the first character (scanning forward) whose midpoint is
     // strictly above (above == true) or strictly below the limit; -1 if none.
     const auto firstIndex = [](const double *edge, int count, bool above, double limit) -> int {
         for (int k = 0; k < count; ++k) {
             const double mid = 0.5 * (edge[k] + edge[k + 1]);
             if (above ? mid > limit : mid < limit) {
                 return k;
             }
         }
         return -1;
     };
     // Same test, scanning backward from the last character.
     const auto lastIndex = [](const double *edge, int count, bool above, double limit) -> int {
         for (int k = count - 1; k >= 0; --k) {
             const double mid = 0.5 * (edge[k] + edge[k + 1]);
             if (above ? mid > limit : mid < limit) {
                 return k;
             }
         }
         return -1;
     };

     // collect the line fragments that are in the rectangle
     int fragsSize = 256;
     TextLineFrag *frags = (TextLineFrag *)gmallocn(fragsSize, sizeof(TextLineFrag));
     int nFrags = 0;
     int lastRot = -1;
     bool oneRot = true;

     for (int b = 0; b < nBlocks; ++b) {
         TextBlock *blk = blocks[b];
         const bool blockOverlaps = xMin < blk->xMax && blk->xMin < xMax && yMin < blk->yMax && blk->yMin < yMax;
         if (!blockOverlaps) {
             continue;
         }

         for (TextLine *line = blk->lines; line; line = line->next) {
             const bool lineOverlaps = xMin < line->xMax && line->xMin < xMax && yMin < line->yMax && line->yMin < yMax;
             if (!lineOverlaps) {
                 continue;
             }

             int idx0 = -1;
             int idx1 = -1;
             const int rot = line->rot;
             if (rot >= 0 && rot <= 3) {
                 // rotations 0 and 2 run along x, rotations 1 and 3 along y
                 const bool alongX = rot == 0 || rot == 2;
                 const double center = alongX ? 0.5 * (line->yMin + line->yMax) : 0.5 * (line->xMin + line->xMax);
                 const double acrossLo = alongX ? yMin : xMin;
                 const double acrossHi = alongX ? yMax : xMax;
                 const double alongLo = alongX ? xMin : yMin;
                 const double alongHi = alongX ? xMax : yMax;

                 // the centre of the line must be inside the rectangle
                 if (acrossLo < center && center < acrossHi) {
                     if (rot < 2) {
                         idx0 = firstIndex(line->edge, line->len, true, alongLo);
                         idx1 = lastIndex(line->edge, line->len, false, alongHi);
                     } else {
                         // reversed direction: the limits swap roles
                         idx0 = firstIndex(line->edge, line->len, false, alongHi);
                         idx1 = lastIndex(line->edge, line->len, true, alongLo);
                     }
                 }
             }

             if (idx0 < 0 || idx1 < 0) {
                 continue;
             }

             if (nFrags == fragsSize) {
                 fragsSize *= 2;
                 frags = (TextLineFrag *)greallocn(frags, fragsSize, sizeof(TextLineFrag));
             }
             frags[nFrags].init(line, idx0, idx1 - idx0 + 1);
             ++nFrags;

             // remember whether all fragments share one rotation
             if (lastRot >= 0 && line->rot != lastRot) {
                 oneRot = false;
             }
             lastRot = line->rot;
         }
     }

     // sort the fragments and generate the string
     if (nFrags > 0) {

         for (int f = 0; f < nFrags; ++f) {
             frags[f].computeCoords(oneRot);
         }
         assignColumns(frags, nFrags, oneRot);

         // if all lines in the region have the same rotation, use it;
         // otherwise, use the page's primary rotation
         if (oneRot) {
             qsort(frags, nFrags, sizeof(TextLineFrag), &TextLineFrag::cmpYXLineRot);
         } else {
             qsort(frags, nFrags, sizeof(TextLineFrag), &TextLineFrag::cmpYXPrimaryRot);
         }

         // within each run of fragments sharing (approximately) one baseline,
         // order the fragments by column
         int runStart = 0;
         while (runStart < nFrags) {
             const double delta = maxIntraLineDelta * frags[runStart].line->words->fontSize;
             int runEnd = runStart + 1;
             while (runEnd < nFrags && fabs(frags[runEnd].base - frags[runStart].base) < delta) {
                 ++runEnd;
             }
             qsort(frags + runStart, runEnd - runStart, sizeof(TextLineFrag), oneRot ? &TextLineFrag::cmpXYColumnLineRot : &TextLineFrag::cmpXYColumnPrimaryRot);
             runStart = runEnd;
         }

         int col = 0;
         bool multiLine = false;
         for (int f = 0; f < nFrags; ++f) {
             TextLineFrag *frag = &frags[f];

             // insert a return
             const bool newBaseline = f > 0 && fabs(frag->base - frags[f - 1].base) > maxIntraLineDelta * frags[f - 1].line->words->fontSize;
             if (frag->col < col || newBaseline) {
                 out->append(eol, eolLen);
                 col = 0;
                 multiLine = true;
             }

             // column alignment
             while (col < frag->col) {
                 out->append(space, spaceLen);
                 ++col;
             }

             // get the fragment text
             col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, out);
         }

         if (multiLine) {
             out->append(eol, eolLen);
         }
     }

     gfree(frags);

     return out;
 }

 // Abstract interface for walking a text selection.  Implementations receive
 // callbacks at three levels (block, line, word) together with the selection
 // rectangle.  The object keeps a pointer to the page it was created for and
 // cannot be copied.
 // NOTE(review): the code that drives these callbacks is not visible in this
 // part of the file.
 class TextSelectionVisitor
 {
 public:
     explicit TextSelectionVisitor(TextPage *page);
     virtual ~TextSelectionVisitor();
     // non-copyable
     TextSelectionVisitor(const TextSelectionVisitor &) = delete;
     TextSelectionVisitor &operator=(const TextSelectionVisitor &) = delete;
     // called for a block; begin/end delimit the lines concerned
     virtual void visitBlock(TextBlock *block, TextLine *begin, TextLine *end, const PDFRectangle *selection) = 0;
     // called for a line; begin/end delimit the words concerned and
     // edge_begin/edge_end are indices into the line's edge array
     virtual void visitLine(TextLine *line, TextWord *begin, TextWord *end, int edge_begin, int edge_end, const PDFRectangle *selection) = 0;
     // called for a word; begin/end delimit the characters concerned
     virtual void visitWord(TextWord *word, int begin, int end, const PDFRectangle *selection) = 0;

 protected:
     // the page this visitor operates on (not owned)
     TextPage *page;
 };

 TextSelectionVisitor::TextSelectionVisitor(TextPage *p) : page(p) { }

 // Out-of-line defaulted virtual destructor; the visitor does not own the page.
 TextSelectionVisitor::~TextSelectionVisitor() = default;

 // Selection visitor that collects the visited words, grouped into lines.
 // The result can be retrieved either as text (getText) or as the raw list of
 // word selections (takeWordList, which hands ownership to the caller).
 class TextSelectionDumper : public TextSelectionVisitor
 {
 public:
     explicit TextSelectionDumper(TextPage *page);
     ~TextSelectionDumper() override;

     // blocks are not of interest to the dumper
     void visitBlock(TextBlock *block, TextLine *begin, TextLine *end, const PDFRectangle *selection) override {};
     void visitLine(TextLine *line, TextWord *begin, TextWord *end, int edge_begin, int edge_end, const PDFRectangle *selection) override;
     void visitWord(TextWord *word, int begin, int end, const PDFRectangle *selection) override;
     // flushes the line currently being collected
     void endPage();

     // returns a newly allocated string with the collected text
     GooString *getText();
     // returns the collected lines and stores their number in *nLines
     std::vector<TextWordSelection *> **takeWordList(int *nLines);

 private:
     void startLine();
     void finishLine();

     // array of finished lines: nLines entries used, linesSize allocated
     std::vector<TextWordSelection *> **lines;
     int nLines, linesSize;
     // the line currently being collected, or nullptr
     std::vector<TextWordSelection *> *words;
     // id of the table currently being traversed, or -1 when outside a table
     int tableId;
     // block of the most recently visited table line, or nullptr
     TextBlock *currentBlock;
 };

 // Start with an empty list of lines (room for 256), no line in progress and
 // no table being traversed.
 TextSelectionDumper::TextSelectionDumper(TextPage *p) : TextSelectionVisitor(p)
 {
     using WordList = std::vector<TextWordSelection *>;

     words = nullptr;
     currentBlock = nullptr;
     tableId = -1;

     nLines = 0;
     linesSize = 256;
     lines = static_cast<WordList **>(gmallocn(linesSize, sizeof(WordList *)));
 }

 // Release everything still owned by the dumper: the line in progress (if
 // endPage() was never called for it), every finished line with its word
 // selections, and the array of lines itself.  After takeWordList() has handed
 // the lines to the caller, nLines is 0 and lines is nullptr, so nothing of
 // the caller's data is touched.
 TextSelectionDumper::~TextSelectionDumper()
 {
     // a line that was started but never finished is not part of lines[] yet
     if (words) {
         for (auto *entry : *words) {
             delete entry;
         }
         delete words;
     }

     for (int i = 0; i < nLines; i++) {
         for (auto *entry : *(lines[i])) {
             delete entry;
         }
         delete lines[i];
     }
     gfree(lines);
 }

 void TextSelectionDumper::startLine()
 {
     finishLine();
     words = new std::vector<TextWordSelection *>();
 }

 void TextSelectionDumper::finishLine()
 {
     if (nLines == linesSize) {
         linesSize *= 2;
         lines = (std::vector<TextWordSelection *> **)grealloc(lines, linesSize * sizeof(std::vector<TextWordSelection *> *));
     }

     if (words && words->size() > 0) {
         // Reverse word order for RTL text. Fixes #53 for glib backend (Evince)
         if (!page->primaryLR) {
             std::reverse(words->begin(), words->end());
         }

         lines[nLines++] = words;
     } else if (words) {
         delete words;
     }
     words = nullptr;
 }

 // Decide whether the visited line starts a new output line.  Outside tables
 // every visited line does.  Inside a table a new output line is started when
 // the visited line belongs to the same block as the previous one, or when the
 // previous block has its tableEnd flag set; otherwise the words are appended
 // to the output line already in progress.
 void TextSelectionDumper::visitLine(TextLine *line, TextWord *begin, TextWord *end, int edge_begin, int edge_end, const PDFRectangle *selection)
 {
     TextLineFrag frag;
     frag.init(line, edge_begin, edge_end - edge_begin);

     TextBlock *const blk = frag.line->blk;
     const bool inTable = blk->tableId >= 0;

     if (!inTable) {
         // leaving a table: flush its last row and forget about it
         if (tableId >= 0) {
             finishLine();
             tableId = -1;
             currentBlock = nullptr;
         }
         startLine();
         return;
     }

     // entering a table
     if (tableId == -1) {
         tableId = blk->tableId;
         currentBlock = blk;
     }

     if (currentBlock == blk) {
         // the same block
         startLine();
         return;
     }

     // another block
     if (currentBlock->tableEnd) {
         startLine();
     }
     currentBlock = blk;
 }

 // Record the characters [begin, end) of word in the line being collected.
 void TextSelectionDumper::visitWord(TextWord *word, int begin, int end, const PDFRectangle *selection)
 {
     auto *const wordSelection = new TextWordSelection(word, begin, end);
     words->push_back(wordSelection);
 }

 void TextSelectionDumper::endPage()
 {
     finishLine();
 }

 // Build a newly allocated string from the collected lines, encoded with the
 // global text encoding.  Words are separated by a space where the word has
 // its spaceAfter flag set, and lines are separated by a line feed.  The
 // string is empty if no text encoding is available.
 GooString *TextSelectionDumper::getText()
 {
     GooString *const result = new GooString();

     const UnicodeMap *const uMap = globalParams->getTextEncoding();
     if (!uMap) {
         return result;
     }

     char space[8];
     char eol[16];
     const int spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
     const int eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));

     std::vector<Unicode> uText;
     for (int lineIdx = 0; lineIdx < nLines; lineIdx++) {
         const std::vector<TextWordSelection *> &lineWords = *lines[lineIdx];

         for (std::size_t wordIdx = 0; wordIdx < lineWords.size(); wordIdx++) {
             TextWordSelection *sel = lineWords[wordIdx];

             // copy the selected characters of the word into a flat buffer
             uText.resize(sel->end - sel->begin);
             const auto firstChar = sel->word->chars.begin() + sel->begin;
             const auto lastChar = sel->word->chars.begin() + sel->end;
             std::transform(firstChar, lastChar, uText.begin(), [](auto &c) { return c.text; });
             page->dumpFragment(uText.data(), uText.size(), uMap, result);

             // no separator after the last word of a line
             const bool isLastWord = !(wordIdx < lineWords.size() - 1);
             if (!isLastWord && sel->word->spaceAfter) {
                 result->append(space, spaceLen);
             }
         }

         // no end-of-line after the last line
         if (lineIdx < nLines - 1) {
             result->append(eol, eolLen);
         }
     }

     return result;
 }

 // Hand the collected lines over to the caller.  *nLinesOut receives the
 // number of lines.  When there are none, nullptr is returned and the dumper
 // keeps its (empty) array; otherwise the dumper gives up the array and is
 // left without lines.
 std::vector<TextWordSelection *> **TextSelectionDumper::takeWordList(int *nLinesOut)
 {
     *nLinesOut = nLines;
     if (nLines == 0) {
         return nullptr;
     }

     std::vector<TextWordSelection *> **const taken = lines;
     lines = nullptr;
     nLines = 0;
     return taken;
 }

 class TextSelectionSizer : public TextSelectionVisitor
 {
 public:
     TextSelectionSizer(TextPage *page, double scale);
     ~TextSelectionSizer() override { delete list; }

     void visitBlock(TextBlock *block, TextLine *begin, TextLine *end, const PDFRectangle *selection) override {};
     void visitLine(TextLine *line, TextWord *begin, TextWord *end, int edge_begin, int edge_end, const PDFRectangle *selection) override;
     void visitWord(TextWord *word, int begin, int end, const PDFRectangle *selection) override {};

     std::vector<PDFRectangle *> *takeRegion()
     {
         auto aux = list;
         list = nullptr;
         return aux;
     }

 private:
     std::vector<PDFRectangle *> *list;
     double scale;
 };

 // Start with an empty list of rectangles and remember the scale factor.
 TextSelectionSizer::TextSelectionSizer(TextPage *p, double s) : TextSelectionVisitor(p), list(new std::vector<PDFRectangle *>()), scale(s) { }

 // Append the rectangle covering the selected part of a line.  Along the
 // writing direction the rectangle spans the edges edge_begin..edge_end
 // (swapped for rotations 2 and 3); across it, the line's extent is enlarged
 // by an eighth of its size on each side.  The result is scaled and rounded
 // outwards.  Rotations other than 1, 2 and 3 are treated like rotation 0.
 void TextSelectionSizer::visitLine(TextLine *line, TextWord *begin, TextWord *end, int edge_begin, int edge_end, const PDFRectangle *selection)
 {
     const int rot = line->rot;
     const bool alongY = rot == 1 || rot == 3;
     const bool flipped = rot == 2 || rot == 3;

     const double edgeFirst = line->edge[flipped ? edge_end : edge_begin];
     const double edgeSecond = line->edge[flipped ? edge_begin : edge_end];

     double x1, y1, x2, y2;
     if (alongY) {
         const double margin = (line->xMax - line->xMin) / 8;
         x1 = line->xMin - margin;
         x2 = line->xMax + margin;
         y1 = edgeFirst;
         y2 = edgeSecond;
     } else {
         const double margin = (line->yMax - line->yMin) / 8;
         x1 = edgeFirst;
         x2 = edgeSecond;
         y1 = line->yMin - margin;
         y2 = line->yMax + margin;
     }

     list->push_back(new PDFRectangle(floor(x1 * scale), floor(y1 * scale), ceil(x2 * scale), ceil(y2 * scale)));
 }

 // Selection visitor that draws the selection on an OutputDev: visited
 // lines are added as boxes to the path held in `state`, visited words are
 // queued in `selectionList`, and endPage() performs the actual drawing.
 class TextSelectionPainter : public TextSelectionVisitor
 {
 public:
     TextSelectionPainter(TextPage *page, double scale, int rotation, OutputDev *out, const GfxColor *box_color, const GfxColor *glyph_color);
     ~TextSelectionPainter() override;

     // Blocks need no painting of their own.
     void visitBlock(TextBlock *block, TextLine *begin, TextLine *end, const PDFRectangle *selection) override { }
     void visitLine(TextLine *line, TextWord *begin, TextWord *end, int edge_begin, int edge_end, const PDFRectangle *selection) override;
     void visitWord(TextWord *word, int begin, int end, const PDFRectangle *selection) override;
     void endPage();

 private:
     // True when the first queued word is flagged invisible.
     bool hasGlyphLessFont();

     OutputDev *out; // device the selection is drawn on
     const GfxColor *glyph_color; // fill colour used for the re-drawn glyphs
     GfxState *state; // created in the constructor, deleted in the destructor
     std::vector<TextWordSelection *> *selectionList; // words queued by visitWord()
     Matrix ctm, ictm; // state CTM and its inverse
 };

 // Creates a graphics state covering the whole page at 72 * scale DPI with
 // the requested rotation, caches its CTM and inverse, starts a page on the
 // output device and selects `box_color` as the RGB fill colour.
 TextSelectionPainter::TextSelectionPainter(TextPage *p, double scale, int rotation, OutputDev *outA, const GfxColor *box_color, const GfxColor *glyph_colorA) : TextSelectionVisitor(p), out(outA), glyph_color(glyph_colorA)
 {
     const double resolution = 72 * scale;
     PDFRectangle pageBox(0, 0, p->pageWidth, p->pageHeight);

     state = new GfxState(resolution, resolution, &pageBox, rotation, false);
     selectionList = new std::vector<TextWordSelection *>();

     // Keep both directions of the transform for visitLine().
     state->getCTM(&ctm);
     ctm.invertTo(&ictm);

     out->startPage(0, state, nullptr);
     out->setDefaultCTM(state->getCTM());

     // Selection boxes are filled with box_color in device RGB.
     state->setFillColorSpace(new GfxDeviceRGBColorSpace());
     state->setFillColor(box_color);
     out->updateFillColor(state);
 }

 // Frees every queued word selection, then the list and the graphics state.
 TextSelectionPainter::~TextSelectionPainter()
 {
     for (TextWordSelection *queued : *selectionList) {
         delete queued;
     }
     delete selectionList;

     delete state;
 }

 // Adds the box covering edges [edge_begin, edge_end] of `line` to the
 // current path.  The box is padded across the text direction by one eighth
 // of the line's extent, transformed with the CTM, grown outwards to whole
 // numbers in that space, and transformed back with the inverse CTM before
 // being appended as a closed four-point subpath.
 void TextSelectionPainter::visitLine(TextLine *line, TextWord *begin, TextWord *end, int edge_begin, int edge_end, const PDFRectangle *selection)
 {
     const double firstEdge = line->edge[edge_begin];
     const double lastEdge = line->edge[edge_end];
     double left, top, right, bottom;

     if (line->rot == 1 || line->rot == 3) {
         // Edges run along y; rot 3 has them in the opposite order.
         const double pad = (line->xMax - line->xMin) / 8;
         left = line->xMin - pad;
         right = line->xMax + pad;
         top = (line->rot == 1) ? firstEdge : lastEdge;
         bottom = (line->rot == 1) ? lastEdge : firstEdge;
     } else {
         // rot 0, rot 2 and any unexpected value: edges run along x;
         // rot 2 has them in the opposite order.
         const double pad = (line->yMax - line->yMin) / 8;
         top = line->yMin - pad;
         bottom = line->yMax + pad;
         left = (line->rot == 2) ? lastEdge : firstEdge;
         right = (line->rot == 2) ? firstEdge : lastEdge;
     }

     ctm.transform(left, top, &left, &top);
     ctm.transform(right, bottom, &right, &bottom);

     // Round each pair away from the other so the box can only grow.
     const auto growToIntegers = [](double &a, double &b) {
         if (a < b) {
             a = floor(a);
             b = ceil(b);
         } else {
             a = ceil(a);
             b = floor(b);
         }
     };
     growToIntegers(left, right);
     growToIntegers(top, bottom);

     ictm.transform(left, top, &left, &top);
     ictm.transform(right, bottom, &right, &bottom);

     state->moveTo(left, top);
     state->lineTo(right, top);
     state->lineTo(right, bottom);
     state->lineTo(left, bottom);
     state->closePath();
 }

 // Queues characters [begin, end) of `word`; endPage() draws them later.
 void TextSelectionPainter::visitWord(TextWord *word, int begin, int end, const PDFRectangle *selection)
 {
     auto *queued = new TextWordSelection(word, begin, end);
     selectionList->push_back(queued);
 }

 bool TextSelectionPainter::hasGlyphLessFont()
 {
     if (selectionList && selectionList->size()) {
         TextWordSelection *sel = (*selectionList)[0];
         return sel->word->invisible;
     }

     return false;
 }

 // Draws everything gathered by visitLine()/visitWord() and ends the page
 // on the output device.
 //
 // When hasGlyphLessFont() is true, only the accumulated boxes are filled,
 // using glyphlessSelectionOpacity.  Otherwise the boxes are filled, used as
 // the clip, and every queued character is drawn again in glyph_color.
 void TextSelectionPainter::endPage()
 {
     /* Take a shortcut for glyphless fonts (eg. Tesseract scanned documents)
      * cause we just paint a transparent fill over existent text.Issue #157 */
     if (hasGlyphLessFont()) {
         state->setFillOpacity(glyphlessSelectionOpacity);
         out->updateFillOpacity(state);
         out->fill(state);
         out->endPage();
         return;
     }

     out->fill(state);

     // Clip to the selection boxes; the matching restoreState() is below.
     out->saveState(state);
     out->clip(state);

     state->clearPath();

     state->setFillColor(glyph_color);

     out->updateFillColor(state);

     GooString string;
     for (const TextWordSelection *sel : *selectionList) {
         int begin = sel->begin;

         // Split the selected characters into runs that share a matching
         // font and the same first four text-matrix entries, and emit each
         // run as one string.
         while (begin < sel->end) {
             TextFontInfo *font = sel->word->chars[begin].font;
             const Matrix *mat = &sel->word->chars[begin].textMat;

             state->setTextMat(mat->m[0], mat->m[1], mat->m[2], mat->m[3], 0, 0);
             state->setFont(font->gfxFont, 1);
             out->updateFont(state);

             int fEnd = begin + 1;
             while (fEnd < sel->end && font->matches(sel->word->chars[fEnd].font) //
                    && mat->m[0] == sel->word->chars[fEnd].textMat.m[0] && mat->m[1] == sel->word->chars[fEnd].textMat.m[1] //
                    && mat->m[2] == sel->word->chars[fEnd].textMat.m[2] && mat->m[3] == sel->word->chars[fEnd].textMat.m[3]) {
                 fEnd++;
             }

             /* The only purpose of this string is to let the output device query
              * its length.  Might want to change this interface later. */
             string.clear();
             // Take each char record by reference: only charcode is read, so
             // copying the whole record per character is wasted work.
             std::for_each(sel->word->chars.begin() + begin, sel->word->chars.begin() + fEnd, [&string](const auto &c) { string.append(c.charcode); });
             out->beginString(state, &string);

             for (int j = begin; j < fEnd; j++) {
                 const auto &charJ = sel->word->chars[j];
                 // Characters sharing the previous character's charPos are
                 // drawn only once.
                 if (j != begin && charJ.charPos == sel->word->chars[j - 1].charPos) {
                     continue;
                 }
                 out->drawChar(state, charJ.textMat.m[4], charJ.textMat.m[5], 0, 0, 0, 0, charJ.charcode, 1, nullptr, 0);
             }
             out->endString(state);
             begin = fEnd;
         }
     }

     out->restoreState(state);
     out->endPage();
 }

 // Works out which characters of this word the selection covers and reports
 // them to the visitor as the half-open range [first, last).  A character
 // counts as covered when the midpoint between its edge and the next edge
 // (edgeEnd for the last character) passes XBetweenAB against the
 // selection's extent: x for rot 0/2, y otherwise.  Nothing is reported
 // when no character qualifies.
 void TextWord::visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style)
 {
     const bool alongX = (rot == 0 || rot == 2);
     const double selFrom = alongX ? selection->x1 : selection->y1;
     const double selTo = alongX ? selection->x2 : selection->y2;

     const size_t nChars = len();
     size_t first = nChars;
     size_t last = 0;
     for (size_t idx = 0; idx < nChars; ++idx) {
         const double nextEdge = (idx + 1 < nChars) ? chars[idx + 1].edge : edgeEnd;
         const double centre = (chars[idx].edge + nextEdge) / 2;
         if (!XBetweenAB(centre, selFrom, selTo)) {
             continue;
         }
         if (idx < first) {
             first = idx;
         }
         last = idx + 1;
     }

     /* Skip empty selection. */
     if (last <= first) {
         return;
     }

     visitor->visitWord(this, first, last, selection);
 }

 // Applies `selection` to this line: finds the run of words and the range of
 // character edges it covers, reports the line to the visitor, and then
 // forwards a (possibly widened) child selection to every word in the run.
 // With selectionStyleWord the child selection is stretched to word or line
 // boundaries.  Returns without calling the visitor when no edge midpoint
 // falls inside the selection.
 void TextLine::visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style)
 {
     TextWord *p, *begin, *end, *current;
     int i, edge_begin, edge_end;
     PDFRectangle child_selection;
     double s1, s2, pMin, pMax;

     // Selection extent along the text direction: x for rot 0/2, y otherwise.
     if (rot == 0 || rot == 2) {
         s1 = selection->x1;
         s2 = selection->x2;
     } else {
         s1 = selection->y1;
         s2 = selection->y2;
     }

     // Walk the words: `begin` is the first word that satisfies the start
     // test, `current` the last word that satisfies the end test once begin
     // is set, and `end` the word following `current`.
     begin = nullptr;
     end = nullptr;
     current = nullptr;
     for (p = words; p != nullptr; p = p->next) {
         if (rot == 0 || rot == 2) {
             pMin = p->xMin;
             pMax = p->xMax;
         } else {
             pMin = p->yMin;
             pMax = p->yMax;
         }

         // The two comparisons swap roles depending on primaryLR.
         if (blk->page->primaryLR) {
             if (((s1 < pMax) || (s2 < pMax)) && begin == nullptr) {
                 begin = p;
             }

             if (((s1 > pMin) || (s2 > pMin)) && begin != nullptr) {
                 end = p->next;
                 current = p;
             }
         } else {
             if (((s1 > pMin) || (s2 > pMin)) && begin == nullptr) {
                 begin = p;
             }

             if (((s1 < pMax) || (s2 < pMax)) && begin != nullptr) {
                 end = p->next;
                 current = p;
             }
         }
     }

     // No word passed the end test: fall back to the first word (may be null).
     if (!current) {
         current = begin;
     }

     // Word-style selection: widen the child selection from the start of
     // `begin` (or the line) to the end of `current` (or the line).
     child_selection = *selection;
     if (style == selectionStyleWord) {
         if (rot == 0 || rot == 2) {
             child_selection.x1 = begin ? begin->xMin : xMin;
             if (end && end->xMax != -1) {
                 child_selection.x2 = current->xMax;
             } else {
                 child_selection.x2 = xMax;
             }
         } else {
             child_selection.y1 = begin ? begin->yMin : yMin;
             if (end && end->yMax != -1) {
                 child_selection.y2 = current->yMax;
             } else {
                 child_selection.y2 = yMax;
             }
         }
     }

     // Re-read the extent, now from the (possibly widened) child selection.
     if (rot == 0 || rot == 2) {
         s1 = child_selection.x1;
         s2 = child_selection.x2;
     } else {
         s1 = child_selection.y1;
         s2 = child_selection.y2;
     }

     // Find the first and one-past-last edge index whose midpoint with the
     // following edge passes XBetweenAB; edge[] is read up to index len.
     edge_begin = len;
     edge_end = 0;
     for (i = 0; i < len; i++) {
         double mid = (edge[i] + edge[i + 1]) / 2;
         if (XBetweenAB(mid, s1, s2)) {
             if (i < edge_begin) {
                 edge_begin = i;
             }

             edge_end = i + 1;
         }
     }

     /* Skip empty selection. */
     if (edge_end <= edge_begin) {
         return;
     }

     visitor->visitLine(this, begin, end, edge_begin, edge_end, &child_selection);

     // Recurse into every word of the half-open run [begin, end).
     for (p = begin; p != end; p = p->next) {
         p->visitSelection(visitor, &child_selection, style);
     }
 }

 // Applies `selection` to this block.  For each of the two selection points
 // (x1,y1) and (x2,y2) the nearest line is located, the two hits are ordered
 // by their position in the line list, the visitor is told about the block,
 // and every line from the first hit to the second receives its own child
 // selection.
 void TextBlock::visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style)
 {
     PDFRectangle child_selection;
     double x[2], y[2], d, best_d[2];
     TextLine *p, *best_line[2];
     int i, count = 0, best_count[2], start, stop;
     bool all[2];

     // Index 0 is the first selection point, index 1 the second.
     x[0] = selection->x1;
     y[0] = selection->y1;
     x[1] = selection->x2;
     y[1] = selection->y2;

     for (i = 0; i < 2; i++) {
         // the first/last lines are often not nearest
         // the corners, so we have to force them to be
         // selected when the selection runs outside this
         // block.
         // all[i] makes the search loop below overwrite the candidate on
         // every line, so the last line wins; a point at or before the
         // block's starting corner is pre-seeded with the first line.
         if (page->primaryLR) {
             all[i] = x[i] >= this->xMax && y[i] >= this->yMax;
             if (x[i] <= this->xMin && y[i] <= this->yMin) {
                 best_line[i] = this->lines;
                 best_count[i] = 1;
             } else {
                 best_line[i] = nullptr;
                 best_count[i] = 0;
             }
         } else {
             all[i] = x[i] <= this->xMin && y[i] >= this->yMax;
             if (x[i] >= this->xMax && y[i] <= this->yMin) {
                 best_line[i] = this->lines;
                 best_count[i] = 1;
             } else {
                 best_line[i] = nullptr;
                 best_count[i] = 0;
             }
         }
         best_d[i] = 0;
     }

     // find the nearest line to the selection points
     // using the manhattan distance.
     for (p = this->lines; p; p = p->next) {
         count++;
         for (i = 0; i < 2; i++) {
             // d is 0 when the point lies inside the line's bounding box.
             d = fmax(p->xMin - x[i], 0.0) + fmax(x[i] - p->xMax, 0.0) + fmax(p->yMin - y[i], 0.0) + fmax(y[i] - p->yMax, 0.0);
             if (!best_line[i] || all[i] || d < best_d[i]) {
                 best_line[i] = p;
                 best_count[i] = count;
                 best_d[i] = d;
             }
         }
     }
     // assert: best is always set.
     if (!best_line[0] || !best_line[1]) {
         return;
     }

     // Now decide which point was first.
     // Ties on the same line are broken by the smaller y.
     if (best_count[0] < best_count[1] || (best_count[0] == best_count[1] && y[0] < y[1])) {
         start = 0;
         stop = 1;
     } else {
         start = 1;
         stop = 0;
     }

     visitor->visitBlock(this, best_line[start], best_line[stop], selection);

     for (p = best_line[start]; p; p = p->next) {
         // Default: the whole line, with x1/x2 swapped when !primaryLR.
         if (page->primaryLR) {
             child_selection.x1 = p->xMin;
             child_selection.x2 = p->xMax;
         } else {
             child_selection.x1 = p->xMax;
             child_selection.x2 = p->xMin;
         }
         child_selection.y1 = p->yMin;
         child_selection.y2 = p->yMax;
         if (style == selectionStyleLine) {
             // Line style: the end lines are opened up to the page origin
             // and to the page width/height respectively.
             if (p == best_line[start]) {
                 child_selection.x1 = 0;
                 child_selection.y1 = 0;
             }
             if (p == best_line[stop]) {
                 child_selection.x2 = page->pageWidth;
                 child_selection.y2 = page->pageHeight;
             }
         } else {
             // Other styles: the end lines use the selection point clamped
             // into the line's bounding box.
             if (p == best_line[start]) {
                 child_selection.x1 = fmax(p->xMin, fmin(p->xMax, x[start]));
                 child_selection.y1 = fmax(p->yMin, fmin(p->yMax, y[start]));
             }
             if (p == best_line[stop]) {
                 child_selection.x2 = fmax(p->xMin, fmin(p->xMax, x[stop]));
                 child_selection.y2 = fmax(p->yMin, fmin(p->yMax, y[stop]));
             }
         }
         p->visitSelection(visitor, &child_selection, style);
         // The stop line is the last one visited.
         if (p == best_line[stop]) {
             return;
         }
     }
 }

 // Applies `selection` to the whole page.  For each of the two selection
 // points the nearest block (over all flows) is located, the two hits are
 // ordered by their position in flow/block order, and every block from the
 // first hit to the second receives its own child selection.  Does nothing
 // when the page has no flows.
 void TextPage::visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style)
 {
     PDFRectangle child_selection;
     double x[2], y[2], d, best_d[2];
     double xMin, yMin, xMax, yMax;
     TextFlow *flow, *best_flow[2];
     TextBlock *blk, *best_block[2];
     int i, count = 0, best_count[2], start, stop;

     if (!flows) {
         return;
     }

     // Index 0 is the first selection point, index 1 the second.
     x[0] = selection->x1;
     y[0] = selection->y1;
     x[1] = selection->x2;
     y[1] = selection->y2;

     // Running bounding box of all blocks, started inverted so the first
     // fmin/fmax below can only tighten it.
     xMin = pageWidth;
     yMin = pageHeight;
     xMax = 0.0;
     yMax = 0.0;

     for (i = 0; i < 2; i++) {
         best_block[i] = nullptr;
         best_flow[i] = nullptr;
         best_count[i] = 0;
         best_d[i] = 0;
     }

     // find the nearest blocks to the selection points
     // using the manhattan distance.
     for (flow = flows; flow; flow = flow->next) {
         for (blk = flow->blocks; blk; blk = blk->next) {
             count++;
             // the first/last blocks in reading order are
             // often not the closest to the page corners;
             // track the corners, force those blocks to
             // be selected if the selection runs across
             // multiple pages.
             xMin = fmin(xMin, blk->xMin);
             yMin = fmin(yMin, blk->yMin);
             xMax = fmax(xMax, blk->xMax);
             yMax = fmax(yMax, blk->yMax);
             for (i = 0; i < 2; i++) {
                 // d is 0 when the point lies inside the block's bounding box.
                 d = fmax(blk->xMin - x[i], 0.0) + fmax(x[i] - blk->xMax, 0.0) + fmax(blk->yMin - y[i], 0.0) + fmax(y[i] - blk->yMax, 0.0);
                 // The third alternative forces the very last block when the
                 // point is at or beyond the accumulated maximum corner.
                 if (!best_block[i] || d < best_d[i] || (!blk->next && !flow->next && x[i] >= fmin(xMax, pageWidth) && y[i] >= fmin(yMax, pageHeight))) {
                     best_block[i] = blk;
                     best_flow[i] = flow;
                     best_count[i] = count;
                     best_d[i] = d;
                 }
             }
         }
     }
     // A point before the starting corner of all text forces the very first
     // block; which horizontal side counts as "before" depends on primaryLR.
     for (i = 0; i < 2; i++) {
         if (primaryLR) {
             if (x[i] < xMin && y[i] < yMin) {
                 best_block[i] = flows->blocks;
                 best_flow[i] = flows;
                 best_count[i] = 1;
             }
         } else {
             if (x[i] > xMax && y[i] < yMin) {
                 best_block[i] = flows->blocks;
                 best_flow[i] = flows;
                 best_count[i] = 1;
             }
         }
     }
     // assert: best is always set.
     if (!best_block[0] || !best_block[1]) {
         return;
     }

     // Now decide which point was first.
     // Ties on the same block are broken by the smaller y.
     if (best_count[0] < best_count[1] || (best_count[0] == best_count[1] && y[0] < y[1])) {
         start = 0;
         stop = 1;
     } else {
         start = 1;
         stop = 0;
     }

     for (flow = best_flow[start]; flow; flow = flow->next) {
         // Resume inside the first flow at the start block; later flows are
         // walked from their first block.
         if (flow == best_flow[start]) {
             blk = best_block[start];
         } else {
             blk = flow->blocks;
         }
         for (; blk; blk = blk->next) {
             // Default: the whole block, with x1/x2 swapped when !primaryLR.
             if (primaryLR) {
                 child_selection.x1 = blk->xMin;
                 child_selection.x2 = blk->xMax;
             } else {
                 child_selection.x1 = blk->xMax;
                 child_selection.x2 = blk->xMin;
             }
             child_selection.y1 = blk->yMin;
             child_selection.y2 = blk->yMax;
             // The end blocks use the selection point clamped into the
             // block's bounding box.
             if (blk == best_block[start]) {
                 child_selection.x1 = fmax(blk->xMin, fmin(blk->xMax, x[start]));
                 child_selection.y1 = fmax(blk->yMin, fmin(blk->yMax, y[start]));
             }
             if (blk == best_block[stop]) {
                 child_selection.x2 = fmax(blk->xMin, fmin(blk->xMax, x[stop]));
                 child_selection.y2 = fmax(blk->yMin, fmin(blk->yMax, y[stop]));
                 blk->visitSelection(visitor, &child_selection, style);
                 // The stop block is the last one visited.
                 return;
             }
             blk->visitSelection(visitor, &child_selection, style);
         }
     }
 }

 // Paints the given selection on `out` by running a TextSelectionPainter
 // over the page and then letting it finish the page.
 void TextPage::drawSelection(OutputDev *out, double scale, int rotation, const PDFRectangle *selection, SelectionStyle style, const GfxColor *glyph_color, const GfxColor *box_color)
 {
     TextSelectionPainter selectionPainter(this, scale, rotation, out, box_color, glyph_color);
     visitSelection(&selectionPainter, selection, style);

     selectionPainter.endPage();
 }

 std::vector<PDFRectangle *> *TextPage::getSelectionRegion(const PDFRectangle *selection, SelectionStyle style, double scale)
 {
     TextSelectionSizer sizer(this, scale);

     visitSelection(&sizer, selection, style);

     return sizer.takeRegion();
 }

 // Runs a TextSelectionDumper over the selection and returns the text it
 // produces.
 GooString *TextPage::getSelectionText(const PDFRectangle *selection, SelectionStyle style)
 {
     TextSelectionDumper textCollector(this);
     visitSelection(&textCollector, selection, style);
     textCollector.endPage();

     GooString *selected = textCollector.getText();
     return selected;
 }

 std::vector<TextWordSelection *> **TextPage::getSelectionWords(const PDFRectangle *selection, SelectionStyle style, int *nLines)
 {
     TextSelectionDumper dumper(this);

     visitSelection(&dumper, selection, style);
     dumper.endPage();

     return dumper.takeWordList(nLines);
 }

 // Computes the bounding box of the characters whose charPos values fall in
 // [pos, pos + length).
 //
 // Returns false when the page is in raw order or when no word overlaps the
 // range; the output parameters are then left untouched.  Otherwise the box
 // is stored in *xMin, *yMin, *xMax, *yMax and true is returned.
 bool TextPage::findCharRange(int pos, int length, double *xMin, double *yMin, double *xMax, double *yMax) const
 {
     TextBlock *blk;
     TextLine *line;
     TextWord *word;
     double xMin0, xMax0, yMin0, yMax0;
     double xMin1, xMax1, yMin1, yMax1;
     bool first;

     if (rawOrder) {
         return false;
     }

     //~ this doesn't correctly handle ranges split across multiple lines
     //~ (the highlighted region is the bounding box of all the parts of
     //~ the range)
     first = true;
     xMin0 = xMax0 = yMin0 = yMax0 = 0; // make gcc happy
     xMin1 = xMax1 = yMin1 = yMax1 = 0; // make gcc happy
     for (int i = 0; i < nBlocks; ++i) {
         blk = blocks[i];
         for (line = blk->lines; line; line = line->next) {
             for (word = line->words; word; word = word->next) {
                 // Only words whose position span overlaps the range matter.
                 if (pos < word->charPosEnd && pos + length > word->chars.front().charPos) {
                     size_t j0, j1;
                     // j0: last character that starts at or before pos.
                     for (j0 = 0; (j0 + 1) < word->len() && pos >= word->chars[j0 + 1].charPos; ++j0) {
                         ;
                     }
                     // j1: last character that starts before pos + length.
                     // The scan starts at the last valid index; starting at
                     // len() would read chars[] past its end.
                     for (j1 = word->len() - 1; j1 > j0 && pos + length <= word->chars[j1].charPos; --j1) {
                         ;
                     }
                     auto startingEdge = word->chars[j0].edge;
                     // The edge after the last character is stored in edgeEnd.
                     auto endingEdge = (j1 + 1 == word->len()) ? word->edgeEnd : word->chars[j1 + 1].edge;
                     // Edges run along x for rot 0/2 and along y for rot 1/3;
                     // rot 2 and 3 have them in the opposite order.
                     switch (line->rot) {
                     case 0:
                         xMin1 = startingEdge;
                         xMax1 = endingEdge;
                         yMin1 = word->yMin;
                         yMax1 = word->yMax;
                         break;
                     case 1:
                         xMin1 = word->xMin;
                         xMax1 = word->xMax;
                         yMin1 = startingEdge;
                         yMax1 = endingEdge;
                         break;
                     case 2:
                         xMin1 = endingEdge;
                         xMax1 = startingEdge;
                         yMin1 = word->yMin;
                         yMax1 = word->yMax;
                         break;
                     case 3:
                         xMin1 = word->xMin;
                         xMax1 = word->xMax;
                         yMin1 = endingEdge;
                         yMax1 = startingEdge;
                         break;
                     }
                     // Grow the accumulated box to include this word's part.
                     if (first || xMin1 < xMin0) {
                         xMin0 = xMin1;
                     }
                     if (first || xMax1 > xMax0) {
                         xMax0 = xMax1;
                     }
                     if (first || yMin1 < yMin0) {
                         yMin0 = yMin1;
                     }
                     if (first || yMax1 > yMax0) {
                         yMax0 = yMax1;
                     }
                     first = false;
                 }
             }
         }
     }
     if (!first) {
         *xMin = xMin0;
         *xMax = xMax0;
         *yMin = yMin0;
         *yMax = yMax0;
         return true;
     }
     return false;
 }

 // Writes the page text through `outputFunc`, encoded with the text encoding
 // obtained from globalParams.  One of three layouts is produced:
 //   - rawOrder (member flag): words in rawWords list order;
 //   - physLayout: line fragments sorted and padded with spaces to their
 //     column;
 //   - otherwise: flow by flow, block by block, line by line.
 // `textEOL` selects the end-of-line sequence; when `pageBreaks` is set a
 // form feed (0x0c) is emitted at the end.  Returns without output when no
 // text encoding is available.
 void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, bool physLayout, EndOfLineKind textEOL, bool pageBreaks)
 {
     const UnicodeMap *uMap;
     TextFlow *flow;
     TextBlock *blk;
     TextLine *line;
     TextLineFrag *frags;
     TextWord *word;
     int nFrags, fragsSize;
     TextLineFrag *frag;
     char space[8], eol[16], eop[8];
     int spaceLen, eolLen, eopLen;
     double delta;
     int col, i, j, d, n;

     // get the output encoding
     if (!(uMap = globalParams->getTextEncoding())) {
         return;
     }
     // Pre-encode the space, end-of-line and end-of-page sequences.
     spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
     eolLen = 0; // make gcc happy
     switch (textEOL) {
     case eolUnix:
         eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
         break;
     case eolDOS:
         // CR followed by LF, encoded back to back in the same buffer.
         eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
         eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
         break;
     case eolMac:
         eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
         break;
     }
     eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop));

     //~ writing mode (horiz/vert)

     // output the page in raw (content stream) order
     if (rawOrder) {

         GooString s;
         std::vector<Unicode> uText;

         for (word = rawWords; word; word = word->next) {
             s.clear();
             // Gather the word's Unicode values into a contiguous buffer.
             uText.resize(word->len());
             std::transform(word->chars.begin(), word->chars.end(), uText.begin(), [](auto &c) { return c.text; });
             dumpFragment(uText.data(), uText.size(), uMap, &s);
             (*outputFunc)(outputStream, s.c_str(), s.getLength());

             // Same line as the next word (close baselines, no large backward
             // overlap): emit a space only if the gap is wide enough;
             // otherwise end the line.
             if (word->next && fabs(word->next->base - word->base) < maxIntraLineDelta * word->fontSize && word->next->xMin > word->xMax - minDupBreakOverlap * word->fontSize) {
                 if (word->next->xMin > word->xMax + minWordSpacing * word->fontSize) {
                     (*outputFunc)(outputStream, space, spaceLen);
                 }
             } else {
                 (*outputFunc)(outputStream, eol, eolLen);
             }
         }

         // output the page, maintaining the original physical layout
     } else if (physLayout) {

         // collect the line fragments for the page and sort them
         fragsSize = 256;
         frags = (TextLineFrag *)gmallocn(fragsSize, sizeof(TextLineFrag));
         nFrags = 0;
         for (i = 0; i < nBlocks; ++i) {
             blk = blocks[i];
             for (line = blk->lines; line; line = line->next) {
                 // Double the array whenever it is full.
                 if (nFrags == fragsSize) {
                     fragsSize *= 2;
                     frags = (TextLineFrag *)greallocn(frags, fragsSize, sizeof(TextLineFrag));
                 }
                 // One fragment per line, covering the whole line.
                 frags[nFrags].init(line, 0, line->len);
                 frags[nFrags].computeCoords(true);
                 ++nFrags;
             }
         }
         qsort(frags, nFrags, sizeof(TextLineFrag), &TextLineFrag::cmpYXPrimaryRot);
         // Re-sort each run of fragments whose baselines lie within delta of
         // the run's first fragment.
         i = 0;
         while (i < nFrags) {
             delta = maxIntraLineDelta * frags[i].line->words->fontSize;
             for (j = i + 1; j < nFrags && fabs(frags[j].base - frags[i].base) < delta; ++j) {
                 ;
             }
             qsort(frags + i, j - i, sizeof(TextLineFrag), &TextLineFrag::cmpXYColumnPrimaryRot);
             i = j;
         }

 #if 0 // for debugging
     printf("*** line fragments ***\n");
     for (i = 0; i < nFrags; ++i) {
       frag = &frags[i];
       printf("frag: x=%.2f..%.2f y=%.2f..%.2f base=%.2f '",
 	     frag->xMin, frag->xMax, frag->yMin, frag->yMax, frag->base);
       for (n = 0; n < frag->len; ++n) {
 	fputc(frag->line->text[frag->start + n] & 0xff, stdout);
       }
       printf("'\n");
     }
     printf("\n");
 #endif

         GooString s;
         // generate output
         col = 0;
         for (i = 0; i < nFrags; ++i) {
             frag = &frags[i];

             // column alignment
             for (; col < frag->col; ++col) {
                 (*outputFunc)(outputStream, space, spaceLen);
             }

             // print the line
             s.clear();
             col += dumpFragment(frag->line->text + frag->start, frag->len, uMap, &s);
             (*outputFunc)(outputStream, s.c_str(), s.getLength());

             // print one or more returns if necessary
             if (i == nFrags - 1 || frags[i + 1].col < col || fabs(frags[i + 1].base - frag->base) > maxIntraLineDelta * frag->line->words->fontSize) {
                 // Number of newlines: baseline distance in units of the font
                 // size, clamped to 1..5; exactly one after the last fragment.
                 if (i < nFrags - 1) {
                     d = (int)((frags[i + 1].base - frag->base) / frag->line->words->fontSize);
                     if (d < 1) {
                         d = 1;
                     } else if (d > 5) {
                         d = 5;
                     }
                 } else {
                     d = 1;
                 }
                 for (; d > 0; --d) {
                     (*outputFunc)(outputStream, eol, eolLen);
                 }
                 col = 0;
             }
         }

         gfree(frags);

         // output the page, "undoing" the layout
     } else {
         for (flow = flows; flow; flow = flow->next) {
             for (blk = flow->blocks; blk; blk = blk->next) {
                 for (line = blk->lines; line; line = line->next) {
                     n = line->len;
                     // Drop the last character of a hyphenated line unless it
                     // is the last line of the last block in its flow.
                     if (line->hyphenated && (line->next || blk->next)) {
                         --n;
                     }
                     GooString s;
                     dumpFragment(line->text, n, uMap, &s);
                     (*outputFunc)(outputStream, s.c_str(), s.getLength());
                     // output a newline when a hyphen is not suppressed
                     if (n == line->len) {
                         (*outputFunc)(outputStream, eol, eolLen);
                     }
                 }
             }
             // One extra end-of-line after each flow.
             (*outputFunc)(outputStream, eol, eolLen);
         }
     }

     // end of page
     if (pageBreaks) {
         (*outputFunc)(outputStream, eop, eopLen);
     }
 }

 void TextPage::setMergeCombining(bool merge)
 {
     mergeCombining = merge;
 }

 // Assigns the `col` field of each of the `nFrags` fragments in `frags`.
 // With `oneRot` set, the fragments are sorted (the array is reordered in
 // place) and columns are recomputed from the fragments alone; otherwise
 // the existing column numbers are shifted so that the smallest becomes 0.
 // NOTE(review): both branches read frags[0], so nFrags is presumably
 // expected to be at least 1 - confirm against callers.
 void TextPage::assignColumns(TextLineFrag *frags, int nFrags, bool oneRot) const
 {
     TextLineFrag *frag0, *frag1;
     int rot, col1, col2, i, j, k;

     // all text in the region has the same rotation -- recompute the
     // column numbers based only on the text in the region
     if (oneRot) {
         qsort(frags, nFrags, sizeof(TextLineFrag), &TextLineFrag::cmpXYLineRot);
         rot = frags[0].line->rot;
         for (i = 0; i < nFrags; ++i) {
             frag0 = &frags[i];
             // col1 becomes the largest column implied by any earlier fragment.
             col1 = 0;
             for (j = 0; j < i; ++j) {
                 frag1 = &frags[j];
                 col2 = 0; // make gcc happy
                 // The four cases share one shape: if frag0 starts past the
                 // end of frag1, it goes one column beyond frag1's width;
                 // otherwise it takes the column of the first character of
                 // frag1 whose midpoint frag0's leading edge has not passed.
                 // rot selects the axis and the direction of the comparison.
                 switch (rot) {
                 case 0:
                     if (frag0->xMin >= frag1->xMax) {
                         col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] - frag1->line->col[frag1->start]) + 1;
                     } else {
                         for (k = frag1->start; k < frag1->start + frag1->len && frag0->xMin >= 0.5 * (frag1->line->edge[k] + frag1->line->edge[k + 1]); ++k) {
                             ;
                         }
                         col2 = frag1->col + frag1->line->col[k] - frag1->line->col[frag1->start];
                     }
                     break;
                 case 1:
                     if (frag0->yMin >= frag1->yMax) {
                         col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] - frag1->line->col[frag1->start]) + 1;
                     } else {
                         for (k = frag1->start; k < frag1->start + frag1->len && frag0->yMin >= 0.5 * (frag1->line->edge[k] + frag1->line->edge[k + 1]); ++k) {
                             ;
                         }
                         col2 = frag1->col + frag1->line->col[k] - frag1->line->col[frag1->start];
                     }
                     break;
                 case 2:
                     if (frag0->xMax <= frag1->xMin) {
                         col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] - frag1->line->col[frag1->start]) + 1;
                     } else {
                         for (k = frag1->start; k < frag1->start + frag1->len && frag0->xMax <= 0.5 * (frag1->line->edge[k] + frag1->line->edge[k + 1]); ++k) {
                             ;
                         }
                         col2 = frag1->col + frag1->line->col[k] - frag1->line->col[frag1->start];
                     }
                     break;
                 case 3:
                     if (frag0->yMax <= frag1->yMin) {
                         col2 = frag1->col + (frag1->line->col[frag1->start + frag1->len] - frag1->line->col[frag1->start]) + 1;
                     } else {
                         for (k = frag1->start; k < frag1->start + frag1->len && frag0->yMax <= 0.5 * (frag1->line->edge[k] + frag1->line->edge[k + 1]); ++k) {
                             ;
                         }
                         col2 = frag1->col + frag1->line->col[k] - frag1->line->col[frag1->start];
                     }
                     break;
                 }
                 if (col2 > col1) {
                     col1 = col2;
                 }
             }
             frag0->col = col1;
         }

         // the region includes text at different rotations -- use the
         // globally assigned column numbers, offset by the minimum column
         // number (i.e., shift everything over to column 0)
     } else {
         // Find the minimum column, then subtract it everywhere.
         col1 = frags[0].col;
         for (i = 1; i < nFrags; ++i) {
             if (frags[i].col < col1) {
                 col1 = frags[i].col;
             }
         }
         for (i = 0; i < nFrags; ++i) {
             frags[i].col -= col1;
         }
     }
 }

 // Encode a run of `len` Unicode characters through uMap and append the
 // result to s.  For a Unicode output map the work (and the return
 // value) is delegated to reorderText(); for any other map the return
 // value is the total number of bytes appended.
 int TextPage::dumpFragment(const Unicode *text, int len, const UnicodeMap *uMap, GooString *s) const
 {
     if (uMap->isUnicode()) {
         return reorderText(text, len, uMap, primaryLR, s, nullptr);
     }

     // Non-Unicode encoding: convert the characters one at a time.
     char encoded[8];
     int columns = 0;
     int idx = 0;
     while (idx < len) {
         const int nEncoded = uMap->mapUnicode(text[idx], encoded, sizeof(encoded));
         s->append(encoded, nEncoded);
         columns += nEncoded;
         ++idx;
     }
     return columns;
 }

 #ifdef TEXTOUT_WORD_LIST
 // Create a TextWordList for this page; physLayout is passed straight
 // through to the TextWordList constructor.
 std::unique_ptr<TextWordList> TextPage::makeWordList(bool physLayout)
 {
     auto words = std::make_unique<TextWordList>(this, physLayout);
     return words;
 }
 #endif

 //------------------------------------------------------------------------
 // ActualText
 //------------------------------------------------------------------------
 // Attach to the given TextPage, taking a reference on it that is
 // released again in the destructor.  No ActualText span is open yet.
 ActualText::ActualText(TextPage *out)
 {
     text = out;
     text->incRefCnt();

     actualTextNBytes = 0;
     actualText = nullptr;
 }

 // Free any pending span text and drop the reference taken on the
 // TextPage in the constructor.
 ActualText::~ActualText()
 {
     // deleting a null pointer is a no-op, so no guard is needed
     delete actualText;
     text->decRefCnt();
 }

 // Handle one drawn character.  Outside an ActualText span the
 // character is forwarded unchanged to the TextPage.  Inside a span
 // only its position is recorded: the first character fixes the start
 // point, every character moves the end point, and the byte count is
 // accumulated.
 void ActualText::addChar(const GfxState *state, double x, double y, double dx, double dy, CharCode c, int nBytes, const Unicode *u, int uLen)
 {
     if (actualText) {
         // Inside ActualText span.
         const bool firstInSpan = (actualTextNBytes == 0);
         if (firstInSpan) {
             actualTextX0 = x;
             actualTextY0 = y;
         }
         actualTextX1 = x + dx;
         actualTextY1 = y + dy;
         actualTextNBytes += nBytes;
         return;
     }

     // No span open: pass the character straight through.
     text->addChar(state, x, y, dx, dy, c, nBytes, u, uLen);
 }

 // Open an ActualText span: store a copy of the replacement text t
 // (discarding any previously stored one) and reset the byte count.
 // The state argument is not used.
 void ActualText::begin(const GfxState *state, const GooString *t)
 {
     // deleting a null pointer is a no-op, so no guard is needed
     delete actualText;
     actualText = new GooString(t);
     actualTextNBytes = 0;
 }

 void ActualText::end(const GfxState *state)
 {
     // ActualText span closed. Output the span text and the
     // extents of all the glyphs inside the span

     if (actualTextNBytes) {
         // now that we have the position info for all of the text inside
         // the marked content span, we feed the "ActualText" back through
         // text->addChar()
         std::vector<Unicode> uni = TextStringToUCS4(actualText->toStr());
         text->addChar(state, actualTextX0, actualTextY0, actualTextX1 - actualTextX0, actualTextY1 - actualTextY0, 0, actualTextNBytes, uni.data(), uni.size());
     }

     delete actualText;
     actualText = nullptr;
     actualTextNBytes = 0;
 }

 //------------------------------------------------------------------------
 // TextOutputDev
 //------------------------------------------------------------------------

 // Output callback used when writing to a file: `stream` is the FILE*
 // to write to, and `len` bytes starting at `text` are written to it.
 static void TextOutputDev_outputToFile(void *stream, const char *text, int len)
 {
     FILE *const file = static_cast<FILE *>(stream);
     fwrite(text, 1, len, file);
 }

 // Construct a text output device that writes to a file.
 //
 //   fileName     - "-" selects stdout; any other non-null name is opened
 //                  in binary mode (appending if `append` is true); a
 //                  null name means no output stream, so endPage() does
 //                  not dump anything
 //   physLayoutA  - stored as physLayout and handed to coalesce()/dump()
 //   fixedPitchA  - only honoured when physLayoutA is true, otherwise 0
 //   rawOrderA, discardDiagA - forwarded to the TextPage constructor
 //
 // If the file cannot be opened an error is reported, ok is set to
 // false, and neither the TextPage nor the ActualText helper is created.
 TextOutputDev::TextOutputDev(const char *fileName, bool physLayoutA, double fixedPitchA, bool rawOrderA, bool append, bool discardDiagA)
 {
     // Give every pointer member a defined value before any branch can
     // leave the constructor; previously outputFunc stayed uninitialized
     // when fileName was null or the file could not be opened.
     text = nullptr;
     actualText = nullptr;
     outputFunc = nullptr;
     outputStream = nullptr;
     needClose = false;

     physLayout = physLayoutA;
     fixedPitch = physLayout ? fixedPitchA : 0;
     rawOrder = rawOrderA;
     discardDiag = discardDiagA;
     doHTML = false;
     textEOL = defaultEndOfLine();
     textPageBreaks = true;
     ok = true;
     minColSpacing1 = minColSpacing1_default;

     // open file
     if (fileName) {
         if (!strcmp(fileName, "-")) {
             outputStream = stdout;
 #if defined(_WIN32) || defined(__CYGWIN__)
             // keep DOS from munging the end-of-line characters
             _setmode(fileno(stdout), O_BINARY);
 #endif
         } else if ((outputStream = openFile(fileName, append ? "ab" : "wb"))) {
             // we opened it, so the destructor has to close it
             needClose = true;
         } else {
             error(errIO, -1, "Couldn't open text file '{0:s}'", fileName);
             ok = false;
             return;
         }
         outputFunc = &TextOutputDev_outputToFile;
     }

     // set up text object
     text = new TextPage(rawOrderA, discardDiagA);
     actualText = new ActualText(text);
 }

 // Construct a text output device that hands its output to a caller
 // supplied function.  `func` and `stream` are stored as given and the
 // stream is never closed by this object (needClose is false).  The
 // remaining parameters have the same effect as in the file-based
 // constructor.
 TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream, bool physLayoutA, double fixedPitchA, bool rawOrderA, bool discardDiagA)
 {
     // output sink
     outputFunc = func;
     outputStream = stream;
     needClose = false;

     // layout options
     physLayout = physLayoutA;
     fixedPitch = physLayout ? fixedPitchA : 0;
     rawOrder = rawOrderA;
     discardDiag = discardDiagA;
     doHTML = false;
     textEOL = defaultEndOfLine();
     textPageBreaks = true;
     minColSpacing1 = minColSpacing1_default;
     ok = true;

     // set up text object
     text = new TextPage(rawOrderA, discardDiagA);
     actualText = new ActualText(text);
 }

 // Close the output file if this object opened it, release this
 // object's reference on the TextPage (if one was created), and destroy
 // the ActualText helper.
 TextOutputDev::~TextOutputDev()
 {
     if (needClose) {
         FILE *const file = static_cast<FILE *>(outputStream);
         fclose(file);
     }
     if (text != nullptr) {
         text->decRefCnt();
     }
     delete actualText;
 }

 void TextOutputDev::startPage(int pageNum, GfxState *state, XRef *xref)
 {
     text->startPage(state);
 }

 // Finish the current page: end it, coalesce the collected text using
 // the configured layout options, and -- when an output stream is set --
 // dump the text through the output function.
 void TextOutputDev::endPage()
 {
     text->endPage();
     text->coalesce(physLayout, fixedPitch, doHTML, minColSpacing1);

     if (!outputStream) {
         // nothing to write to; the text stays available on the TextPage
         return;
     }
     text->dump(outputStream, outputFunc, physLayout, textEOL, textPageBreaks);
 }

 // A graphics-state restore is handled by having the TextPage update
 // its font from the restored state.
 void TextOutputDev::restoreState(GfxState *state)
 {
     TextPage *const page = text;
     page->updateFont(state);
 }

 // Forward a font change in the graphics state to the TextPage.
 void TextOutputDev::updateFont(GfxState *state)
 {
     TextPage *const page = text;
     page->updateFont(state);
 }

 // Intentionally empty: nothing is done at the start of a string.
 void TextOutputDev::beginString(GfxState *state, const GooString *s)
 {
 }

 // Intentionally empty: nothing is done at the end of a string.
 void TextOutputDev::endString(GfxState *state)
 {
 }

 // Route a drawn character through the ActualText helper.  originX and
 // originY are not used here.
 void TextOutputDev::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode c, int nBytes, const Unicode *u, int uLen)
 {
     ActualText *const helper = actualText;
     helper->addChar(state, x, y, dx, dy, c, nBytes, u, uLen);
 }

 void TextOutputDev::incCharCount(int nChars)
 {
     text->incCharCount(nChars);
 }

 // Open an ActualText span with replacement text t.
 void TextOutputDev::beginActualText(GfxState *state, const GooString *t)
 {
     ActualText *const helper = actualText;
     helper->begin(state, t);
 }

 // Close the currently open ActualText span.
 void TextOutputDev::endActualText(GfxState *state)
 {
     ActualText *const helper = actualText;
     helper->end(state);
 }

 // In HTML mode, record a stroked path as an underline when it is a
 // single two-point subpath that is exactly vertical or horizontal
 // after transformation.  Does nothing otherwise.
 void TextOutputDev::stroke(GfxState *state)
 {
     if (!doHTML) {
         return;
     }

     // only a path made of exactly one two-point subpath qualifies
     const GfxPath *path = state->getPath();
     if (path->getNumSubpaths() != 1) {
         return;
     }
     const GfxSubpath *segment = path->getSubpath(0);
     if (segment->getNumPoints() != 2) {
         return;
     }

     double startX, startY, endX, endY;
     state->transform(segment->getX(0), segment->getY(0), &startX, &startY);
     state->transform(segment->getX(1), segment->getY(1), &endX, &endY);

     // neither vertical nor horizontal -- not an underline candidate
     if (startX != endX && startY != endY) {
         return;
     }
     text->addUnderline(startX, startY, endX, endY);
 }

 // In HTML mode, record a filled path as an underline when it is a
 // single closed five-point subpath without curve points that forms an
 // axis-aligned rectangle after transformation, and whose shorter side
 // is below maxUnderlineWidth.  The underline runs along the middle of
 // the rectangle in its longer direction.
 void TextOutputDev::fill(GfxState *state)
 {
     if (!doHTML) {
         return;
     }

     const GfxPath *path = state->getPath();
     if (path->getNumSubpaths() != 1) {
         return;
     }
     const GfxSubpath *outline = path->getSubpath(0);
     if (outline->getNumPoints() != 5) {
         return;
     }

     // transform the five points, bailing out on any curve point
     double px[5], py[5];
     for (int pt = 0; pt < 5; ++pt) {
         if (outline->getCurve(pt)) {
             return;
         }
         state->transform(outline->getX(pt), outline->getY(pt), &px[pt], &py[pt]);
     }

     // look for a rectangle: the last point must coincide with the
     // first, and the sides must alternate between constant x and
     // constant y (starting with either one)
     const bool closed = px[0] == px[4] && py[0] == py[4];
     const bool startsAlongY = px[0] == px[1] && py[1] == py[2] && px[2] == px[3] && py[3] == py[4];
     const bool startsAlongX = py[0] == py[1] && px[1] == px[2] && py[2] == py[3] && px[3] == px[4];

     double xLo, yLo, xHi, yHi;
     if (startsAlongY && closed) {
         xLo = px[0];
         yLo = py[0];
         xHi = px[2];
         yHi = py[1];
     } else if (startsAlongX && closed) {
         xLo = px[0];
         yLo = py[0];
         xHi = px[1];
         yHi = py[2];
     } else {
         return;
     }

     // normalize so that (xLo, yLo) <= (xHi, yHi)
     if (xHi < xLo) {
         const double swapped = xLo;
         xLo = xHi;
         xHi = swapped;
     }
     if (yHi < yLo) {
         const double swapped = yLo;
         yLo = yHi;
         yHi = swapped;
     }

     const double xExtent = xHi - xLo;
     const double yExtent = yHi - yLo;

     if (yExtent < xExtent) {
         // skinny horizontal rectangle
         if (yExtent < maxUnderlineWidth) {
             const double midY = 0.5 * (yLo + yHi);
             text->addUnderline(xLo, midY, xHi, midY);
         }
     } else {
         // skinny vertical rectangle
         if (xExtent < maxUnderlineWidth) {
             const double midX = 0.5 * (xLo + xHi);
             text->addUnderline(midX, yLo, midX, yHi);
         }
     }
 }

 // Even-odd fills are treated exactly like ordinary fills; outside
 // HTML mode nothing is done.
 void TextOutputDev::eoFill(GfxState *state)
 {
     if (doHTML) {
         fill(state);
     }
 }

 // In HTML mode, register a link annotation with the TextPage.  The
 // four corners of the link rectangle are converted from user to device
 // coordinates and their bounding box is passed to addLink().
 void TextOutputDev::processLink(AnnotLink *link)
 {
     if (!doHTML) {
         return;
     }

     double userX1, userY1, userX2, userY2;
     link->getRect(&userX1, &userY1, &userX2, &userY2);

     // corners in the order (x1,y1), (x1,y2), (x2,y1), (x2,y2)
     const double cornerX[4] = { userX1, userX1, userX2, userX2 };
     const double cornerY[4] = { userY1, userY2, userY1, userY2 };

     // the first corner seeds the bounding box
     int devX, devY;
     cvtUserToDev(cornerX[0], cornerY[0], &devX, &devY);
     int xLo = devX;
     int xHi = devX;
     int yLo = devY;
     int yHi = devY;

     // the remaining corners grow it as needed
     for (int corner = 1; corner < 4; ++corner) {
         cvtUserToDev(cornerX[corner], cornerY[corner], &devX, &devY);
         if (devX < xLo) {
             xLo = devX;
         } else if (devX > xHi) {
             xHi = devX;
         }
         if (devY < yLo) {
             yLo = devY;
         } else if (devY > yHi) {
             yHi = devY;
         }
     }

     text->addLink(xLo, yLo, xHi, yHi, link);
 }

 // Search the page text; every argument is passed unchanged to
 // TextPage::findText() and its result is returned.
 bool TextOutputDev::findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool backward, bool wholeWord, double *xMin, double *yMin, double *xMax, double *yMax) const
 {
     const bool found = text->findText(s, len, startAtTop, stopAtBottom, startAtLast, stopAtLast, caseSensitive, backward, wholeWord, xMin, yMin, xMax, yMax);
     return found;
 }

 // Return what TextPage::getText() yields for the given rectangle,
 // using this device's configured end-of-line setting.
 GooString *TextOutputDev::getText(double xMin, double yMin, double xMax, double yMax) const
 {
     auto *extracted = text->getText(xMin, yMin, xMax, yMax, textEOL);
     return extracted;
 }

 // Draw the given selection; all arguments are handed unchanged to
 // TextPage::drawSelection().
 void TextOutputDev::drawSelection(OutputDev *out, double scale, int rotation, const PDFRectangle *selection, SelectionStyle style, const GfxColor *glyph_color, const GfxColor *box_color)
 {
     TextPage *const page = text;
     page->drawSelection(out, scale, rotation, selection, style, glyph_color, box_color);
 }

 // Return the rectangle list that TextPage::getSelectionRegion()
 // produces for the given selection, style and scale.
 std::vector<PDFRectangle *> *TextOutputDev::getSelectionRegion(const PDFRectangle *selection, SelectionStyle style, double scale)
 {
     auto *region = text->getSelectionRegion(selection, style, scale);
     return region;
 }

 // Return what TextPage::getSelectionText() yields for the given
 // selection and style.
 GooString *TextOutputDev::getSelectionText(const PDFRectangle *selection, SelectionStyle style)
 {
     auto *selected = text->getSelectionText(selection, style);
     return selected;
 }

 // Look up a character range; every argument is passed unchanged to
 // TextPage::findCharRange() and its result is returned.
 bool TextOutputDev::findCharRange(int pos, int length, double *xMin, double *yMin, double *xMax, double *yMax) const
 {
     const bool found = text->findCharRange(pos, length, xMin, yMin, xMax, yMax);
     return found;
 }

 void TextOutputDev::setMergeCombining(bool merge)
 {
     text->setMergeCombining(merge);
 }

 #ifdef TEXTOUT_WORD_LIST
 // Build a word list from the current TextPage using this device's
 // physLayout setting.
 std::unique_ptr<TextWordList> TextOutputDev::makeWordList()
 {
     auto words = text->makeWordList(physLayout);
     return words;
 }
 #endif

 // Hand the current TextPage to the caller and continue with a fresh
 // one.  The ActualText helper is recreated so that it refers to the
 // new page.
 TextPage *TextOutputDev::takeText()
 {
     TextPage *const released = text;

     // start over with an empty page built from the same options
     text = new TextPage(rawOrder, discardDiag);

     // the old helper still points at the released page; replace it
     delete actualText;
     actualText = new ActualText(text);

     return released;
 }

 // Return the flows reported by the current TextPage.
 const TextFlow *TextOutputDev::getFlows() const
 {
     const auto *flows = text->getFlows();
     return flows;
 }