//========================================================================
//
// TextOutputDev.h
//
// Copyright 1997-2003 Glyph & Cog, LLC
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005-2007 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2006 Ed Catmur <ed@catmur.co.uk>
// Copyright (C) 2007, 2008, 2011, 2013 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2007, 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2008, 2010, 2015, 2016, 2018, 2019, 2021 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2010 Brian Ewins <brian.ewins@gmail.com>
// Copyright (C) 2012, 2013, 2015, 2016 Jason Crain <jason@aquaticape.us>
// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Sanchit Anand <sanxchit@gmail.com>
// Copyright (C) 2018, 2020, 2021 Nelson Benítez León <nbenitezl@gmail.com>
// Copyright (C) 2019, 2022 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2019 Dan Shea <dan.shea@logical-innovations.com>
// Copyright (C) 2020 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
// Copyright (C) 2024 Stefan Brüns <stefan.bruens@rwth-aachen.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================

#ifndef TEXTOUTPUTDEV_H
#define TEXTOUTPUTDEV_H

#include "poppler-config.h"
#include "poppler_private_export.h"
#include <cstdio>
#include "GfxFont.h"
#include "GfxState.h"
#include "OutputDev.h"

class GooString;
class Gfx;
class GfxFont;
class GfxState;
class UnicodeMap;
class AnnotLink;

class TextWord;
class TextPool;
class TextLine;
class TextLineFrag;
class TextBlock;
class TextFlow;
class TextLink;
class TextUnderline;
class TextWordList;
class TextPage;
class TextSelectionVisitor;

//------------------------------------------------------------------------

// Output callback type: invoked with a caller-supplied <stream> pointer,
// a <text> buffer and its length <len>.
using TextOutputFunc = void (*)(void *stream, const char *text, int len);

// Selection style accepted by the visitSelection(), drawSelection() and
// getSelection*() functions declared in this file.
enum SelectionStyle
{
    selectionStyleGlyph = 0,
    selectionStyleWord = 1,
    selectionStyleLine = 2
};

// Kind of end-of-line marker.
enum EndOfLineKind
{
    eolUnix = 0, // LF
    eolDOS = 1, // CR+LF
    eolMac = 2 // CR
};

//------------------------------------------------------------------------
// TextFontInfo
//------------------------------------------------------------------------

// Font information captured for text on a page: a shared reference to
// a GfxFont plus, when TEXTOUT_WORD_LIST is defined, the font name and
// font descriptor flags.  Instances are not copyable.
class POPPLER_PRIVATE_EXPORT TextFontInfo
{
public:
    // Build the font info from a graphics state.
    // NOTE(review): presumably records the state's current font; the
    // definition lives outside this header -- confirm there.
    explicit TextFontInfo(const GfxState *state);
    ~TextFontInfo();

    // Copying is disabled.
    TextFontInfo(const TextFontInfo &) = delete;
    TextFontInfo &operator=(const TextFontInfo &) = delete;

    // Test this font info against a graphics state, another
    // TextFontInfo, or a Ref.
    // NOTE(review): the matching criteria are defined out of line.
    bool matches(const GfxState *state) const;
    bool matches(const TextFontInfo *fontInfo) const;
    bool matches(const Ref *ref) const;

    // Get the font ascent, or a default value if the font is not set
    double getAscent() const;

    // Get the font descent, or a default value if the font is not set
    double getDescent() const;

    // Get the writing mode (0 or 1), or 0 if the font is not set
    int getWMode() const;

#ifdef TEXTOUT_WORD_LIST
    // Get the font name (which may be NULL).
    const GooString *getFontName() const { return fontName; }

    // Get font descriptor flags.  Each accessor tests the
    // corresponding font* bit in <flags>.
    bool isFixedWidth() const { return flags & fontFixedWidth; }
    bool isSerif() const { return flags & fontSerif; }
    bool isSymbolic() const { return flags & fontSymbolic; }
    bool isItalic() const { return flags & fontItalic; }
    bool isBold() const { return flags & fontBold; }
#endif

private:
    std::shared_ptr<GfxFont> gfxFont; // the font (shared ownership)
#ifdef TEXTOUT_WORD_LIST
    GooString *fontName; // font name, may be NULL
    int flags; // font descriptor flags (font* bits)
#endif

    // These classes access the private members directly.
    friend class TextWord;
    friend class TextPage;
    friend class TextSelectionPainter;
};

//------------------------------------------------------------------------
// TextWord
//------------------------------------------------------------------------

// A word: a sequence of characters stored together with per-character
// data (Unicode value, character code, position, edge, font and text
// matrix), a bounding box and a baseline.  Words are chained through
// <next>.  Instances are not copyable.
class POPPLER_PRIVATE_EXPORT TextWord
{
public:
    // Constructor.
    TextWord(const GfxState *state, int rotA, double fontSize);

    // Destructor.
    ~TextWord();

    // Copying is disabled.
    TextWord(const TextWord &) = delete;
    TextWord &operator=(const TextWord &) = delete;

    // Add a character to the word.
    void addChar(const GfxState *state, TextFontInfo *fontA, double x, double y, double dx, double dy, int charPosA, int charLen, CharCode c, Unicode u, const Matrix &textMatA);

    // Attempt to add a character to the word as a combining character.
    // Either character u or the last character in the word must be an
    // acute, dieresis, or other combining character.  Returns true if
    // the character was added.
    bool addCombining(const GfxState *state, TextFontInfo *fontA, double fontSizeA, double x, double y, double dx, double dy, int charPosA, int charLen, CharCode c, Unicode u, const Matrix &textMatA);

    // Merge <word> onto the end of <this>.
    void merge(TextWord *word);

    // Compares <this> to <word>, returning -1 (<), 0 (=), or +1 (>),
    // based on a primary-axis comparison, e.g., x ordering if rot=0.
    int primaryCmp(const TextWord *word) const;

    // Return the distance along the primary axis between <this> and
    // <word>.
    double primaryDelta(const TextWord *word) const;

    // Comparator taking two untyped pointers.
    // NOTE(review): presumably orders words by y, then x, for use with
    // qsort-like sorting -- confirm against the definition.
    static int cmpYX(const void *p1, const void *p2);

    // Report the part of this word covered by <selection> to <visitor>,
    // using the given selection style.
    void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style);

    // Get the TextFontInfo object associated with a character.
    // <idx> is not range-checked.
    const TextFontInfo *getFontInfo(int idx) const { return chars[idx].font; }

    // Get the next TextWord on the linked list.
    const TextWord *getNext() const { return next; }

#ifdef TEXTOUT_WORD_LIST
    // Number of characters in the word.
    int getLength() const { return chars.size(); }
    // Pointer to the Unicode value of character <idx> (not range-checked).
    const Unicode *getChar(int idx) const { return &chars[idx].text; }
    // NOTE(review): presumably returns a newly allocated string owned by
    // the caller -- confirm against the definition.
    GooString *getText() const;
    // Font name of character <idx> (not range-checked).
    const GooString *getFontName(int idx) const { return chars[idx].font->fontName; }
    // Copy the word color into <r>, <g>, <b>.
    void getColor(double *r, double *g, double *b) const
    {
        *r = colorR;
        *g = colorG;
        *b = colorB;
    }
    // Copy the word bounding box into the four output parameters.
    void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) const
    {
        *xMinA = xMin;
        *yMinA = yMin;
        *xMaxA = xMax;
        *yMaxA = yMax;
    }
    // Get the bounding box of the single character <charIdx>.
    void getCharBBox(int charIdx, double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) const;
    double getFontSize() const { return fontSize; }
    int getRotation() const { return rot; }
    // Position of the first character, or 0 for an empty word.
    int getCharPos() const { return chars.empty() ? 0 : chars.front().charPos; }
    // Difference between the positions of the last and first characters,
    // or 0 for an empty word.
    // NOTE(review): this does not use charPosEnd, so the extent of the
    // last character itself is not included -- confirm this is intended.
    int getCharLen() const { return chars.empty() ? 0 : chars.back().charPos - chars.front().charPos; }
    bool getSpaceAfter() const { return spaceAfter; }
#endif
    bool isUnderlined() const { return underlined; }
    const AnnotLink *getLink() const { return link; }
    // Edge coordinate of character <i> (not range-checked).
    double getEdge(int i) const { return chars[i].edge; }
    double getBaseline() const { return base; }
    bool hasSpaceAfter() const { return spaceAfter; }
    // Same value as getNext().
    const TextWord *nextWord() const { return next; }
    // Number of characters, in the container's own size type.
    auto len() const { return chars.size(); }

private:
    // NOTE(review): presumably initialises the bounding box and baseline
    // from the font metrics at (x, y) -- confirm against the definition.
    void setInitialBounds(TextFontInfo *fontA, double x, double y);

    int rot; // rotation, multiple of 90 degrees
             //   (0, 1, 2, or 3)
    int wMode; // horizontal (0) or vertical (1) writing mode
    double xMin, xMax; // bounding box x coordinates
    double yMin, yMax; // bounding box y coordinates
    double base; // baseline x or y coordinate

    double fontSize; // font size

    // Per-character data.
    struct CharInfo
    {
        Unicode text; // Unicode value
        CharCode charcode; // character code
        int charPos; // character position
        double edge; // edge coordinate of the character
        TextFontInfo *font; // font info for the character
        Matrix textMat; // text matrix recorded for the character
    };
    std::vector<CharInfo> chars;
    // NOTE(review): presumably the position and edge just past the last
    // character -- confirm where these are assigned.
    int charPosEnd = 0;
    double edgeEnd = 0;

    bool spaceAfter; // set if there is a space between this
                     //   word and the next word on the line
    bool underlined;
    bool invisible; // whether we are invisible (glyphless)
    TextWord *next; // next word in line

#ifdef TEXTOUT_WORD_LIST
    double colorR, // word color
            colorG, colorB;
#endif

    AnnotLink *link;

    // These classes access the private members directly.
    friend class TextPool;
    friend class TextLine;
    friend class TextBlock;
    friend class TextFlow;
    friend class TextWordList;
    friend class TextPage;

    friend class TextSelectionPainter;
    friend class TextSelectionDumper;
};

//------------------------------------------------------------------------
// TextPool
//------------------------------------------------------------------------

// A pool of TextWords, stored as an array of linked lists with one
// list per baseline bucket.  Bucket indices run from minBaseIdx to
// maxBaseIdx.  Instances are not copyable.
class TextPool
{
public:
    TextPool();
    ~TextPool();

    // Copying is disabled.
    TextPool(const TextPool &) = delete;
    TextPool &operator=(const TextPool &) = delete;

    // Get / set the list head for baseline bucket <baseIdx>.  The index
    // is offset by minBaseIdx and is not range-checked.
    TextWord *getPool(int baseIdx) { return pool[baseIdx - minBaseIdx]; }
    void setPool(int baseIdx, TextWord *p) { pool[baseIdx - minBaseIdx] = p; }

    // Map a baseline coordinate to its bucket index.
    int getBaseIdx(double base) const;

    // Add <word> to the pool.
    void addWord(TextWord *word);

private:
    int minBaseIdx; // min baseline bucket index
    int maxBaseIdx; // max baseline bucket index
    TextWord **pool; // array of linked lists, one for each
                     //   baseline value (multiple of 4 pts)
    TextWord *cursor; // pointer to last-accessed word
    int cursorBaseIdx; // baseline bucket index of last-accessed word

    // These classes access the private members directly.
    friend class TextBlock;
    friend class TextPage;
};

struct TextFlowData;

//------------------------------------------------------------------------
// TextLine
//------------------------------------------------------------------------

// A line of text inside a TextBlock: a linked list of TextWords plus
// per-character arrays (Unicode text, edges, columns) and normalized /
// ascii forms of the text.  Lines are chained through <next>.
// Instances are not copyable.
class TextLine
{
public:
    TextLine(TextBlock *blkA, int rotA, double baseA);
    ~TextLine();

    // Copying is disabled.
    TextLine(const TextLine &) = delete;
    TextLine &operator=(const TextLine &) = delete;

    // Add <word> to this line.
    void addWord(TextWord *word);

    // Return the distance along the primary axis between <this> and
    // <line>.
    double primaryDelta(const TextLine *line) const;

    // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
    // based on a primary-axis comparison, e.g., x ordering if rot=0.
    int primaryCmp(const TextLine *line) const;

    // Compares <this> to <line>, returning -1 (<), 0 (=), or +1 (>),
    // based on a secondary-axis comparison of the baselines, e.g., y
    // ordering if rot=0.
    int secondaryCmp(const TextLine *line) const;

    // NOTE(review): presumably compares by y, then x -- confirm against
    // the definition.
    int cmpYX(const TextLine *line) const;

    // Comparator taking two untyped pointers.
    // NOTE(review): presumably orders by x, then y, for use with
    // qsort-like sorting -- confirm against the definition.
    static int cmpXY(const void *p1, const void *p2);

    // NOTE(review): presumably builds the per-character arrays below
    // from the word list, using <uMap> -- confirm against the definition.
    void coalesce(const UnicodeMap *uMap);

    // Report the part of this line covered by <selection> to <visitor>,
    // using the given selection style.
    void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style);

    // Get the head of the linked list of TextWords.
    const TextWord *getWords() const { return words; }

    // Get the next TextLine on the linked list.
    const TextLine *getNext() const { return next; }

    // Returns true if the last char of the line is a hyphen.
    bool isHyphenated() const { return hyphenated; }

private:
    TextBlock *blk; // parent block
    int rot; // text rotation
    double xMin, xMax; // bounding box x coordinates
    double yMin, yMax; // bounding box y coordinates
    double base; // baseline x or y coordinate
    TextWord *words; // words in this line
    TextWord *lastWord; // last word in this line
    Unicode *text; // Unicode text of the line, including
                   //   spaces between words
    double *edge; // "near" edge x or y coord of each char
                  //   (plus one extra entry for the last char)
    int *col; // starting column number of each Unicode char
    int len; // number of Unicode chars
    int convertedLen; // total number of converted characters
    bool hyphenated; // set if last char is a hyphen
    TextLine *next; // next line in block
    Unicode *normalized; // normalized form of Unicode text
    int normalized_len; // number of normalized Unicode chars
    int *normalized_idx; // indices of normalized chars into Unicode text
    Unicode *ascii_translation; // ascii translation from the normalized text
    int ascii_len; // length of ascii translation text
    int *ascii_idx; // indices of ascii chars into Unicode text of line

    // These classes access the private members directly.
    friend class TextLineFrag;
    friend class TextBlock;
    friend class TextFlow;
    friend class TextWordList;
    friend class TextPage;

    friend class TextSelectionPainter;
    friend class TextSelectionSizer;
    friend class TextSelectionDumper;
};

//------------------------------------------------------------------------
// TextBlock
//------------------------------------------------------------------------

// A block of text on a page: a linked list of TextLines with a
// bounding box, an extended bounding box and table bookkeeping.  Words
// are kept in a TextPool until the lines are built.  Blocks are chained
// through <next>.  Instances are not copyable.
class TextBlock
{
public:
    TextBlock(TextPage *pageA, int rotA);
    ~TextBlock();

    // Copying is disabled.
    TextBlock(const TextBlock &) = delete;
    TextBlock &operator=(const TextBlock &) = delete;

    // Add <word> to this block.
    void addWord(TextWord *word);

    // NOTE(review): presumably turns the pooled words into lines;
    // <fixedPitch> is forwarded from the page-level coalesce -- confirm
    // against the definition.
    void coalesce(const UnicodeMap *uMap, double fixedPitch);

    // Update this block's priMin and priMax values, looking at <blk>.
    void updatePriMinMax(const TextBlock *blk);

    // Comparators taking two untyped pointers.
    // NOTE(review): presumably order blocks by x-then-y / y-then-x
    // relative to the page's primary rotation -- confirm against the
    // definitions.
    static int cmpXYPrimaryRot(const void *p1, const void *p2);

    static int cmpYXPrimaryRot(const void *p1, const void *p2);

    // NOTE(review): presumably a primary-axis comparison analogous to
    // TextLine::primaryCmp -- confirm against the definition.
    int primaryCmp(const TextBlock *blk) const;

    // NOTE(review): presumably the distance between <this> and <blk>
    // along the secondary axis -- confirm against the definition.
    double secondaryDelta(const TextBlock *blk) const;

    // Returns true if <this> is below <blk>, relative to the page's
    // primary rotation.
    bool isBelow(const TextBlock *blk) const;

    // Report the part of this block covered by <selection> to
    // <visitor>, using the given selection style.
    void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style);

    // Get the head of the linked list of TextLines.
    const TextLine *getLines() const { return lines; }

    // Get the next TextBlock on the linked list.
    const TextBlock *getNext() const { return next; }

    // Copy the block bounding box into the four output parameters.
    void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) const
    {
        *xMinA = xMin;
        *yMinA = yMin;
        *xMaxA = xMax;
        *yMaxA = yMax;
    }

    // Number of lines in the block.
    int getLineCount() const { return nLines; }

private:
    // Ordering predicates between <this> and <blk1>.
    // NOTE(review): the rules themselves are defined with the
    // implementation; nothing in this header describes them.
    bool isBeforeByRule1(const TextBlock *blk1);
    bool isBeforeByRepeatedRule1(const TextBlock *blkList, const TextBlock *blk1);
    bool isBeforeByRule2(const TextBlock *blk1);

    // NOTE(review): presumably a depth-first traversal that fills
    // <sorted> and returns the updated sort position; the second
    // overload additionally takes a cache -- confirm against the
    // definitions.
    int visitDepthFirst(TextBlock *blkList, int pos1, TextBlock **sorted, int sortPos, bool *visited);
    int visitDepthFirst(TextBlock *blkList, int pos1, TextBlock **sorted, int sortPos, bool *visited, TextBlock **cache, int cacheSize);

    TextPage *page; // the parent page
    int rot; // text rotation
    double xMin, xMax; // bounding box x coordinates
    double yMin, yMax; // bounding box y coordinates
    double priMin, priMax; // whitespace bounding box along primary axis
    double ExMin, ExMax; // extended bounding box x coordinates
    double EyMin, EyMax; // extended bounding box y coordinates
    int tableId; // id of table to which this block belongs
    bool tableEnd; // is this block at end of line of actual table

    TextPool *pool; // pool of words (used only until lines
                    //   are built)
    TextLine *lines; // linked list of lines
    TextLine *curLine; // most recently added line
    int nLines; // number of lines
    int charCount; // number of characters in the block
    int col; // starting column
    int nColumns; // number of columns in the block

    TextBlock *next; // next block on the linked list
    TextBlock *stackNext; // NOTE(review): presumably a link used by a temporary stack of blocks -- confirm

    // These classes access the private members directly.
    friend class TextLine;
    friend class TextLineFrag;
    friend class TextFlow;
    friend class TextWordList;
    friend class TextPage;
    friend class TextSelectionPainter;
    friend class TextSelectionDumper;
};

//------------------------------------------------------------------------
// TextFlow
//------------------------------------------------------------------------

// A flow: a linked list of TextBlocks with a bounding box.  Flows are
// chained through <next>.  Instances are not copyable.
class TextFlow
{
public:
    // Create a flow for <pageA>, starting from <blk>.
    TextFlow(TextPage *pageA, TextBlock *blk);
    ~TextFlow();

    // Copying is disabled.
    TextFlow(const TextFlow &) = delete;
    TextFlow &operator=(const TextFlow &) = delete;

    // Add a block to the end of this flow.
    void addBlock(TextBlock *blk);

    // Returns true if <blk> fits below <prevBlk> in the flow, i.e., (1)
    // it uses a font no larger than the last block added to the flow,
    // and (2) it fits within the flow's [priMin, priMax] along the
    // primary axis.
    bool blockFits(const TextBlock *blk, const TextBlock *prevBlk) const;

    // Get the head of the linked list of TextBlocks.
    const TextBlock *getBlocks() const { return blocks; }

    // Get the next TextFlow on the linked list.
    const TextFlow *getNext() const { return next; }

private:
    TextPage *page; // the parent page
    double xMin, xMax; // bounding box x coordinates
    double yMin, yMax; // bounding box y coordinates
    double priMin, priMax; // whitespace bounding box along primary axis
    TextBlock *blocks; // blocks in flow
    TextBlock *lastBlk; // last block in this flow
    TextFlow *next; // next flow on the linked list

    // These classes access the private members directly.
    friend class TextWordList;
    friend class TextPage;
};

#ifdef TEXTOUT_WORD_LIST

//------------------------------------------------------------------------
// TextWordList
//------------------------------------------------------------------------

// A flat list of pointers to the TextWords of a TextPage.  Only
// available when TEXTOUT_WORD_LIST is defined.  Instances are not
// copyable.
class POPPLER_PRIVATE_EXPORT TextWordList
{
public:
    // Build a flat word list, in content stream order (if
    // text->rawOrder is true), physical layout order (if <physLayout>
    // is true and text->rawOrder is false), or reading order (if both
    // flags are false).
    TextWordList(const TextPage *text, bool physLayout);

    ~TextWordList();

    // Copying is disabled.
    TextWordList(const TextWordList &) = delete;
    TextWordList &operator=(const TextWordList &) = delete;

    // Return the number of words on the list.
    int getLength() const;

    // Return the <idx>th word from the list.
    // NOTE(review): behaviour for an out-of-range <idx> is decided by
    // the definition, which is not in this header.
    TextWord *get(int idx);

private:
    // NOTE(review): the pointers presumably refer to words owned by the
    // TextPage rather than by this list -- confirm against the destructor.
    std::vector<TextWord *> words;
};

#endif // TEXTOUT_WORD_LIST

// Describes a selected range [begin, end) or [begin, end] of characters
// within a single word.
// NOTE(review): whether <end> is inclusive is decided by the code that
// creates and consumes these objects, not by this class.
class TextWordSelection
{
public:
    // Store the word pointer and the two character indices as given.
    TextWordSelection(const TextWord *wordA, int beginA, int endA) : word { wordA }, begin { beginA }, end { endA } { }

    // The word this selection refers to; this class never frees it.
    const TextWord *getWord() const
    {
        return word;
    }

    // Index of the first selected character.
    int getBegin() const
    {
        return begin;
    }

    // Index marking the end of the selected characters.
    int getEnd() const
    {
        return end;
    }

private:
    const TextWord *word;
    int begin;
    int end;

    // These classes access the private members directly.
    friend class TextSelectionPainter;
    friend class TextSelectionDumper;
};

//------------------------------------------------------------------------
// TextPage
//------------------------------------------------------------------------

// The text content of one page: receives characters and words as the
// page is processed, coalesces them into flows / blocks / lines, and
// offers searching, selection and dumping on the result.  The object
// is reference counted (incRefCnt / decRefCnt) and its destructor is
// private.  Instances are not copyable.
class POPPLER_PRIVATE_EXPORT TextPage
{
public:
    // Constructor.  <rawOrderA> keeps text in content stream order;
    // <discardDiagA> discards diagonal text.
    explicit TextPage(bool rawOrderA, bool discardDiagA = false);

    // Copying is disabled.
    TextPage(const TextPage &) = delete;
    TextPage &operator=(const TextPage &) = delete;

    // Adjust the reference count.
    // NOTE(review): since the destructor is private, decRefCnt
    // presumably destroys the object when the count reaches zero --
    // confirm against the definition.
    void incRefCnt();
    void decRefCnt();

    // Start a new page.
    void startPage(const GfxState *state);

    // End the current page.
    void endPage();

    // Update the current font.
    void updateFont(const GfxState *state);

    // Begin a new word.
    void beginWord(const GfxState *state);

    // Add a character to the current word.
    void addChar(const GfxState *state, double x, double y, double dx, double dy, CharCode c, int nBytes, const Unicode *u, int uLen);

    // Add <nChars> invisible characters.
    void incCharCount(int nChars);

    // End the current word, sorting it into the list of words.
    void endWord();

    // Add a word, sorting it into the list of words.
    void addWord(TextWord *word);

    // Add a (potential) underline.
    void addUnderline(double x0, double y0, double x1, double y1);

    // Add a hyperlink.
    void addLink(int xMin, int yMin, int xMax, int yMax, AnnotLink *link);

    // Coalesce strings that look like parts of the same line.  The
    // second overload takes an explicit <minColSpacing1> value.
    void coalesce(bool physLayout, double fixedPitch, bool doHTML);
    void coalesce(bool physLayout, double fixedPitch, bool doHTML, double minColSpacing1);

    // Find a string.  If <startAtTop> is true, starts looking at the
    // top of the page; else if <startAtLast> is true, starts looking
    // immediately after the last find result; else starts looking at
    // <xMin>,<yMin>.  If <stopAtBottom> is true, stops looking at the
    // bottom of the page; else if <stopAtLast> is true, stops looking
    // just before the last find result; else stops looking at
    // <xMax>,<yMax>.
    bool findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool backward, bool wholeWord, double *xMin, double *yMin, double *xMax, double *yMax);

    // Adds new parameter ignoreDiacritics, which will do diacritics
    // insensitive search, i.e. ignore accents, umlauts, diaeresis, etc.
    // while matching. This option will be ignored if <s> contains characters
    // which are not pure ascii.
    bool findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool ignoreDiacritics, bool backward, bool wholeWord, double *xMin, double *yMin, double *xMax,
                  double *yMax);

    // Adds new parameter <matchAcrossLines>, which allows <s> to match on text
    // spanning from end of a line to the next line. In that case, the rect for
    // the part of match that falls on the next line will be stored in
    // <continueMatch>, and if hyphenation (i.e. ignoring hyphen at end of line)
    // was used while matching at the end of the line prior to <continueMatch>,
    // then <ignoredHyphen> will be true, otherwise will be false.
    // Only finding across two lines is supported, i.e. it won't match where <s>
    // spans more than two lines.
    //
    // <matchAcrossLines> will be ignored if <backward> is true (as that
    // combination has not been implemented yet).
    bool findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool ignoreDiacritics, bool matchAcrossLines, bool backward, bool wholeWord, double *xMin, double *yMin,
                  double *xMax, double *yMax, PDFRectangle *continueMatch, bool *ignoredHyphen);

    // Get the text which is inside the specified rectangle.
    // NOTE(review): the returned GooString is presumably owned by the
    // caller -- confirm against the definition.
    GooString *getText(double xMin, double yMin, double xMax, double yMax, EndOfLineKind textEOL) const;

    // Report the text covered by <selection> to <visitor>, using the
    // given selection style.
    void visitSelection(TextSelectionVisitor *visitor, const PDFRectangle *selection, SelectionStyle style);

    // Draw the selection described by <selection> and <style> on <out>,
    // using <glyph_color> and <box_color>.
    void drawSelection(OutputDev *out, double scale, int rotation, const PDFRectangle *selection, SelectionStyle style, const GfxColor *glyph_color, const GfxColor *box_color);

    // Get the rectangles covered by the selection.
    // NOTE(review): the vector and the rectangles it points to are
    // presumably owned by the caller -- confirm against the definition.
    std::vector<PDFRectangle *> *getSelectionRegion(const PDFRectangle *selection, SelectionStyle style, double scale);

    // Get the text covered by the selection.
    // NOTE(review): the returned GooString is presumably owned by the
    // caller -- confirm against the definition.
    GooString *getSelectionText(const PDFRectangle *selection, SelectionStyle style);

    // Get the selected words; <nLines> is an output parameter.
    // NOTE(review): presumably returns one vector per selected line and
    // stores the number of lines in <nLines>; ownership of the result
    // is not stated here -- confirm against the definition.
    std::vector<TextWordSelection *> **getSelectionWords(const PDFRectangle *selection, SelectionStyle style, int *nLines);

    // Find a string by character position and length.  If found, sets
    // the text bounding rectangle and returns true; otherwise returns
    // false.
    bool findCharRange(int pos, int length, double *xMin, double *yMin, double *xMax, double *yMax) const;

    // Dump contents of page to a file.
    void dump(void *outputStream, TextOutputFunc outputFunc, bool physLayout, EndOfLineKind textEOL, bool pageBreaks);

    // Get the head of the linked list of TextFlows.
    const TextFlow *getFlows() const { return flows; }

    // If true, will combine characters when a base and combining
    // character are drawn on each other.
    void setMergeCombining(bool merge);

#ifdef TEXTOUT_WORD_LIST
    // Build a flat word list, in content stream order (if
    // this->rawOrder is true), physical layout order (if <physLayout>
    // is true and this->rawOrder is false), or reading order (if both
    // flags are false).
    std::unique_ptr<TextWordList> makeWordList(bool physLayout);
#endif

private:
    // Destructor.  Private: the object's lifetime is managed through
    // incRefCnt / decRefCnt.
    ~TextPage();

    // Internal helpers; their definitions are not in this header.
    void clear();
    void assignColumns(TextLineFrag *frags, int nFrags, bool rot) const;
    int dumpFragment(const Unicode *text, int len, const UnicodeMap *uMap, GooString *s) const;
    void adjustRotation(TextLine *line, int start, int end, double *xMin, double *xMax, double *yMin, double *yMax);

    bool rawOrder; // keep text in content stream order
    bool discardDiag; // discard diagonal text
    bool mergeCombining; // merge when combining and base characters
                         // are drawn on top of each other

    double pageWidth, pageHeight; // width and height of current page
    TextWord *curWord; // currently active string
    int charPos; // next character position (within content
                 //   stream)
    TextFontInfo *curFont; // current font
    double curFontSize; // current font size
    int nest; // current nesting level (for Type 3 fonts)
    int nTinyChars; // number of "tiny" chars seen so far
    bool lastCharOverlap; // set if the last added char overlapped the
                          //   previous char
    bool diagonal; // whether the current text is diagonal

    std::unique_ptr<TextPool> pools[4]; // a "pool" of TextWords for each rotation
    TextFlow *flows; // linked list of flows
    TextBlock **blocks; // array of blocks, in yx order
    int nBlocks; // number of blocks
    int primaryRot; // primary rotation
    bool primaryLR; // primary direction (true means L-to-R,
                    //   false means R-to-L)
    TextWord *rawWords; // list of words, in raw order (only if
                        //   rawOrder is set)
    TextWord *rawLastWord; // last word on rawWords list

    std::vector<std::unique_ptr<TextFontInfo>> fonts; // all font info objects used on this page

    double lastFindXMin, // coordinates of the last "find" result
            lastFindYMin;
    bool haveLastFind; // NOTE(review): presumably set once lastFindXMin/lastFindYMin hold a valid result -- confirm

    std::vector<std::unique_ptr<TextUnderline>> underlines; // underlines added via addUnderline (presumably)
    std::vector<std::unique_ptr<TextLink>> links; // links added via addLink (presumably)

    int refCnt; // reference count, see incRefCnt / decRefCnt

    // These classes access the private members directly.
    friend class TextLine;
    friend class TextLineFrag;
    friend class TextBlock;
    friend class TextFlow;
    friend class TextWordList;
    friend class TextSelectionPainter;
    friend class TextSelectionDumper;
};

//------------------------------------------------------------------------
// ActualText
//------------------------------------------------------------------------

// Helper for spans that carry replacement ("actual") text: holds the
// replacement string between begin() and end() together with
// coordinates and a byte count.  Instances are not copyable.
// NOTE(review): how the replacement text is delivered to the TextPage
// is decided by the out-of-line definitions -- confirm there.
class POPPLER_PRIVATE_EXPORT ActualText
{
public:
    // Create an ActualText
    explicit ActualText(TextPage *out);
    ~ActualText();

    // Copying is disabled.
    ActualText(const ActualText &) = delete;
    ActualText &operator=(const ActualText &) = delete;

    // Same parameter list as TextPage::addChar.
    void addChar(const GfxState *state, double x, double y, double dx, double dy, CharCode c, int nBytes, const Unicode *u, int uLen);
    // Start a span whose replacement text is <text>.
    void begin(const GfxState *state, const GooString *text);
    // Finish the current span.
    void end(const GfxState *state);

private:
    TextPage *text; // NOTE(review): presumably the page passed to the constructor -- confirm

    GooString *actualText; // replacement text for the span
    // NOTE(review): presumably the extent covered by the characters of
    // the span and the total number of bytes they occupy -- confirm.
    double actualTextX0;
    double actualTextY0;
    double actualTextX1;
    double actualTextY1;
    int actualTextNBytes;
};

//------------------------------------------------------------------------
// TextOutputDev
//------------------------------------------------------------------------

// OutputDev implementation for text extraction.  It can write the
// text to a file or to a caller-supplied output function, and exposes
// searching, selection and word-list access for the current page.
class POPPLER_PRIVATE_EXPORT TextOutputDev : public OutputDev
{
public:
    // Default for minColSpacing1; defined in TextOutputDev.cc.
    static double minColSpacing1_default;

    // Open a text output file.  If <fileName> is NULL, no file is
    // written (this is useful, e.g., for searching text).  If
    // <physLayoutA> is true, the original physical layout of the text
    // is maintained.  If <rawOrder> is true, the text is kept in
    // content stream order.  If <discardDiag> is true, diagonal text
    // is removed from output.
    TextOutputDev(const char *fileName, bool physLayoutA, double fixedPitchA, bool rawOrderA, bool append, bool discardDiagA = false);

    // Create a TextOutputDev which will write to a generic stream.  If
    // <physLayoutA> is true, the original physical layout of the text
    // is maintained.  If <rawOrder> is true, the text is kept in
    // content stream order.  If <discardDiag> is true, diagonal text
    // is removed from output.
    TextOutputDev(TextOutputFunc func, void *stream, bool physLayoutA, double fixedPitchA, bool rawOrderA, bool discardDiagA = false);

    // Destructor.
    ~TextOutputDev() override;

    // Check if file was successfully created.
    virtual bool isOk() { return ok; }

    //---- get info about output device

    // Does this device use upside-down coordinates?
    // (Upside-down means (0,0) is the top left corner of the page.)
    bool upsideDown() override { return true; }

    // Does this device use drawChar() or drawString()?
    bool useDrawChar() override { return true; }

    // Does this device use beginType3Char/endType3Char?  Otherwise,
    // text in Type 3 fonts will be drawn with drawChar/drawString.
    bool interpretType3Chars() override { return false; }

    // Does this device need non-text content?
    bool needNonText() override { return false; }

    // Does this device require incCharCount to be called for text on
    // non-shown layers?
    bool needCharCount() override { return true; }

    //----- initialization and control

    // Start a page.
    void startPage(int pageNum, GfxState *state, XRef *xref) override;

    // End a page.
    void endPage() override;

    //----- save/restore graphics state
    void restoreState(GfxState *state) override;

    //----- update text state
    void updateFont(GfxState *state) override;

    //----- text drawing
    void beginString(GfxState *state, const GooString *s) override;
    void endString(GfxState *state) override;
    void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode c, int nBytes, const Unicode *u, int uLen) override;
    void incCharCount(int nChars) override;
    void beginActualText(GfxState *state, const GooString *text) override;
    void endActualText(GfxState *state) override;

    //----- path painting
    void stroke(GfxState *state) override;
    void fill(GfxState *state) override;
    void eoFill(GfxState *state) override;

    //----- link borders
    void processLink(AnnotLink *link) override;

    //----- special access

    // Find a string.  If <startAtTop> is true, starts looking at the
    // top of the page; else if <startAtLast> is true, starts looking
    // immediately after the last find result; else starts looking at
    // <xMin>,<yMin>.  If <stopAtBottom> is true, stops looking at the
    // bottom of the page; else if <stopAtLast> is true, stops looking
    // just before the last find result; else stops looking at
    // <xMax>,<yMax>.
    bool findText(const Unicode *s, int len, bool startAtTop, bool stopAtBottom, bool startAtLast, bool stopAtLast, bool caseSensitive, bool backward, bool wholeWord, double *xMin, double *yMin, double *xMax, double *yMax) const;

    // Get the text which is inside the specified rectangle.
    // NOTE(review): the returned GooString is presumably owned by the
    // caller -- confirm against the definition.
    GooString *getText(double xMin, double yMin, double xMax, double yMax) const;

    // Find a string by character position and length.  If found, sets
    // the text bounding rectangle and returns true; otherwise returns
    // false.
    bool findCharRange(int pos, int length, double *xMin, double *yMin, double *xMax, double *yMax) const;

    // Selection helpers with the same parameter lists as the TextPage
    // members of the same names.
    // NOTE(review): presumably forwarded to the current TextPage;
    // ownership of the returned objects is not stated here -- confirm
    // against the definitions.
    void drawSelection(OutputDev *out, double scale, int rotation, const PDFRectangle *selection, SelectionStyle style, const GfxColor *glyph_color, const GfxColor *box_color);

    std::vector<PDFRectangle *> *getSelectionRegion(const PDFRectangle *selection, SelectionStyle style, double scale);

    GooString *getSelectionText(const PDFRectangle *selection, SelectionStyle style);

    // If true, will combine characters when a base and combining
    // character are drawn on each other.
    void setMergeCombining(bool merge);

#ifdef TEXTOUT_WORD_LIST
    // Build a flat word list, in content stream order (if
    // this->rawOrder is true), physical layout order (if
    // this->physLayout is true and this->rawOrder is false), or reading
    // order (if both flags are false).
    std::unique_ptr<TextWordList> makeWordList();
#endif

    // Returns the TextPage object for the last rasterized page,
    // transferring ownership to the caller.
    TextPage *takeText();

    // Turn extra processing for HTML conversion on or off.
    void enableHTMLExtras(bool doHTMLA) { doHTML = doHTMLA; }

    // Get the head of the linked list of TextFlows for the
    // last rasterized page.
    const TextFlow *getFlows() const;

    // Platform default end-of-line kind: eolDOS when _WIN32 is defined,
    // eolUnix otherwise.
    static constexpr EndOfLineKind defaultEndOfLine()
    {
#if defined(_WIN32)
        return eolDOS;
#else
        return eolUnix;
#endif
    }
    // Simple setters / getters for the output options stored below.
    void setTextEOL(EndOfLineKind textEOLA) { textEOL = textEOLA; }
    void setTextPageBreaks(bool textPageBreaksA) { textPageBreaks = textPageBreaksA; }
    double getMinColSpacing1() const { return minColSpacing1; }
    void setMinColSpacing1(double val) { minColSpacing1 = val; }

private:
    TextOutputFunc outputFunc; // output function
    void *outputStream; // output stream
    bool needClose; // need to close the output file?
                    //   (only if outputStream is a FILE*)
    TextPage *text; // text for the current page
    bool physLayout; // maintain original physical layout when
                     //   dumping text
    double fixedPitch; // if physLayout is true and this is non-zero,
                       //   assume fixed-pitch characters with this
                       //   width
    double minColSpacing1; // see default value defined with same name at TextOutputDev.cc
    bool rawOrder; // keep text in content stream order
    bool discardDiag; // Diagonal text, i.e., text that is not close to one of the
                      // 0, 90, 180, or 270 degree axes, is discarded. This is useful
                      // to skip watermarks drawn on top of body text, etc.
    bool doHTML; // extra processing for HTML conversion
    bool ok; // set up ok?
    bool textPageBreaks; // insert end-of-page markers?
    EndOfLineKind textEOL; // type of EOL marker to use

    ActualText *actualText; // helper for actual-text spans, see beginActualText / endActualText
};

#endif
