// Source blob: 6041402c8337e948321a6dc315a823a1dbf5eeef
/*
* Copyright (C) 2009-2010, Pino Toscano <pino@kde.org>
* Copyright (C) 2018, 2020, Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
* Copyright (C) 2018-2022, Albert Astals Cid <aacid@kde.org>
* Copyright (C) 2018, Zsombor Hollay-Horvath <hollay.horvath@gmail.com>
* Copyright (C) 2018, Aleksey Nikolaev <nae202@gmail.com>
* Copyright (C) 2020, Jiri Jakes <freedesktop@jirijakes.eu>
* Copyright (C) 2020, Adam Reichold <adam.reichold@t-online.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef POPPLER_PAGE_H
#define POPPLER_PAGE_H
#include "poppler-global.h"
#include "poppler-rectangle.h"
#include <memory>
namespace poppler {
struct text_box_data;
/**
A chunk of page text together with its layout information.

Objects of this class can be moved, but clients cannot create them
directly: the only other constructor is private and poppler::page is
declared as a friend. The actual data is kept in an opaque
text_box_data structure held through a std::unique_ptr.
*/
class POPPLER_CPP_EXPORT text_box
{
friend class page;
public:
/**
Move constructor.
*/
text_box(text_box &&) noexcept;
/**
Move assignment operator.
*/
text_box &operator=(text_box &&) noexcept;
~text_box();
/**
Get the text of this text_box.
*/
ustring text() const;
/**
Get the bounding box of this text_box.
*/
rectf bbox() const;
/**
Get the rotation of this text_box.
\note NOTE(review): the unit and the range of the returned value are
not visible in this header; confirm against the implementation.
\since 0.68
*/
int rotation() const;
/**
Get a bbox for the i-th glyph
This method returns a rectf of the bounding box for
the i-th glyph in the text_box.
\note The rectf is returned by value, so the caller
has nothing to free.
\warning For too large glyph index, rectf(0,0,0,0)
is returned. The number of the glyphs and ustring
codepoints might be different in some complex scripts.
*/
rectf char_bbox(size_t i) const;
/**
\note NOTE(review): presumably tells whether a space follows this
text_box in the page text; confirm against the implementation.
*/
bool has_space_after() const;
/**
\note NOTE(review): presumably tells whether font information
(see get_wmode(), get_font_size() and get_font_name()) is available
for this text_box; confirm against the implementation.
\since 0.89
*/
bool has_font_info() const;
/**
Writing modes that get_wmode() can report for a glyph.
*/
enum writing_mode_enum
{
invalid_wmode = -1,
horizontal_wmode = 0,
vertical_wmode = 1
};
/**
Get a writing mode for the i-th glyph
This method returns an enum of the writing mode
for the i-th glyph in the text_box.
\note Usually all glyphs in one text_box have the
same writing mode. Thus the default value of the
glyph index is 0.
\note The glyph index is an int here, whereas char_bbox()
takes a size_t.
\since 0.89
*/
writing_mode_enum get_wmode(int i = 0) const;
/**
Get a font size of this text_box instance.
This method returns a double floating value of the
font size from the text_box instance.
\since 0.89
*/
double get_font_size() const;
/**
Get a font name for the i-th glyph
This method returns a std::string object holding
the font name for the i-th glyph.
\note The randomization prefixes of the embedded fonts
are not removed. The font names including these
prefixes are insufficient to determine whether the
two fonts are same or different.
\note The clients should not assume that the
encoding of the font name is one of the ASCII,
Latin1 or UTF-8. Some legacy PDF producers used
in CJK market use GBK, Big5, Wansung or Shift-JIS.
\since 0.89
*/
std::string get_font_name(int i = 0) const;
private:
// Private on purpose: only the friend class page can build a text_box.
// NOTE(review): presumably takes ownership of data (it is stored in a
// std::unique_ptr member); confirm against the implementation.
explicit text_box(text_box_data *data);
// Opaque data backing this text_box.
std::unique_ptr<text_box_data> m_data;
};
class document;
class document_private;
class page_private;
class page_transition;
/**
A page of a document.

The constructor is private; document and page_private are declared as
friends. The class derives from poppler::noncopyable.
*/
class POPPLER_CPP_EXPORT page : public poppler::noncopyable
{
public:
/**
The possible values returned by orientation().
*/
enum orientation_enum
{
landscape,
portrait,
seascape,
upside_down
};
/**
The direction values accepted by search().
*/
enum search_direction_enum
{
search_from_top,
search_next_result,
search_previous_result
};
/**
The layout modes accepted by text(const rectf &, text_layout_enum).
*/
enum text_layout_enum
{
physical_layout,
raw_order_layout,
non_raw_non_physical_layout ///< \since 0.88
};
~page();
/**
Get the orientation of the page.
*/
orientation_enum orientation() const;
/**
\note NOTE(review): presumably the display duration of the page; the
unit and the value used when no duration is set are not visible in
this header. Confirm against the implementation.
*/
double duration() const;
/**
Get the rectangle of the page for the specified box.
\param box the page box to query, crop_box by default
*/
rectf page_rect(page_box_enum box = crop_box) const;
/**
Get the label of the page.
*/
ustring label() const;
/**
Get the transition of the page.
\note NOTE(review): whether the returned pointer can be null and who
owns it are not visible in this header; confirm against the
implementation.
*/
page_transition *transition() const;
/**
Search for text in the page.
\param text the text to search for
\param r NOTE(review): passed by non-const reference, so presumably
used both as the starting area and to return the area of the match;
confirm against the implementation
\param direction where to search from, see search_direction_enum
\param case_sensitivity whether the search is case sensitive
\param rotation the rotation to assume for the page, rotate_0 by default
\returns NOTE(review): presumably whether a match was found; confirm
against the implementation
*/
bool search(const ustring &text, rectf &r, search_direction_enum direction, case_sensitivity_enum case_sensitivity, rotation_enum rotation = rotate_0) const;
/**
Get the text of the page within the rectangle \p r.
\note NOTE(review): the default argument is a default-constructed
rectf, which presumably selects the whole page; confirm against the
implementation.
*/
ustring text(const rectf &r = rectf()) const;
/**
Get the text of the page within the rectangle \p r, using the
specified layout mode.
\param r the rectangle to take the text from
\param layout_mode the layout mode, see text_layout_enum
*/
ustring text(const rectf &r, text_layout_enum layout_mode) const;
/**
Returns a list of text of the page
This method returns a std::vector of text_box that contain all
the text of the page, with roughly one word of text
per text_box item.
For text written in western languages (left-to-right and
up-to-down), the std::vector contains the text in the proper
order.
\since 0.63
\note The vector is returned by value and each text_box holds its
data through a std::unique_ptr, so the caller does not need to free
anything.
\warning This method is not tested with Asian scripts
*/
std::vector<text_box> text_list() const;
/**
Bitmask-style flags for text_list(int);
0 means the default & simplest behaviour.
*/
enum text_list_option_enum
{
text_list_include_font = 1 ///< \since 0.89
};
/**
Extended version of text_list() taking an option flag.
The option flag should be a bitwise OR of text_list_option_enum
values, or 0 for the default behaviour.
\since 0.89
*/
std::vector<text_box> text_list(int opt_flag) const;
private:
// Private on purpose: pages are created by the friend classes below.
page(document_private *doc, int index);
// Pointer to the private implementation data of the page.
page_private *d;
friend class page_private;
friend class document;
};
}
#endif