| /* |
| * Copyright (C) 2009-2010, Pino Toscano <pino@kde.org> |
| * Copyright (C) 2017, 2018, Albert Astals Cid <aacid@kde.org> |
| * Copyright (C) 2017, Jason Alan Palmer <jalanpalmer@gmail.com> |
| * Copyright (C) 2018, Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp> |
| * Copyright (C) 2019, Masamichi Hosoda <trueroad@trueroad.jp> |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2, or (at your option) |
| * any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. |
| */ |
| |
| #include <goo/glibc.h> |
| #include <poppler-destination.h> |
| #include <poppler-document.h> |
| #include <poppler-embedded-file.h> |
| #include <poppler-font.h> |
| #include <poppler-page.h> |
| #include <poppler-toc.h> |
| #include <poppler-version.h> |
| |
| #include <cstdlib> |
| #include <cstring> |
| #include <ctime> |
| #include <algorithm> |
| #include <iomanip> |
| #include <ios> |
| #include <iostream> |
| #include <map> |
| #include <memory> |
| #include <sstream> |
| |
| #include "parseargs.h" |
| |
| #include "config.h" |
| |
| static const int out_width = 30; |
| |
| bool show_all = false; |
| bool show_info = false; |
| bool show_perm = false; |
| bool show_metadata = false; |
| bool show_toc = false; |
| bool show_fonts = false; |
| bool show_embedded_files = false; |
| bool show_pages = false; |
| bool show_destinations = false; |
| bool show_help = false; |
| bool show_version = false; |
| char show_text[32]; |
| bool show_text_list = false; |
| poppler::page::text_layout_enum show_text_layout = poppler::page::physical_layout; |
| |
| static const ArgDesc the_args[] = { |
| { "--show-all", argFlag, &show_all, 0, |
| "show all the available information" }, |
| { "--show-info", argFlag, &show_info, 0, |
| "show general document information" }, |
| { "--show-perm", argFlag, &show_perm, 0, |
| "show document permissions" }, |
| { "--show-metadata", argFlag, &show_metadata, 0, |
| "show document metadata" }, |
| { "--show-toc", argFlag, &show_toc, 0, |
| "show the TOC" }, |
| { "--show-fonts", argFlag, &show_fonts, 0, |
| "show the document fonts" }, |
| { "--show-embedded-files", argFlag, &show_embedded_files, 0, |
| "show the document-level embedded files" }, |
| { "--show-pages", argFlag, &show_pages, 0, |
| "show pages information" }, |
| { "--show-destinations", argFlag, &show_destinations, 0, |
| "show named destinations" }, |
| { "--show-text", argString, &show_text, sizeof(show_text), |
| "show text (physical|raw) extracted from all pages" }, |
| { "--show-text-list", argFlag, &show_text_list, 0, |
| "show text list (experimental)" }, |
| { "-h", argFlag, &show_help, 0, |
| "print usage information" }, |
| { "--help", argFlag, &show_help, 0, |
| "print usage information" }, |
| { "--version", argFlag, &show_version, 0, |
| "print poppler version" }, |
| { nullptr, argFlag, nullptr, 0, nullptr } |
| }; |
| |
| static void error(const std::string &msg) |
| { |
| std::cerr << "Error: " << msg << std::endl; |
| std::cerr << "Exiting..." << std::endl; |
| exit(1); |
| } |
| |
| static std::ostream& operator<<(std::ostream& stream, const poppler::ustring &str) |
| { |
| const poppler::byte_array ba = str.to_utf8(); |
| for (const char c : ba) { |
| stream << c; |
| } |
| return stream; |
| } |
| |
| static std::string out_date(std::time_t date) |
| { |
| if (date != std::time_t(-1)) { |
| struct tm time; |
| gmtime_r(&date, &time); |
| struct tm *t = &time; |
| char buf[32]; |
| strftime(buf, sizeof(buf) - 1, "%d/%m/%Y %H:%M:%S", t); |
| return std::string(buf); |
| } |
| return std::string("n/a"); |
| } |
| |
| static std::string out_size(int size) |
| { |
| if (size >= 0) { |
| std::ostringstream ss; |
| ss << size; |
| return ss.str(); |
| } |
| return std::string("n/a"); |
| } |
| |
| static char charToHex(int x) |
| { |
| return x < 10 ? x + '0' : x - 10 + 'a'; |
| } |
| |
| static std::string out_hex_string(const poppler::byte_array &str) |
| { |
| std::string ret(str.size() * 2, '\0'); |
| const char *str_p = &str[0]; |
| for (unsigned int i = 0; i < str.size(); ++i, ++str_p) { |
| ret[i * 2] = charToHex((*str_p & 0xf0) >> 4); |
| ret[i * 2 + 1] = charToHex(*str_p & 0xf); |
| } |
| return ret; |
| } |
| |
| static std::string out_page_orientation(poppler::page::orientation_enum o) |
| { |
| switch (o) { |
| case poppler::page::landscape: |
| return "landscape (90)"; |
| case poppler::page::portrait: |
| return "portrait (0)"; |
| case poppler::page::seascape: |
| return "seascape (270)"; |
| case poppler::page::upside_down: |
| return "upside_downs (180)"; |
| }; |
| return "<unknown orientation>"; |
| } |
| |
| static std::string out_font_info_type(poppler::font_info::type_enum t) |
| { |
| #define OUT_FONT_TYPE(thetype) case poppler::font_info::thetype: return #thetype |
| switch (t) { |
| OUT_FONT_TYPE(unknown); |
| OUT_FONT_TYPE(type1); |
| OUT_FONT_TYPE(type1c); |
| OUT_FONT_TYPE(type1c_ot); |
| OUT_FONT_TYPE(type3); |
| OUT_FONT_TYPE(truetype); |
| OUT_FONT_TYPE(truetype_ot); |
| OUT_FONT_TYPE(cid_type0); |
| OUT_FONT_TYPE(cid_type0c); |
| OUT_FONT_TYPE(cid_type0c_ot); |
| OUT_FONT_TYPE(cid_truetype); |
| OUT_FONT_TYPE(cid_truetype_ot); |
| } |
| return "<unknown font type>"; |
| #undef OUT_FONT_TYPE |
| } |
| |
| static void print_info(poppler::document *doc) |
| { |
| std::cout << "Document information:" << std::endl; |
| int major = 0, minor = 0; |
| doc->get_pdf_version(&major, &minor); |
| std::cout << std::setw(out_width) << "PDF version" << ": " << major << "." << minor << std::endl; |
| std::string permanent_id, update_id; |
| if (doc->get_pdf_id(&permanent_id, &update_id)) { |
| std::cout << std::setw(out_width) << "PDF IDs" << ": P: " << permanent_id << " - U: " << update_id << std::endl; |
| } else { |
| std::cout << std::setw(out_width) << "PDF IDs" << ": <none>" << std::endl; |
| } |
| const std::vector<std::string> keys = doc->info_keys(); |
| std::vector<std::string>::const_iterator key_it = keys.begin(), key_end = keys.end(); |
| for (; key_it != key_end; ++key_it) { |
| std::cout << std::setw(out_width) << *key_it << ": " << doc->info_key(*key_it) << std::endl; |
| } |
| std::cout << std::setw(out_width) << "Date (creation)" << ": " << out_date(doc->info_date("CreationDate")) << std::endl; |
| std::cout << std::setw(out_width) << "Date (modification)" << ": " << out_date(doc->info_date("ModDate")) << std::endl; |
| std::cout << std::setw(out_width) << "Number of pages" << ": " << doc->pages() << std::endl; |
| std::cout << std::setw(out_width) << "Linearized" << ": " << doc->is_linearized() << std::endl; |
| std::cout << std::setw(out_width) << "Encrypted" << ": " << doc->is_encrypted() << std::endl; |
| std::cout << std::endl; |
| } |
| |
| static void print_perm(poppler::document *doc) |
| { |
| std::cout << "Document permissions:" << std::endl; |
| #define OUT_PERM(theperm) \ |
| std::cout << std::setw(out_width) << #theperm << ": " \ |
| << doc->has_permission(poppler::perm_##theperm) << std::endl |
| OUT_PERM(print); |
| OUT_PERM(change); |
| OUT_PERM(copy); |
| OUT_PERM(add_notes); |
| OUT_PERM(fill_forms); |
| OUT_PERM(accessibility); |
| OUT_PERM(assemble); |
| OUT_PERM(print_high_resolution); |
| std::cout << std::endl; |
| #undef OUT_PERM |
| } |
| |
| static void print_metadata(poppler::document *doc) |
| { |
| std::cout << std::setw(out_width) << "Metadata" << ":" << std::endl |
| << doc->metadata() << std::endl; |
| std::cout << std::endl; |
| } |
| |
| static void print_toc_item(poppler::toc_item *item, int indent) |
| { |
| std::cout << std::setw(indent * 2) << " " |
| << "+ " << item->title() << " (" << item->is_open() << ")" |
| << std::endl; |
| poppler::toc_item::iterator it = item->children_begin(), it_end = item->children_end(); |
| for (; it != it_end; ++it) { |
| print_toc_item(*it, indent + 1); |
| } |
| } |
| |
| static void print_toc(poppler::toc *doctoc) |
| { |
| std::cout << "Document TOC:" << std::endl; |
| if (doctoc) { |
| print_toc_item(doctoc->root(), 0); |
| } else { |
| std::cout << "<no TOC>" << std::endl; |
| } |
| std::cout << std::endl; |
| } |
| |
| static void print_fonts(poppler::document *doc) |
| { |
| std::cout << "Document fonts:" << std::endl; |
| std::vector<poppler::font_info> fl = doc->fonts(); |
| if (!fl.empty()) { |
| std::vector<poppler::font_info>::const_iterator it = fl.begin(), it_end = fl.end(); |
| const std::ios_base::fmtflags f = std::cout.flags(); |
| std::left(std::cout); |
| for (; it != it_end; ++it) { |
| std::cout |
| << " " << std::setw(out_width + 10) << it->name() |
| << " " << std::setw(15) << out_font_info_type(it->type()) |
| << " " << std::setw(5) << it->is_embedded() |
| << " " << std::setw(5) << it->is_subset() |
| << " " << it->file() |
| << std::endl; |
| } |
| std::cout.flags(f); |
| } else { |
| std::cout << "<no fonts>" << std::endl; |
| } |
| std::cout << std::endl; |
| } |
| |
| static void print_embedded_files(poppler::document *doc) |
| { |
| std::cout << "Document embedded files:" << std::endl; |
| std::vector<poppler::embedded_file *> ef = doc->embedded_files(); |
| if (!ef.empty()) { |
| std::vector<poppler::embedded_file *>::const_iterator it = ef.begin(), it_end = ef.end(); |
| const std::ios_base::fmtflags flags = std::cout.flags(); |
| std::left(std::cout); |
| for (; it != it_end; ++it) { |
| poppler::embedded_file *f = *it; |
| std::cout |
| << " " << std::setw(out_width + 10) << f->name() |
| << " " << std::setw(10) << out_size(f->size()) |
| << " " << std::setw(20) << out_date(f->creation_date()) |
| << " " << std::setw(20) << out_date(f->modification_date()) |
| << std::endl |
| << " "; |
| if (f->description().empty()) { |
| std::cout << "<no description>"; |
| } else { |
| std::cout << f->description(); |
| } |
| std::cout |
| << std::endl |
| << " " << std::setw(35) << (f->checksum().empty() ? std::string("<no checksum>") : out_hex_string(f->checksum())) |
| << " " << (f->mime_type().empty() ? std::string("<no mime type>") : f->mime_type()) |
| << std::endl; |
| } |
| std::cout.flags(flags); |
| } else { |
| std::cout << "<no embedded files>" << std::endl; |
| } |
| std::cout << std::endl; |
| } |
| |
| static void print_page(poppler::page *p) |
| { |
| if (p) { |
| std::cout << std::setw(out_width) << "Rect" << ": " << p->page_rect() << std::endl; |
| std::cout << std::setw(out_width) << "Label" << ": " << p->label() << std::endl; |
| std::cout << std::setw(out_width) << "Duration" << ": " << p->duration() << std::endl; |
| std::cout << std::setw(out_width) << "Orientation" << ": " << out_page_orientation(p->orientation()) << std::endl; |
| } else { |
| std::cout << std::setw(out_width) << "Broken Page. Could not be parsed" << std::endl; |
| } |
| std::cout << std::endl; |
| } |
| |
| static void print_destination(const poppler::destination *d) |
| { |
| if (d) { |
| std::cout << std::setw(out_width) << "Type" << ": "; |
| switch (d->type()) { |
| case poppler::destination::unknown: |
| std::cout << "unknown" << std::endl; |
| break; |
| case poppler::destination::xyz: |
| std::cout << "xyz" << std::endl |
| << std::setw(out_width) |
| << "Page" << ": " << d->page_number() << std::endl |
| << std::setw(out_width) |
| << "Left" << ": " << d->left() << std::endl |
| << std::setw(out_width) |
| << "Top" << ": " << d->top() << std::endl |
| << std::setw(out_width) |
| << "Zoom" << ": " << d->zoom() << std::endl; |
| break; |
| case poppler::destination::fit: |
| std::cout << "fit" << std::endl |
| << std::setw(out_width) |
| << "Page" << ": " << d->page_number() << std::endl; |
| break; |
| case poppler::destination::fit_h: |
| std::cout << "fit_h" << std::endl |
| << std::setw(out_width) |
| << "Page" << ": " << d->page_number() << std::endl |
| << std::setw(out_width) |
| << "Top" << ": " << d->top() << std::endl; |
| break; |
| case poppler::destination::fit_v: |
| std::cout << "fit_v" << std::endl |
| << std::setw(out_width) |
| << "Page" << ": " << d->page_number() << std::endl |
| << std::setw(out_width) |
| << "Left" << ": " << d->left() << std::endl; |
| break; |
| case poppler::destination::fit_r: |
| std::cout << "fit_r" << std::endl |
| << std::setw(out_width) |
| << "Page" << ": " << d->page_number() << std::endl |
| << std::setw(out_width) |
| << "Left" << ": " << d->left() << std::endl |
| << std::setw(out_width) |
| << "Bottom" << ": " << d->bottom() << std::endl |
| << std::setw(out_width) |
| << "Right" << ": " << d->right() << std::endl |
| << std::setw(out_width) |
| << "Top" << ": " << d->top() << std::endl; |
| break; |
| case poppler::destination::fit_b: |
| std::cout << "fit_b" << std::endl |
| << std::setw(out_width) |
| << "Page" << ": " << d->page_number() << std::endl; |
| break; |
| case poppler::destination::fit_b_h: |
| std::cout << "fit_b_h" << std::endl |
| << std::setw(out_width) |
| << "Page" << ": " << d->page_number() << std::endl |
| << std::setw(out_width) |
| << "Top" << ": " << d->top() << std::endl; |
| break; |
| case poppler::destination::fit_b_v: |
| std::cout << "fit_b_v" << std::endl |
| << std::setw(out_width) |
| << "Page" << ": " << d->page_number() << std::endl |
| << std::setw(out_width) |
| << "Left" << ": " << d->left() << std::endl; |
| break; |
| default: |
| std::cout << "error" << std::endl; |
| break; |
| } |
| } |
| std::cout << std::endl; |
| } |
| |
| static void print_page_text(poppler::page *p) |
| { |
| if (p) { |
| std::cout << p->text(poppler::rectf(), show_text_layout) << std::endl; |
| } else { |
| std::cout << std::setw(out_width) << "Broken Page. Could not be parsed" << std::endl; |
| } |
| std::cout << std::endl; |
| } |
| |
| static void print_page_text_list(poppler::page *p) |
| { |
| if (!p) { |
| std::cout << std::setw(out_width) << "Broken Page. Could not be parsed" << std::endl; |
| std::cout << std::endl; |
| return; |
| } |
| auto text_list = p->text_list(); |
| |
| std::cout << "---" << std::endl; |
| for (size_t i = 0; i < text_list.size(); i ++) { |
| poppler::rectf bbox = text_list[i].bbox(); |
| poppler::ustring ustr = text_list[i].text(); |
| std::cout << "[" << ustr << "] @ "; |
| std::cout << "( x=" << bbox.x() << " y=" << bbox.y() << " w=" << bbox.width() << " h=" << bbox.height() << " )"; |
| std::cout << std::endl; |
| |
| } |
| std::cout << "---" << std::endl; |
| } |
| |
| |
| int main(int argc, char *argv[]) |
| { |
| if (!parseArgs(the_args, &argc, argv) |
| || argc < 2 || show_help) { |
| printUsage(argv[0], "DOCUMENT", the_args); |
| exit(1); |
| } |
| |
| if (show_text[0]) { |
| if (!memcmp(show_text, "physical", 9)) { |
| show_text_layout = poppler::page::physical_layout; |
| } else if (!memcmp(show_text, "raw", 4)) { |
| show_text_layout = poppler::page::raw_order_layout; |
| } else { |
| error(std::string("unrecognized text mode: '") + show_text + "'"); |
| } |
| } |
| |
| std::string file_name(argv[1]); |
| |
| std::unique_ptr<poppler::document> doc(poppler::document::load_from_file(file_name)); |
| if (!doc.get()) { |
| error("loading error"); |
| } |
| if (doc->is_locked()) { |
| error("encrypted document"); |
| } |
| |
| std::cout.setf(std::ios_base::boolalpha); |
| |
| if (show_all) { |
| show_info = true; |
| show_perm = true; |
| show_metadata = true; |
| show_toc = true; |
| show_fonts = true; |
| show_embedded_files = true; |
| show_pages = true; |
| } |
| |
| if (show_version) { |
| std::cout << std::setw(out_width) << "Compiled" << ": poppler-cpp " |
| << POPPLER_VERSION << std::endl |
| << std::setw(out_width) << "Running" << ": poppler-cpp " |
| << poppler::version_string() << std::endl; |
| } |
| if (show_info) { |
| print_info(doc.get()); |
| } |
| if (show_perm) { |
| print_perm(doc.get()); |
| } |
| if (show_metadata) { |
| print_metadata(doc.get()); |
| } |
| if (show_toc) { |
| std::unique_ptr<poppler::toc> doctoc(doc->create_toc()); |
| print_toc(doctoc.get()); |
| } |
| if (show_fonts) { |
| print_fonts(doc.get()); |
| } |
| if (show_embedded_files) { |
| print_embedded_files(doc.get()); |
| } |
| if (show_pages) { |
| const int pages = doc->pages(); |
| for (int i = 0; i < pages; ++i) { |
| std::cout << "Page " << (i + 1) << "/" << pages << ":" << std::endl; |
| std::unique_ptr<poppler::page> p(doc->create_page(i)); |
| print_page(p.get()); |
| } |
| } |
| if (show_destinations) { |
| auto map = doc->create_destination_map(); |
| for (const auto &pair: map) { |
| std::string s = pair.first; |
| for (auto &c: s) { |
| if (c < 0x20 || c>0x7e ) |
| c = '.'; |
| } |
| std::cout << "Named destination \"" << s << "\":" << std::endl; |
| print_destination(&pair.second); |
| } |
| } |
| if (show_text[0]) { |
| const int pages = doc->pages(); |
| for (int i = 0; i < pages; ++i) { |
| std::cout << "Page " << (i + 1) << "/" << pages << ":" << std::endl; |
| std::unique_ptr<poppler::page> p(doc->create_page(i)); |
| print_page_text(p.get()); |
| } |
| } |
| if (show_text_list) { |
| const int pages = doc->pages(); |
| for (int i = 0; i < pages; ++i) { |
| std::cout << "Page " << (i + 1) << "/" << pages << ":" << std::endl; |
| std::unique_ptr<poppler::page> p(doc->create_page(i)); |
| print_page_text_list(p.get()); |
| } |
| } |
| |
| return 0; |
| } |