| //======================================================================== |
| // |
| // pdfdetach.cc |
| // |
| // Copyright 2010 Glyph & Cog, LLC |
| // |
| //======================================================================== |
| |
| //======================================================================== |
| // |
| // Modified under the Poppler project - http://poppler.freedesktop.org |
| // |
| // All changes made under the Poppler project to this file are licensed |
| // under GPL version 2 or later |
| // |
| // Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org> |
| // Copyright (C) 2013 Yury G. Kudryashov <urkud.urkud@gmail.com> |
| // Copyright (C) 2014, 2017 Adrian Johnson <ajohnson@redneon.com> |
| // Copyright (C) 2018, 2020, 2022, 2024 Albert Astals Cid <aacid@kde.org> |
| // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> |
| // Copyright (C) 2019, 2021, 2024 Oliver Sander <oliver.sander@tu-dresden.de> |
| // Copyright (C) 2020 <r.coeffier@bee-buzziness.com> |
| // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> |
| // |
| // To see a description of the changes please see the Changelog file that |
| // came with your tarball or type make ChangeLog if you are building from git |
| // |
| //======================================================================== |
| |
| #include "config.h" |
| #include <poppler-config.h> |
| #include <cstdio> |
| #include "goo/gmem.h" |
| #include "parseargs.h" |
| #include "Annot.h" |
| #include "GlobalParams.h" |
| #include "Page.h" |
| #include "PDFDoc.h" |
| #include "PDFDocFactory.h" |
| #include "FileSpec.h" |
| #include "CharTypes.h" |
| #include "Catalog.h" |
| #include "UnicodeMap.h" |
| #include "PDFDocEncoding.h" |
| #include "Error.h" |
| #include "UTF.h" |
| #include "Win32Console.h" |
| |
| #include <filesystem> |
| |
// Command-line state. parseArgs() fills these in through the argDesc table.

// Operation selectors: main() accepts the command line only when exactly one
// of them has been requested.
static bool doList = false; // -list
static bool saveAll = false; // -saveall
static int saveNum = 0; // -save <n>; 0 means "not requested"
static char saveFile[128] = {}; // -savefile <name>; empty means "not requested"

// Output options; an empty string means the option was not given.
static char savePath[1024] = {}; // -o
static char textEncName[128] = {}; // -enc

// Passwords. A leading '\001' byte marks "no password supplied"; main() only
// forwards a password to the document factory when that marker has been
// overwritten.
static char ownerPassword[33] = "\001"; // -opw
static char userPassword[33] = "\001"; // -upw

// Informational switches.
static bool printVersion = false; // -v
static bool printHelp = false; // -h, -help, --help, -?
| |
| static const ArgDesc argDesc[] = { { "-list", argFlag, &doList, 0, "list all embedded files" }, |
| { "-save", argInt, &saveNum, 0, "save the specified embedded file (file number)" }, |
| { "-savefile", argString, &saveFile, sizeof(saveFile), "save the specified embedded file (file name)" }, |
| { "-saveall", argFlag, &saveAll, 0, "save all embedded files" }, |
| { "-o", argString, savePath, sizeof(savePath), "file name for the saved embedded file" }, |
| { "-enc", argString, textEncName, sizeof(textEncName), "output text encoding name" }, |
| { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" }, |
| { "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" }, |
| { "-v", argFlag, &printVersion, 0, "print copyright and version info" }, |
| { "-h", argFlag, &printHelp, 0, "print usage information" }, |
| { "-help", argFlag, &printHelp, 0, "print usage information" }, |
| { "--help", argFlag, &printHelp, 0, "print usage information" }, |
| { "-?", argFlag, &printHelp, 0, "print usage information" }, |
| {} }; |
| |
| int main(int argc, char *argv[]) |
| { |
| std::unique_ptr<PDFDoc> doc; |
| GooString *fileName; |
| const UnicodeMap *uMap; |
| std::optional<GooString> ownerPW, userPW; |
| char uBuf[8]; |
| bool ok; |
| bool hasSaveFile; |
| std::vector<std::unique_ptr<FileSpec>> embeddedFiles; |
| int nFiles, nPages, n, i, j; |
| Page *page; |
| Annots *annots; |
| const GooString *s1; |
| Unicode u; |
| bool isUnicode; |
| |
| Win32Console win32Console(&argc, &argv); |
| |
| // parse args |
| ok = parseArgs(argDesc, &argc, argv); |
| hasSaveFile = strlen(saveFile) > 0; |
| if ((doList ? 1 : 0) + ((saveNum != 0) ? 1 : 0) + ((hasSaveFile != 0) ? 1 : 0) + (saveAll ? 1 : 0) != 1) { |
| ok = false; |
| } |
| if (!ok || argc != 2 || printVersion || printHelp) { |
| fprintf(stderr, "pdfdetach version %s\n", PACKAGE_VERSION); |
| fprintf(stderr, "%s\n", popplerCopyright); |
| fprintf(stderr, "%s\n", xpdfCopyright); |
| if (!printVersion) { |
| printUsage("pdfdetach", "<PDF-file>", argDesc); |
| } |
| return 99; |
| } |
| fileName = new GooString(argv[1]); |
| |
| // read config file |
| globalParams = std::make_unique<GlobalParams>(); |
| if (textEncName[0]) { |
| globalParams->setTextEncoding(textEncName); |
| } |
| |
| // get mapping to output encoding |
| if (!(uMap = globalParams->getTextEncoding())) { |
| error(errConfig, -1, "Couldn't get text encoding"); |
| delete fileName; |
| return 99; |
| } |
| |
| // open PDF file |
| if (ownerPassword[0] != '\001') { |
| ownerPW = GooString(ownerPassword); |
| } |
| if (userPassword[0] != '\001') { |
| userPW = GooString(userPassword); |
| } |
| |
| doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); |
| |
| if (!doc->isOk()) { |
| return 1; |
| } |
| |
| for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i) { |
| embeddedFiles.push_back(doc->getCatalog()->embeddedFile(i)); |
| } |
| |
| nPages = doc->getCatalog()->getNumPages(); |
| for (i = 0; i < nPages; ++i) { |
| page = doc->getCatalog()->getPage(i + 1); |
| if (!page) { |
| continue; |
| } |
| annots = page->getAnnots(); |
| if (!annots) { |
| break; |
| } |
| |
| for (Annot *annot : annots->getAnnots()) { |
| if (annot->getType() != Annot::typeFileAttachment) { |
| continue; |
| } |
| embeddedFiles.push_back(std::make_unique<FileSpec>(static_cast<AnnotFileAttachment *>(annot)->getFile())); |
| } |
| } |
| |
| nFiles = embeddedFiles.size(); |
| |
| // list embedded files |
| if (doList) { |
| printf("%d embedded files\n", nFiles); |
| for (i = 0; i < nFiles; ++i) { |
| const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i]; |
| printf("%d: ", i + 1); |
| s1 = fileSpec->getFileName(); |
| if (!s1) { |
| return 3; |
| } |
| if (hasUnicodeByteOrderMark(s1->toStr())) { |
| isUnicode = true; |
| j = 2; |
| } else { |
| isUnicode = false; |
| j = 0; |
| } |
| while (j < s1->getLength()) { |
| if (isUnicode) { |
| u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff); |
| j += 2; |
| } else { |
| u = pdfDocEncoding[s1->getChar(j) & 0xff]; |
| ++j; |
| } |
| n = uMap->mapUnicode(u, uBuf, sizeof(uBuf)); |
| fwrite(uBuf, 1, n, stdout); |
| } |
| fputc('\n', stdout); |
| } |
| |
| // save all embedded files |
| } else if (saveAll) { |
| std::filesystem::path basePath = savePath; |
| if (basePath.empty()) { |
| basePath = std::filesystem::current_path(); |
| } |
| basePath = basePath.lexically_normal(); |
| |
| for (i = 0; i < nFiles; ++i) { |
| const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i]; |
| std::string filename; |
| |
| s1 = fileSpec->getFileName(); |
| if (!s1) { |
| return 3; |
| } |
| if (hasUnicodeByteOrderMark(s1->toStr())) { |
| isUnicode = true; |
| j = 2; |
| } else { |
| isUnicode = false; |
| j = 0; |
| } |
| while (j < s1->getLength()) { |
| if (isUnicode) { |
| u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff); |
| j += 2; |
| } else { |
| u = pdfDocEncoding[s1->getChar(j) & 0xff]; |
| ++j; |
| } |
| n = uMap->mapUnicode(u, uBuf, sizeof(uBuf)); |
| filename.append(uBuf, n); |
| } |
| |
| if (filename.empty()) { |
| return 3; |
| } |
| std::filesystem::path filePath = basePath; |
| filePath = filePath.append(filename).lexically_normal(); |
| |
| if (!filePath.generic_string().starts_with(basePath.generic_string())) { |
| error(errIO, -1, "Preventing directory traversal"); |
| return 3; |
| } |
| |
| auto *embFile = fileSpec->getEmbeddedFile(); |
| if (!embFile || !embFile->isOk()) { |
| return 3; |
| } |
| if (!embFile->save(filePath.generic_string())) { |
| error(errIO, -1, "Error saving embedded file as '{0:s}'", filePath.c_str()); |
| return 2; |
| } |
| } |
| |
| // save an embedded file |
| } else { |
| if (hasSaveFile) { |
| for (i = 0; i < nFiles; ++i) { |
| const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i]; |
| s1 = fileSpec->getFileName(); |
| if (strcmp(s1->c_str(), saveFile) == 0) { |
| saveNum = i + 1; |
| break; |
| } |
| } |
| } |
| if (saveNum < 1 || saveNum > nFiles) { |
| error(errCommandLine, -1, hasSaveFile ? "Invalid file name" : "Invalid file number"); |
| return 99; |
| } |
| |
| const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[saveNum - 1]; |
| std::string targetPath = savePath; |
| if (targetPath.empty()) { |
| // The user hasn't given a path to save, just use the filename specified in the pdf as name |
| s1 = fileSpec->getFileName(); |
| if (!s1) { |
| return 3; |
| } |
| if (hasUnicodeByteOrderMark(s1->toStr())) { |
| isUnicode = true; |
| j = 2; |
| } else { |
| isUnicode = false; |
| j = 0; |
| } |
| while (j < s1->getLength()) { |
| if (isUnicode) { |
| u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff); |
| j += 2; |
| } else { |
| u = pdfDocEncoding[s1->getChar(j) & 0xff]; |
| ++j; |
| } |
| n = uMap->mapUnicode(u, uBuf, sizeof(uBuf)); |
| targetPath.append(uBuf, n); |
| } |
| |
| const std::filesystem::path basePath = std::filesystem::current_path().lexically_normal(); |
| std::filesystem::path filePath = basePath; |
| filePath = filePath.append(targetPath).lexically_normal(); |
| |
| if (!filePath.generic_string().starts_with(basePath.generic_string())) { |
| error(errIO, -1, "Preventing directory traversal"); |
| return 3; |
| } |
| targetPath = filePath.generic_string(); |
| } |
| |
| auto *embFile = fileSpec->getEmbeddedFile(); |
| if (!embFile || !embFile->isOk()) { |
| return 3; |
| } |
| if (!embFile->save(targetPath)) { |
| error(errIO, -1, "Error saving embedded file as '{0:s}'", targetPath.c_str()); |
| return 2; |
| } |
| } |
| |
| return 0; |
| } |