//========================================================================
//
// Parser.cc
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2006, 2009, 2010 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
// Copyright (C) 2009 Ilya Gorenbein <igorenbein@finjan.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================

#include <config.h>

#ifdef USE_GCC_PRAGMAS
#pragma implementation
#endif

#include <stddef.h>
#include "Object.h"
#include "Array.h"
#include "Dict.h"
#include "Decrypt.h"
#include "Parser.h"
#include "XRef.h"
#include "Error.h"

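// A Parser reads tokens from a Lexer with a two-token lookahead: buf1 holds
// the current token and buf2 the one after it, so that constructs such as
// "<int> <int> R" and "<< ... >> stream" can be recognized before their
// tokens are consumed.  The constructor primes both buffers from the lexer,
// and the Parser takes ownership of the lexer (it is deleted in ~Parser).
//
// Typical use (a sketch only, in the style of Gfx-like callers): wrap a
// stream in a Lexer, hand it to a Parser, and pull objects from it, e.g.
//
//   Parser parser(xref, new Lexer(xref, str), gFalse);
//   std::set<int> seen;
//   Object obj;
//   parser.getObj(&obj, &seen);
//   // ... use obj ...
//   obj.free();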
Parser::Parser(XRef *xrefA, Lexer *lexerA, GBool allowStreamsA) {
  xref = xrefA;
  lexer = lexerA;
  inlineImg = 0;
  allowStreams = allowStreamsA;
  lexer->getObj(&buf1);
  lexer->getObj(&buf2);
}

Parser::~Parser() {
  buf1.free();
  buf2.free();
  delete lexer;
}

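// The fetchOriginatorNums set is threaded through getObj() and into the
// Dict lookups it triggers; it carries the numbers of objects whose fetch is
// already in progress so that circular references (e.g. a stream /Length
// that leads back to the object being parsed) can be detected and broken
// instead of recursing forever.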
Object *Parser::getObj(Object *obj, Guchar *fileKey,
                       CryptAlgorithm encAlgorithm, int keyLength,
                       int objNum, int objGen) {
  std::set<int> fetchOriginatorNums;
  return getObj(obj, fileKey, encAlgorithm, keyLength, objNum, objGen, &fetchOriginatorNums);
}

Object *Parser::getObj(Object *obj, std::set<int> *fetchOriginatorNums)
{
  return getObj(obj, NULL, cryptRC4, 0, 0, 0, fetchOriginatorNums);
}

Object *Parser::getObj(Object *obj, Guchar *fileKey,
                       CryptAlgorithm encAlgorithm, int keyLength,
                       int objNum, int objGen, std::set<int> *fetchOriginatorNums) {
  char *key;
  Stream *str;
  Object obj2;
  int num;
  DecryptStream *decrypt;
  GooString *s, *s2;
  int c;

  // refill buffer after inline image data
  if (inlineImg == 2) {
    buf1.free();
    buf2.free();
    lexer->getObj(&buf1);
    lexer->getObj(&buf2);
    inlineImg = 0;
  }

  // array
  if (buf1.isCmd("[")) {
    shift();
    obj->initArray(xref);
    while (!buf1.isCmd("]") && !buf1.isEOF())
      obj->arrayAdd(getObj(&obj2, fileKey, encAlgorithm, keyLength,
                           objNum, objGen, fetchOriginatorNums));
    if (buf1.isEOF())
      error(getPos(), "End of file inside array");
    shift();

  // dictionary or stream
  } else if (buf1.isCmd("<<")) {
    shift(objNum);
    obj->initDict(xref);
    while (!buf1.isCmd(">>") && !buf1.isEOF()) {
      if (!buf1.isName()) {
        error(getPos(), "Dictionary key must be a name object");
        shift();
      } else {
        // buf1 might go away in shift(), so construct the key
        key = copyString(buf1.getName());
        shift();
        if (buf1.isEOF() || buf1.isError()) {
          gfree(key);
          break;
        }
        obj->dictAdd(key, getObj(&obj2, fileKey, encAlgorithm, keyLength, objNum, objGen, fetchOriginatorNums));
      }
    }
    if (buf1.isEOF())
      error(getPos(), "End of file inside dictionary");
    // stream objects are not allowed inside content streams or
    // object streams
    if (allowStreams && buf2.isCmd("stream")) {
      if ((str = makeStream(obj, fileKey, encAlgorithm, keyLength,
                            objNum, objGen, fetchOriginatorNums))) {
        obj->initStream(str);
      } else {
        obj->free();
        obj->initError();
      }
    } else {
      shift();
    }

  // indirect reference or integer
  } else if (buf1.isInt()) {
    num = buf1.getInt();
    shift();
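    // an integer followed by a second integer and the "R" keyword (now
    // sitting in buf1 and buf2) is an indirect reference; otherwise the
    // first integer stands on its own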
    if (buf1.isInt() && buf2.isCmd("R")) {
      obj->initRef(num, buf1.getInt());
      shift();
      shift();
    } else {
      obj->initInt(num);
    }

  // string
  } else if (buf1.isString() && fileKey) {
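    // decrypt the string: wrap the raw bytes in a MemStream, run them
    // through a DecryptStream keyed on this object's number and generation,
    // and collect the output into a new GooString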
    s = buf1.getString();
    s2 = new GooString();
    obj2.initNull();
    decrypt = new DecryptStream(new MemStream(s->getCString(), 0,
                                              s->getLength(), &obj2),
                                fileKey, encAlgorithm, keyLength,
                                objNum, objGen);
    decrypt->reset();
    while ((c = decrypt->getChar()) != EOF) {
      s2->append((char)c);
    }
    delete decrypt;
    obj->initString(s2);
    shift();

  // simple object
  } else {
    // avoid re-allocating memory for complex objects like strings by
    // shallow copy of <buf1> to <obj> and nulling <buf1> so that
    // subsequent buf1.free() won't free this memory
    buf1.shallowCopy(obj);
    buf1.initNull();
    shift();
  }

  return obj;
}

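// Build a Stream for a stream object whose dictionary has just been parsed
// into <dict>.  Reads /Length, skips over the stream data, and checks for
// the 'endstream' keyword; if /Length is wrong, it either rescans for
// 'endstream' (when an xref is available) or pads the length as a last
// resort.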
Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
                           CryptAlgorithm encAlgorithm, int keyLength,
                           int objNum, int objGen, std::set<int> *fetchOriginatorNums) {
  Object obj;
  BaseStream *baseStr;
  Stream *str;
  Guint pos, endPos, length;

  // get stream start position
  lexer->skipToNextLine();
  pos = lexer->getPos();

  // get length
  dict->dictLookup("Length", &obj, fetchOriginatorNums);
  if (obj.isInt()) {
    length = (Guint)obj.getInt();
    obj.free();
  } else {
    error(getPos(), "Bad 'Length' attribute in stream");
    obj.free();
    length = 0;
  }

  // check for length in damaged file
  if (xref && xref->getStreamEnd(pos, &endPos)) {
    length = endPos - pos;
  }

  // in badly damaged PDF files, we can run off the end of the input
  // stream immediately after the "stream" token
  if (!lexer->getStream()) {
    return NULL;
  }
  baseStr = lexer->getStream()->getBaseStream();

  // skip over stream data
  if (Lexer::LOOK_VALUE_NOT_CACHED != lexer->lookCharLastValueCached) {
    // take into account the fact that we've cached one value
    pos = pos - 1;
    lexer->lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED;
  }
  lexer->setPos(pos + length);

  // refill token buffers and check for 'endstream'
  shift();  // kill '>>'
  shift();  // kill 'stream'
  if (buf1.isCmd("endstream")) {
    shift();
  } else {
    error(getPos(), "Missing 'endstream'");
    if (xref) {
      // shift until we find the proper endstream or we change to another object or reach eof
      while (!buf1.isCmd("endstream") && xref->getNumEntry(lexer->getPos()) == objNum && !buf1.isEOF()) {
        shift();
      }
      length = lexer->getPos() - pos;
      if (buf1.isCmd("endstream")) {
        obj.initInt(length);
        dict->dictSet("Length", &obj);
        obj.free();
      }
    } else {
      // When building the xref we can't use it, so use this
      // kludge for broken PDF files: just add 5k to the length, and
      // hope it's enough
      length += 5000;
    }
  }

  // make base stream
  str = baseStr->makeSubStream(pos, gTrue, length, dict);

  // handle decryption
  if (fileKey) {
    str = new DecryptStream(str, fileKey, encAlgorithm, keyLength,
                            objNum, objGen);
  }

  // get filters
  str = str->addFilters(dict);

  return str;
}

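// Advance the token window: buf2 moves into buf1 and a new token is read
// into buf2.  inlineImg tracks inline image parsing: it is set to 1 when the
// 'ID' operator reaches buf2 (the binary data that follows must not be
// tokenized, so buf2 is left null), bumped to 2 on the next shift, and reset
// in getObj() once the image data has been skipped.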
void Parser::shift(int objNum) {
  if (inlineImg > 0) {
    if (inlineImg < 2) {
      ++inlineImg;
    } else {
      // in a damaged content stream, if 'ID' shows up in the middle
      // of a dictionary, we need to reset
      inlineImg = 0;
    }
  } else if (buf2.isCmd("ID")) {
    lexer->skipChar();  // skip char after 'ID' command
    inlineImg = 1;
  }
  buf1.free();
  buf2.shallowCopy(&buf1);
  if (inlineImg > 0)    // don't buffer inline image data
    buf2.initNull();
  else
    lexer->getObj(&buf2, objNum);
}