// blob: a11c6307c784ab8fa1a66b9bba1ee4edac6a84a7 [file] [log] [blame]
#include <QtTest/QTest>

#include <poppler-qt5.h>

#include <QtCore/QFile>

#include <memory>
class TestActualText : public QObject
{
Q_OBJECT
public:
explicit TestActualText(QObject *parent = nullptr) : QObject(parent) { }
private slots:
void checkActualText1();
void checkActualText2();
void checkActualText2_data();
void checkAllOrientations();
void checkAllOrientations_data();
void checkFakeboldText();
void checkFakeboldText_data();
private:
void checkActualText(Poppler::Document *doc, const QRectF &area, const QString &text);
};
/**
 * Compares the text extracted from the first page of @p doc with @p text.
 *
 * @param doc  Document to read from; must not be null (callers verify this
 *             before calling).
 * @param area Rectangle handed to Poppler::Page::text().
 * @param text Expected extraction result.
 *
 * A failed check is recorded by QtTest and makes this helper return early;
 * the caller itself is not aborted.
 */
void TestActualText::checkActualText(Poppler::Document *doc, const QRectF &area, const QString &text)
{
    // QVERIFY/QCOMPARE return from the function on failure, so the page is
    // held by a smart pointer to guarantee it is released on every path.
    const std::unique_ptr<Poppler::Page> page { doc->page(0) };
    QVERIFY(page);
    QCOMPARE(page->text(area), text);
}
void TestActualText::checkActualText1()
{
Poppler::Document *doc;
doc = Poppler::Document::load(TESTDATADIR "/unittestcases/WithActualText.pdf");
QVERIFY(doc);
checkActualText(doc, QRectF {}, QStringLiteral("The slow brown fox jumps over the black dog."));
delete doc;
}
void TestActualText::checkActualText2()
{
QFETCH(QRectF, area);
QFETCH(QString, text);
QFile file(TESTDATADIR "/unittestcases/WithActualText.pdf");
QVERIFY(file.open(QIODevice::ReadOnly));
Poppler::Document *doc;
doc = Poppler::Document::load(&file);
QVERIFY(doc);
checkActualText(doc, area, text);
delete doc;
}
void TestActualText::checkActualText2_data()
{
QTest::addColumn<QRectF>("area");
QTest::addColumn<QString>("text");
// Line bounding box is [100.000 90.720 331.012110 102.350]
QTest::newRow("full page") << QRectF {} << QStringLiteral("The slow brown fox jumps over the black dog.");
QTest::newRow("full line") << QRectF { 50.0, 90.0, 290.0, 20.0 } << QStringLiteral("The slow brown fox jumps over the black dog.");
QTest::newRow("full line [narrow]") << QRectF { 50.0, 95.0, 290.0, 5.0 } << QStringLiteral("The slow brown fox jumps over the black dog.");
QTest::newRow("above line") << QRectF { 50.0, 85.0, 290.0, 10.0 } << QString {};
QTest::newRow("above line mid") << QRectF { 50.0, 90.0, 290.0, 5.0 } << QString {};
QTest::newRow("first two words") << QRectF { 50.0, 90.0, 100.0, 20.0 } << QStringLiteral("The slow");
QTest::newRow("first two words [narrow]") << QRectF { 50.0, 95.0, 100.0, 5.0 } << QStringLiteral("The slow");
QTest::newRow("first character") << QRectF { 103.0, 95.0, 1.0, 5.0 } << QStringLiteral("T");
QTest::newRow("last two words") << QRectF { 285.0, 90.0, 100.0, 20.0 } << QStringLiteral("black dog.");
QTest::newRow("last character") << QRectF { 320.0, 90.0, 8.0, 20.0 } << QStringLiteral("g");
QTest::newRow("middle 'fox'") << QRectF { 190.0, 90.0, 15.0, 20.0 } << QStringLiteral("fox");
QTest::newRow("middle 'x'") << QRectF { 200.0, 90.0, 5.0, 20.0 } << QStringLiteral("x");
}
void TestActualText::checkAllOrientations()
{
QFETCH(int, pageNr);
QFETCH(QRectF, area);
QFETCH(QString, text);
QString path { TESTDATADIR "/unittestcases/orientation.pdf" };
std::unique_ptr<Poppler::Document> doc { Poppler::Document::load(path) };
QVERIFY(doc);
std::unique_ptr<Poppler::Page> page { doc->page(pageNr) };
QVERIFY(page);
QCOMPARE(page->text(area), text);
}
void TestActualText::checkAllOrientations_data()
{
QTest::addColumn<int>("pageNr");
QTest::addColumn<QRectF>("area");
QTest::addColumn<QString>("text");
QTest::newRow("Portrait") << 0 << QRectF {} << QStringLiteral("Portrait");
QTest::newRow("Landscape") << 1 << QRectF {} << QStringLiteral("Landscape");
QTest::newRow("Upside down") << 2 << QRectF {} << QStringLiteral("Upside down");
QTest::newRow("Seacape") << 3 << QRectF {} << QStringLiteral("Seascape");
QTest::newRow("Portrait A4 rect") << 0 << QRectF { 0, 0, 595, 842 } << QStringLiteral("Portrait");
QTest::newRow("Landscape A4 rect") << 1 << QRectF { 0, 0, 842, 595 } << QStringLiteral("Landscape");
QTest::newRow("Upside down A4 rect") << 2 << QRectF { 0, 0, 595, 842 } << QStringLiteral("Upside down");
QTest::newRow("Seacape A4 rect") << 3 << QRectF { 0, 0, 842, 595 } << QStringLiteral("Seascape");
QTest::newRow("Portrait line rect") << 0 << QRectF { 30, 30, 60, 20 } << QStringLiteral("Portrait");
QTest::newRow("Landscape line rect") << 1 << QRectF { 790, 30, 20, 80 } << QStringLiteral("Landscape");
QTest::newRow("Upside down line rect") << 2 << QRectF { 485, 790, 75, 20 } << QStringLiteral("Upside down");
QTest::newRow("Seacape line rect") << 3 << QRectF { 30, 500, 20, 70 } << QStringLiteral("Seascape");
QTest::newRow("Portrait small rect B") << 0 << QRectF { 30, 35, 10, 10 } << QStringLiteral("P");
QTest::newRow("Portrait small rect E") << 0 << QRectF { 80, 35, 10, 10 } << QStringLiteral("t");
QTest::newRow("Landscape small rect B") << 1 << QRectF { 800, 30, 10, 10 } << QStringLiteral("L");
QTest::newRow("Landscape small rect E") << 1 << QRectF { 800, 90, 10, 10 } << QStringLiteral("e");
QTest::newRow("Upside down small rect B") << 2 << QRectF { 550, 800, 10, 10 } << QStringLiteral("U");
QTest::newRow("Upside down small rect E") << 2 << QRectF { 485, 800, 10, 10 } << QStringLiteral("n");
QTest::newRow("Seacape small rect B") << 3 << QRectF { 40, 550, 10, 10 } << QStringLiteral("S");
QTest::newRow("Seacape small rect E") << 3 << QRectF { 40, 510, 10, 10 } << QStringLiteral("p");
}
void TestActualText::checkFakeboldText()
{
QFETCH(int, pageNr);
QFETCH(QRectF, area);
QFETCH(QString, text);
QString path { TESTDATADIR "/unittestcases/fakebold.pdf" };
std::unique_ptr<Poppler::Document> doc { Poppler::Document::load(path) };
QVERIFY(doc);
std::unique_ptr<Poppler::Page> page { doc->page(pageNr) };
QVERIFY(page);
QEXPECT_FAIL("Upright line 3", "Fakebold not matched when bold word is followed with non-bold glyph", Continue);
QEXPECT_FAIL("Upright line 4", "Fakebold not matched when bold word follows non-bold glyph", Continue);
QEXPECT_FAIL("Upright line 5", "Fakebold not matched when bold word is enclosed by non-bold glyphs", Continue);
QEXPECT_FAIL("Rotated 90' line 3", "Fakebold not matched when bold word is followed with non-bold glyph", Continue);
QEXPECT_FAIL("Rotated 90' line 4", "Fakebold not matched when bold word follows non-bold glyph", Continue);
QEXPECT_FAIL("Rotated 90' line 5", "Fakebold not matched when bold word is enclosed by non-bold glyphs", Continue);
QEXPECT_FAIL("Rotated 180' line 3", "Fakebold not matched when bold word is followed with non-bold glyph", Continue);
QEXPECT_FAIL("Rotated 180' line 4", "Fakebold not matched when bold word follows non-bold glyph", Continue);
QEXPECT_FAIL("Rotated 180' line 5", "Fakebold not matched when bold word is enclosed by non-bold glyphs", Continue);
QEXPECT_FAIL("Rotated 270' line 3", "Fakebold not matched when bold word is followed with non-bold glyph", Continue);
QEXPECT_FAIL("Rotated 270' line 4", "Fakebold not matched when bold word follows non-bold glyph", Continue);
QEXPECT_FAIL("Rotated 270' line 5", "Fakebold not matched when bold word is enclosed by non-bold glyphs", Continue);
QCOMPARE(page->text(area), text);
}
void TestActualText::checkFakeboldText_data()
{
QTest::addColumn<int>("pageNr");
QTest::addColumn<QRectF>("area");
QTest::addColumn<QString>("text");
QTest::newRow("Upright line 1") << 0 << QRectF { 0, 0, 595, 80 } << QStringLiteral("1 This is fakebold text.");
QTest::newRow("Upright line 2") << 0 << QRectF { 0, 80, 595, 80 } << QStringLiteral("2 This is a fakebold word.");
QTest::newRow("Upright line 3") << 0 << QRectF { 0, 140, 595, 80 } << QStringLiteral("3 The last word is in fakebold.");
QTest::newRow("Upright line 4") << 0 << QRectF { 0, 220, 595, 80 } << QStringLiteral("4 Hyphenated-fakebold word.");
QTest::newRow("Upright line 5") << 0 << QRectF { 0, 300, 595, 80 } << QStringLiteral("5 Quoted \"fakebold\" word.");
QTest::newRow("Rotated 90' line 1") << 1 << QRectF { 510, 0, 80, 842 } << QStringLiteral("1 This is fakebold text.");
QTest::newRow("Rotated 90' line 2") << 1 << QRectF { 430, 0, 80, 842 } << QStringLiteral("2 This is a fakebold word.");
QTest::newRow("Rotated 90' line 3") << 1 << QRectF { 350, 0, 80, 842 } << QStringLiteral("3 The last word is in fakebold.");
QTest::newRow("Rotated 90' line 4") << 1 << QRectF { 270, 0, 80, 842 } << QStringLiteral("4 Hyphenated-fakebold word.");
QTest::newRow("Rotated 90' line 5") << 1 << QRectF { 190, 0, 80, 842 } << QStringLiteral("5 Quoted \"fakebold\" word.");
QTest::newRow("Rotated 180' line 1") << 2 << QRectF { 0, 760, 595, 80 } << QStringLiteral("1 This is fakebold text.");
QTest::newRow("Rotated 180' line 2") << 2 << QRectF { 0, 680, 595, 80 } << QStringLiteral("2 This is a fakebold word.");
QTest::newRow("Rotated 180' line 3") << 2 << QRectF { 0, 600, 595, 80 } << QStringLiteral("3 The last word is in fakebold.");
QTest::newRow("Rotated 180' line 4") << 2 << QRectF { 0, 520, 595, 80 } << QStringLiteral("4 Hyphenated-fakebold word.");
QTest::newRow("Rotated 180' line 5") << 2 << QRectF { 0, 440, 595, 80 } << QStringLiteral("5 Quoted \"fakebold\" word.");
QTest::newRow("Rotated 270' line 1") << 3 << QRectF { 20, 0, 80, 842 } << QStringLiteral("1 This is fakebold text.");
QTest::newRow("Rotated 270' line 2") << 3 << QRectF { 100, 0, 80, 842 } << QStringLiteral("2 This is a fakebold word.");
QTest::newRow("Rotated 270' line 3") << 3 << QRectF { 160, 0, 80, 842 } << QStringLiteral("3 The last word is in fakebold.");
QTest::newRow("Rotated 270' line 4") << 3 << QRectF { 240, 0, 80, 842 } << QStringLiteral("4 Hyphenated-fakebold word.");
QTest::newRow("Rotated 270' line 5") << 3 << QRectF { 320, 0, 80, 842 } << QStringLiteral("5 Quoted \"fakebold\" word.");
}
// Qt Test entry point: expands to a main() that runs the TestActualText slots
// without requiring a GUI.
QTEST_GUILESS_MAIN(TestActualText)
// moc-generated code for the Q_OBJECT class declared in this source file.
#include "check_actualtext.moc"