Completed
Pull Request — master (#318)
by
unknown
10:20
created

ParserTest::setUp()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 2
c 1
b 0
f 0
dl 0
loc 5
rs 10
cc 1
nc 1
nop 0
1
<?php
2
3
/**
 * @file This file is part of the PdfParser library.
 *
 * @author  Konrad Abicht <[email protected]>
 * @date    2020-06-01
 *
 * @author  Sébastien MALOT <[email protected]>
 * @date    2017-01-03
 *
 * @license LGPLv3
 * @url     <https://github.com/smalot/pdfparser>
 *
 *  PdfParser is a pdf library written in PHP, extraction oriented.
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
 */
32
33
namespace Tests\Smalot\PdfParser\Integration;
34
35
use Exception;
36
use Smalot\PdfParser\Document;
37
use Smalot\PdfParser\Parser;
38
use Smalot\PdfParser\XObject\Image;
39
use Test\Smalot\PdfParser\TestCase;
40
41
/**
 * Integration tests that run the Parser against the PDF files under samples/.
 */
class ParserTest extends TestCase
{
    /**
     * Creates a fresh Parser instance as the fixture before each test.
     */
    public function setUp()
    {
        parent::setUp();

        $this->fixture = new Parser();
    }

    /**
     * Parses every *.pdf file found in samples/bugs and checks that each document
     * has at least one page and that every page yields non-empty text.
     *
     * Exceptions accepted by isToleratedParseFailure() are ignored; every other
     * exception is rethrown. Nothing is checked when the directory does not exist.
     */
    public function testParseFile()
    {
        $directory = $this->rootDir.'/samples/bugs';

        if (!is_dir($directory)) {
            return;
        }

        foreach (scandir($directory) as $file) {
            if (!preg_match('/^.*\.pdf$/i', $file)) {
                continue;
            }

            try {
                $document = $this->fixture->parseFile($directory.'/'.$file);
                $pages = $document->getPages();
                $this->assertGreaterThan(0, \count($pages), "No pages found in $file");

                foreach ($pages as $page) {
                    $content = $page->getText();
                    $this->assertGreaterThan(0, \strlen($content), "Empty page text in $file");
                }
            } catch (Exception $e) {
                // Failed assertions raised inside the try block also arrive here
                // (presumably as Exception subclasses - confirm for the PHPUnit
                // version in use), so anything not explicitly tolerated must be
                // rethrown, otherwise this test could never fail.
                if (!$this->isToleratedParseFailure($e)) {
                    throw $e;
                }
            }
        }
    }

    /**
     * Tells whether an exception raised while parsing a file from samples/bugs
     * is one of the failures testParseFile() deliberately ignores.
     *
     * @param Exception $e exception caught while parsing or reading a fixture
     *
     * @return bool true when the exception may be ignored
     */
    private function isToleratedParseFailure(Exception $e)
    {
        $message = $e->getMessage();

        // Thrown by getPages() for the Issue267 fixture, which is stored in the
        // same directory; testIssue267() covers that behaviour explicitly.
        if ('Secured pdf file are currently not supported.' === $message || 'Missing catalog.' === $message) {
            return true;
        }

        // strpos() returns false when the needle is absent, so the comparison
        // has to be strict: a loose "0 != false" is false and would make every
        // unrelated exception look tolerated.
        return 0 === strpos($message, 'TCPDF_PARSER');
    }

    /**
     * Parsing certain PDFs may lead to following notices:
     *
     *      Notice: Trying to access array offset on value of type int
     *
     * and to an exception:
     *
     *      Missing catalog.
     *
     * @see https://github.com/smalot/pdfparser/issues/267
     */
    public function testIssue267()
    {
        $this->expectException(Exception::class);
        $this->expectExceptionMessage('Missing catalog.');

        $filename = $this->rootDir.'/samples/bugs/Issue267_array_access_on_int.pdf';

        $document = $this->fixture->parseFile($filename);

        // triggers the exception
        $document->getPages();
    }

    /**
     * Data sets for testParserForDifferentSource(): one sample file name per set,
     * keyed by that same file name.
     *
     * @return array
     */
    public function docProvider()
    {
        return [
            'adobe-compressed-pdf16.pdf' => ['adobe-compressed-pdf16.pdf'],
            'adobe-converted-pdf16.pdf' => ['adobe-converted-pdf16.pdf'],
            'google-docs-export-pdf15.pdf' => ['google-docs-export-pdf15.pdf'],
        ];
    }

    /**
     * Checks that the given sample document contains the expected text snippets
     * and exactly one object whose class is Image.
     *
     * @param string $testDoc file name relative to the samples directory
     *
     * @dataProvider docProvider
     */
    public function testParserForDifferentSource($testDoc)
    {
        $filename = $this->rootDir."/samples/$testDoc";

        /** @var Document $document */
        $document = $this->fixture->parseFile($filename);

        // Extract the text once and reuse it for both checks.
        $text = $document->getText();
        $this->assertStringContainsString('Test document', $text);
        $this->assertStringContainsString('Test mono', $text);

        $imageCount = 0;
        foreach ($document->getObjects() as $object) {
            // Exact class match on purpose: subclasses of Image are not counted.
            if (Image::class === \get_class($object)) {
                ++$imageCount;
            }
        }
        $this->assertSame(1, $imageCount, 'Asserting has exactly one image');
    }

    /**
     * Checks that the text of the issue201 sample contains the expected sentence.
     */
    public function testIssue201()
    {
        $filename = $this->rootDir.'/samples/bugs/issue201.pdf';

        /** @var Document $document */
        $document = $this->fixture->parseFile($filename);

        $this->assertStringContainsString('The pdf995 suite of products', $document->getText());
    }
}
146