DocumentTest   A
last analyzed

Complexity

Total Complexity 11

Size/Duplication

Total Lines 218
Duplicated Lines 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
wmc 11
eloc 124
c 2
b 0
f 0
dl 0
loc 218
rs 10

11 Methods

Rating   Name   Duplication   Size   Complexity  
A testGetObjectsByType() 0 13 1
A getDocumentInstance() 0 3 1
A getPagesInstance() 0 3 1
B testGetPages() 0 80 1
A testDictionary() 0 16 1
A testGetObjects() 0 15 1
A testSetObjects() 0 22 1
A getPageInstance() 0 3 1
A getPDFObjectInstance() 0 3 1
A testGetPagesMissingCatalog() 0 8 1
A testExtractXMPMetadataIssue721() 0 27 1
1
<?php
2
3
/**
4
 * @file This file is part of the PdfParser library.
5
 *
6
 * @author  Konrad Abicht <[email protected]>
7
 *
8
 * @date    2020-06-01
9
 *
10
 * @author  Sébastien MALOT <[email protected]>
11
 *
12
 * @date    2017-01-03
13
 *
14
 * @license LGPLv3
15
 *
16
 * @url     <https://github.com/smalot/pdfparser>
17
 *
18
 *  PdfParser is a pdf library written in PHP, extraction oriented.
19
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
20
 *
21
 *  This program is free software: you can redistribute it and/or modify
22
 *  it under the terms of the GNU Lesser General Public License as published by
23
 *  the Free Software Foundation, either version 3 of the License, or
24
 *  (at your option) any later version.
25
 *
26
 *  This program is distributed in the hope that it will be useful,
27
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
28
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29
 *  GNU Lesser General Public License for more details.
30
 *
31
 *  You should have received a copy of the GNU Lesser General Public License
32
 *  along with this program.
33
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
34
 */
35
36
namespace PHPUnitTests\Integration;
37
38
use PHPUnitTests\TestCase;
39
use Smalot\PdfParser\Document;
40
use Smalot\PdfParser\Header;
41
use Smalot\PdfParser\Page;
42
use Smalot\PdfParser\Pages;
43
use Smalot\PdfParser\PDFObject;
44
45
/**
 * General Document related tests.
 *
 * Covers object storage, the per-type dictionary, page listing and XMP
 * metadata extraction of Document. All fixtures are built from small inline
 * PDF dictionaries parsed with Header::parse().
 */
class DocumentTest extends TestCase
{
    /**
     * Raw dictionary describing a minimal object of type Page.
     */
    private const PAGE_CONTENT = '<</Type/Page>>';

    /**
     * Creates the empty Document every test starts from.
     */
    protected function getDocumentInstance(): Document
    {
        return new Document();
    }

    /**
     * Creates a generic PDFObject bound to the given document.
     *
     * @param Header|null $header optional header, passed to the constructor as-is
     */
    protected function getPDFObjectInstance(Document $document, ?Header $header = null): PDFObject
    {
        return new PDFObject($document, $header);
    }

    /**
     * Creates a Page bound to the given document and header.
     */
    protected function getPageInstance(Document $document, Header $header): PDFObject
    {
        return new Page($document, $header);
    }

    /**
     * Creates a Pages node bound to the given document and header.
     */
    protected function getPagesInstance(Document $document, Header $header): PDFObject
    {
        return new Pages($document, $header);
    }

    /**
     * Parses $content into a fresh header and wraps it in a Page.
     *
     * A new header is parsed on every call so that no two objects share one.
     */
    private function createPage(Document $document, string $content = self::PAGE_CONTENT): PDFObject
    {
        $header = Header::parse($content, $document);

        return $this->getPageInstance($document, $header);
    }

    /**
     * Parses $content into a fresh header and wraps it in a Pages node.
     */
    private function createPages(Document $document, string $content): PDFObject
    {
        $header = Header::parse($content, $document);

        return $this->getPagesInstance($document, $header);
    }

    /**
     * Asserts that $pages holds exactly $expectedCount entries and that the
     * entries at indexes 0..$expectedCount-1 are all Page instances.
     *
     * @param array<int|string, mixed> $pages
     */
    private function assertPageList(int $expectedCount, array $pages): void
    {
        $this->assertCount($expectedCount, $pages);

        for ($index = 0; $index < $expectedCount; ++$index) {
            $this->assertArrayHasKey($index, $pages);
            $this->assertInstanceOf(Page::class, $pages[$index]);
        }
    }

    public function testSetObjects(): void
    {
        $document = $this->getDocumentInstance();
        $object = $this->getPDFObjectInstance($document);

        // Obj #1 is missing
        $this->assertNull($document->getObjectById(1));
        $document->setObjects([1 => $object]);

        // Obj #1 exists
        $this->assertInstanceOf(PDFObject::class, $document->getObjectById(1));

        $content = self::PAGE_CONTENT;
        $header = Header::parse($content, $document);
        $object = $this->getPDFObjectInstance($document, $header);
        $document->setObjects([2 => $object]);

        // Obj #1 is missing again: the second setObjects() call only provided #2
        $this->assertNull($document->getObjectById(1));

        // Obj #2 exists
        $this->assertInstanceOf(PDFObject::class, $document->getObjectById(2));
    }

    public function testGetObjects(): void
    {
        $document = $this->getDocumentInstance();
        $object1 = $this->getPDFObjectInstance($document);
        // Trailing text after the dictionary is deliberately part of the input
        $object2 = $this->createPage($document, '<</Type/Page>>unparsed content');
        $document->setObjects([1 => $object1, 2 => $object2]);

        $objects = $document->getObjects();

        $this->assertCount(2, $objects);
        $this->assertInstanceOf(PDFObject::class, $objects[1]);
        $this->assertInstanceOf(PDFObject::class, $objects[2]);
        $this->assertInstanceOf(Page::class, $objects[2]);
    }

    public function testDictionary(): void
    {
        $document = $this->getDocumentInstance();

        // A fresh document has an empty dictionary
        $this->assertCount(0, $document->getDictionary());

        $object1 = $this->getPDFObjectInstance($document);
        $object2 = $this->createPage($document);
        $document->setObjects([1 => $object1, 2 => $object2]);

        $dictionary = $document->getDictionary();

        // Only the object carrying a /Type entry is expected in the dictionary
        $this->assertCount(1, $dictionary);
        $this->assertCount(1, $dictionary['Page']['all']);
        $this->assertEquals($object2, $dictionary['Page']['all'][2]);
    }

    public function testGetObjectsByType(): void
    {
        $document = $this->getDocumentInstance();
        $object1 = $this->getPDFObjectInstance($document);
        $object2 = $this->createPage($document);
        $document->setObjects([1 => $object1, 2 => $object2]);

        $objects = $document->getObjectsByType('Page');

        $this->assertCount(1, $objects);
        $this->assertInstanceOf(PDFObject::class, $objects[2]);
        $this->assertInstanceOf(Page::class, $objects[2]);
    }

    public function testGetPages(): void
    {
        $document = $this->getDocumentInstance();

        // Listing pages from type Page
        $document->setObjects([
            1 => $this->createPage($document),
            2 => $this->createPage($document),
        ]);

        $this->assertPageList(2, $document->getPages());

        // Listing pages from type Pages (kids)
        $document->setObjects([
            '1_0' => $this->createPage($document),
            '2_0' => $this->createPage($document),
            '3_0' => $this->createPage($document),
            '4_0' => $this->createPages($document, '<</Type/Pages/Kids[1 0 R 2 0 R]>>'),
            '5_0' => $this->createPages($document, '<</Type/Pages/Kids[3 0 R]>>'),
        ]);

        $this->assertPageList(3, $document->getPages());

        // Listing pages from type Catalog
        // NOTE(review): the catalog object is instantiated through
        // getPagesInstance(), exactly as before; presumably Document relies on
        // the header's /Type rather than on the PHP class - confirm.
        $document->setObjects([
            '1_0' => $this->createPage($document),
            '2_0' => $this->createPage($document),
            '3_0' => $this->createPage($document),
            '4_0' => $this->createPages($document, '<</Type/Pages/Kids[1 0 R 2 0 R]>>'),
            '5_0' => $this->createPages($document, '<</Type/Pages/Kids[4 0 R 3 0 R]>>'),
            '6_0' => $this->createPages($document, '<</Type/Catalog/Pages 5 0 R >>'),
        ]);

        $this->assertPageList(3, $document->getPages());
    }

    public function testGetPagesMissingCatalog(): void
    {
        // Expectations must be registered before the call that throws
        $this->expectException(\Exception::class);
        $this->expectExceptionMessage('Missing catalog.');

        // Missing catalog
        $document = $this->getDocumentInstance();
        $document->getPages();
    }

    /**
     * XMP metadata must be parsed even when the packet has no dc:format tag.
     *
     * @see https://github.com/smalot/pdfparser/issues/721
     */
    public function testExtractXMPMetadataIssue721(): void
    {
        $document = $this->getDocumentInstance();

        // Check that XMP metadata is parsed even if missing a dc:format tag
        $content = '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 5.6-c015 84.159810, 2016/09/10-02:41:30">
   <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
      <rdf:Description>
         <dc:creator>
            <rdf:Seq>
               <rdf:li>PdfParser</rdf:li>
            </rdf:Seq>
         </dc:creator>
         <xmp:CreateDate>2018-02-07T11:51:44-05:00</xmp:CreateDate>
         <xmp:ModifyDate>2019-10-23T09:56:01-04:00</xmp:ModifyDate>
      </rdf:Description>
   </rdf:RDF>
</x:xmpmeta>';

        $document->extractXMPMetadata($content);
        $document->init();
        $details = $document->getDetails();

        $this->assertCount(4, $details);
        $this->assertEquals('PdfParser', $details['dc:creator']);
        $this->assertEquals('2019-10-23T09:56:01-04:00', $details['xmp:modifydate']);
    }
}
268