Passed
Pull Request — master (#435)
by
unknown
02:52
created

Document::getObjects()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 1
dl 0
loc 3
rs 10
c 1
b 0
f 0
ccs 2
cts 2
cp 1
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
/**
34
 * Technical references :
35
 * - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
36
 * - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
37
 * - http://www.php.net/manual/en/ref.pdf.php#74211
38
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
39
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
40
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
41
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
42
 *
43
 * Class Document
44
 */
45
class Document
46
{
47
    /**
48
     * @var PDFObject[]
49
     */
50
    protected $objects = [];
51
52
    /**
53
     * @var array
54
     */
55
    protected $dictionary = [];
56
57
    /**
58
     * @var Header
59
     */
60
    protected $trailer = null;
61
62
    /**
63
     * @var array
64
     */
65
    protected $details = null;
66
67 55
    /**
     * Creates a document whose trailer is a new Header built from an empty
     * element list and bound to this document.
     */
    public function __construct()
    {
        $emptyTrailer = new Header([], $this);

        $this->trailer = $emptyTrailer;
    }
71
72 36
    /**
     * Rebuilds the type dictionary and the details array, then initialises
     * every registered object: its header first, then the object itself.
     */
    public function init()
    {
        $this->buildDictionary();
        $this->buildDetails();

        // Cascade the initialisation down to each object.
        foreach ($this->objects as $pdfObject) {
            $pdfObject->getHeader()->init();
            $pdfObject->init();
        }
    }
84
85
    /**
     * Indexes the registered objects by the 'Type' and 'Subtype' entries of
     * their headers.
     *
     * Resulting shape, per type:
     *   $dictionary[<type>]['all'][<id>]                 = object
     *   $dictionary[<type>]['subtype'][<subtype>][<id>] = object
     *
     * Objects whose 'Type' content loosely equals null are not indexed at all;
     * objects whose 'Subtype' content loosely equals null are only listed
     * under 'all'.
     */
    protected function buildDictionary()
    {
        // Always start from an empty index.
        $this->dictionary = [];

        foreach ($this->objects as $id => $object) {
            $type = $object->getHeader()->get('Type')->getContent();

            // No usable type: nothing to index for this object.
            if (null == $type) {
                continue;
            }

            if (!isset($this->dictionary[$type])) {
                $this->dictionary[$type] = ['all' => [], 'subtype' => []];
            }
            $this->dictionary[$type]['all'][$id] = $object;

            $subtype = $object->getHeader()->get('Subtype')->getContent();

            // No usable subtype: the object stays in the 'all' bucket only.
            if (null == $subtype) {
                continue;
            }

            if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
                $this->dictionary[$type]['subtype'][$subtype] = [];
            }
            $this->dictionary[$type]['subtype'][$subtype][$id] = $object;
        }
    }
117
118
    /**
     * Rebuilds the details array.
     *
     * The details start from the header details of the trailer's 'Info' entry
     * (when that entry exists and exposes getHeader()) and always receive a
     * 'Pages' key holding the page count, or 0 when getPages() throws.
     */
    protected function buildDetails()
    {
        $collected = [];

        // Document information, when the trailer references it.
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');

            // The entry could be an ElementMissing object, which has no
            // header, so probe for the method before calling it.
            $exposesHeader = null !== $info && method_exists($info, 'getHeader');
            if ($exposesHeader) {
                $collected = $info->getHeader()->getDetails();
            }
        }

        // Page count; falls back to zero when the pages cannot be listed.
        try {
            $pageCount = \count($this->getPages());
        } catch (\Exception $e) {
            $pageCount = 0;
        }
        $collected['Pages'] = $pageCount;

        $this->details = $collected;
    }
147
148 1
    /**
     * Returns the index of objects by type and subtype, as last built by
     * buildDictionary().
     */
    public function getDictionary(): array
    {
        return $this->dictionary;
    }
152
153
    /**
     * Replaces the registered objects and re-initialises the document.
     *
     * The argument is cast to an array before being stored, then init() is
     * run so the dictionary and details reflect the new objects.
     *
     * @param PDFObject[] $objects
     */
    public function setObjects($objects = [])
    {
        $this->objects = (array) $objects;
        $this->init();
    }
162
163
    /**
     * Returns every object currently registered on the document.
     *
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }
170
171
    /**
     * Looks up a registered object by its identifier.
     *
     * @return PDFObject|Font|Page|Element|null the object stored under $id,
     *                                          or null when there is none
     */
    public function getObjectById(string $id)
    {
        return $this->objects[$id] ?? null;
    }
182
183 37
    /**
     * Tells whether getObjectsByType() finds at least one object for the
     * given type and optional subtype.
     */
    public function hasObjectsByType(string $type, ?string $subtype = null): bool
    {
        $matches = $this->getObjectsByType($type, $subtype);

        return [] !== $matches;
    }
187
188 37
    /**
     * Returns the indexed objects of a given type, optionally narrowed down
     * to a subtype.
     *
     * A null or empty-string $subtype means "no subtype filter": every object
     * of the type is returned. Unknown types and unknown subtypes yield an
     * empty array.
     *
     * @return array objects keyed by their identifier
     */
    public function getObjectsByType(string $type, ?string $subtype = null): array
    {
        if (!isset($this->dictionary[$type])) {
            return [];
        }

        // Spelled out explicitly: both null and '' disable the subtype filter.
        if (null === $subtype || '' === $subtype) {
            return $this->dictionary[$type]['all'];
        }

        return $this->dictionary[$type]['subtype'][$subtype] ?? [];
    }
204
205
    /**
     * Returns every object indexed under the 'Font' type.
     *
     * @return Font[]
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }
212
213 13
    /**
     * Returns the first font of the document.
     *
     * @return Font|null the first entry of getFonts(), or null when the
     *                   document has no font
     */
    public function getFirstFont(): ?Font
    {
        $fonts = $this->getFonts();

        // reset() yields false on an empty array, which would violate the
        // ?Font return type, so the "no font" case is handled explicitly.
        if ([] === $fonts) {
            return null;
        }

        return reset($fonts);
    }
219
220
    /**
     * Lists the pages of the document.
     *
     * Three sources are tried in order:
     *  1. the 'Pages' entry of the first 'Catalog' object, when that entry
     *     exposes getPages();
     *  2. every 'Pages' object, whose pages are concatenated;
     *  3. the bare 'Page' objects, re-indexed from zero.
     *
     * @return Page[]
     *
     * @throws \Exception when none of the sources is available
     */
    public function getPages()
    {
        if ($this->hasObjectsByType('Catalog')) {
            // The first catalog points at the root of the page tree.
            $catalogs = $this->getObjectsByType('Catalog');
            $firstCatalog = reset($catalogs);

            /** @var Pages $pageTree */
            $pageTree = $firstCatalog->get('Pages');
            if (method_exists($pageTree, 'getPages')) {
                return $pageTree->getPages(true);
            }
        }

        if ($this->hasObjectsByType('Pages')) {
            // No usable catalog: gather the kids of every 'Pages' node.
            $collected = [];

            /** @var Pages[] $pageNodes */
            $pageNodes = $this->getObjectsByType('Pages');
            foreach ($pageNodes as $pageNode) {
                $collected = array_merge($collected, $pageNode->getPages(true));
            }

            return $collected;
        }

        if ($this->hasObjectsByType('Page')) {
            // Last resort: the bare 'page' objects (unordered pages).
            return array_values($this->getObjectsByType('Page'));
        }

        throw new \Exception('Missing catalog.');
    }
261
262 8
    /**
     * Returns the text of the whole document.
     *
     * The trimmed text of each page is collected, pages without any text are
     * skipped, and the remaining texts are joined with a blank line.
     *
     * @throws \Exception when getPages() cannot list the pages
     */
    public function getText(): string
    {
        $texts = [];

        foreach ($this->getPages() as $page) {
            // In some cases, the page entry may be null.
            if (null === $page) {
                continue;
            }

            $text = trim($page->getText());

            // Compare against '' rather than testing truthiness: a page whose
            // entire text is "0" is falsy but must not be dropped.
            if ('' !== $text) {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }
281
282
    /**
     * Returns the trailer header currently attached to the document.
     */
    public function getTrailer(): Header
    {
        return $this->trailer;
    }
286
287 28
    /**
     * Replaces the trailer header of the document.
     *
     * Only the reference is stored; the details array is not rebuilt here.
     */
    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }
291
292
    /**
     * Returns the details array built by buildDetails().
     *
     * @return array the details, or an empty array when they have not been
     *               built yet
     */
    public function getDetails(): array
    {
        // The property starts out as null and is only filled by init(); fall
        // back to an empty array so the declared return type always holds.
        return $this->details ?? [];
    }
296
}
297