Passed
Push — master ( 416ff0...c2c117 )
by Konrad
04:12 queued 02:09
created

Document::getFirstFont()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 4
nc 2
nop 0
dl 0
loc 8
ccs 5
cts 5
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
 * @file
 *          This file is part of the PdfParser library.
 *
 * @author  Sébastien MALOT <[email protected]>
 * @date    2017-01-03
 *
 * @license LGPLv3
 * @url     <https://github.com/smalot/pdfparser>
 *
 *  PdfParser is a pdf library written in PHP, extraction oriented.
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
 */
30
31
namespace Smalot\PdfParser;
32
33
/**
 * In-memory representation of a parsed PDF document.
 *
 * Holds the document's objects keyed by id, an index of those objects by
 * their "Type" / "Subtype" header fields, the trailer header, and a details
 * array built from the trailer's "Info" entry plus the page count.
 *
 * Technical references :
 * - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
 * - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
 * - http://www.php.net/manual/en/ref.pdf.php#74211
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
 *
 * Class Document
 */
class Document
{
    /**
     * Objects of the document, keyed by object id.
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * Index of $objects by type. Each entry has the shape
     * ['all' => [id => object], 'subtype' => [subtype => [id => object]]].
     *
     * @var array
     */
    protected $dictionary = [];

    /**
     * @var Header
     */
    protected $trailer = null;

    /**
     * Document details; filled by buildDetails().
     *
     * Defaults to an empty array so that getDetails() honours its declared
     * "array" return type even when called before init().
     *
     * @var array
     */
    protected $details = [];

    /**
     * Creates an empty document with an empty trailer header.
     */
    public function __construct()
    {
        $this->trailer = new Header([], $this);
    }

    /**
     * Rebuilds the type dictionary and the details array, then initialises
     * every object (header first, then the object itself).
     */
    public function init()
    {
        $this->buildDictionary();

        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->getHeader()->init();
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     *
     * Objects whose "Type" content is empty are not indexed at all; objects
     * whose "Subtype" content is empty are indexed under 'all' only.
     */
    protected function buildDictionary()
    {
        // Start from scratch so stale entries never survive a re-init.
        $this->dictionary = [];

        foreach ($this->objects as $id => $object) {
            // Cache objects by type and subtype
            $type = $object->getHeader()->get('Type')->getContent();

            // NOTE(review): the loose comparison is kept on purpose. The return
            // type of getContent() is not visible from this class, so any
            // falsy, null-like value must keep being skipped as before.
            if (null == $type) {
                continue;
            }

            if (!isset($this->dictionary[$type])) {
                $this->dictionary[$type] = [
                    'all' => [],
                    'subtype' => [],
                ];
            }

            $this->dictionary[$type]['all'][$id] = $object;

            $subtype = $object->getHeader()->get('Subtype')->getContent();
            // Same deliberate loose comparison as for $type above.
            if (null == $subtype) {
                continue;
            }

            if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
                $this->dictionary[$type]['subtype'][$subtype] = [];
            }
            $this->dictionary[$type]['subtype'][$subtype][$id] = $object;
        }
    }

    /**
     * Build details array.
     *
     * Takes the details of the trailer's "Info" entry (when present and
     * usable) and adds a "Pages" key holding the page count, or 0 when the
     * pages cannot be resolved.
     */
    protected function buildDetails()
    {
        // Build details array.
        $details = [];

        // Extract document info
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');
            // This could be an ElementMissing object, so we need to check for
            // the getHeader method first.
            if (null !== $info && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count
        try {
            $pages = $this->getPages();
            $details['Pages'] = \count($pages);
        } catch (\Exception $e) {
            // getPages() throws when no page source exists: report zero pages.
            $details['Pages'] = 0;
        }

        $this->details = $details;
    }

    /**
     * Returns the type/subtype index built by buildDictionary().
     */
    public function getDictionary(): array
    {
        return $this->dictionary;
    }

    /**
     * Replaces the document's objects and re-initialises the document.
     *
     * @param PDFObject[] $objects
     */
    public function setObjects($objects = [])
    {
        $this->objects = (array) $objects;

        $this->init();
    }

    /**
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * Returns the object registered under the given id, or null if none is.
     *
     * @return PDFObject|Font|Page|Element|null
     */
    public function getObjectById(string $id)
    {
        if (isset($this->objects[$id])) {
            return $this->objects[$id];
        }

        return null;
    }

    /**
     * Tells whether at least one object of the given type (and, optionally,
     * subtype) is indexed.
     */
    public function hasObjectsByType(string $type, ?string $subtype = null): bool
    {
        return 0 < \count($this->getObjectsByType($type, $subtype));
    }

    /**
     * Returns the indexed objects of the given type, keyed by object id.
     *
     * When $subtype is null or an empty string, every object of the type is
     * returned; otherwise only the objects of that subtype. An unknown type
     * or subtype yields an empty array.
     */
    public function getObjectsByType(string $type, ?string $subtype = null): array
    {
        if (!isset($this->dictionary[$type])) {
            return [];
        }

        // Explicit form of the former loose "null != $subtype": for a
        // ?string value, only null and '' mean "no subtype filter".
        if (null !== $subtype && '' !== $subtype) {
            if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
                return [];
            }

            return $this->dictionary[$type]['subtype'][$subtype];
        }

        return $this->dictionary[$type]['all'];
    }

    /**
     * @return Font[]
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * Returns the first indexed font, or null when the document has none.
     */
    public function getFirstFont(): ?Font
    {
        $fonts = $this->getFonts();
        if ([] === $fonts) {
            return null;
        }

        return reset($fonts);
    }

    /**
     * Resolves the pages of the document.
     *
     * Sources are tried in this order: the "Pages" entry of the first
     * "Catalog" object, then every "Pages" object, then the bare "Page"
     * objects (re-indexed from 0).
     *
     * @return Page[]
     *
     * @throws \Exception when none of the sources above is available
     */
    public function getPages()
    {
        if ($this->hasObjectsByType('Catalog')) {
            // Search for catalog to list pages.
            $catalogues = $this->getObjectsByType('Catalog');
            $catalogue = reset($catalogues);

            /** @var Pages $object */
            $object = $catalogue->get('Pages');
            // The entry may not be a usable Pages object; fall through to the
            // next sources in that case.
            if (method_exists($object, 'getPages')) {
                return $object->getPages(true);
            }
        }

        if ($this->hasObjectsByType('Pages')) {
            // Search for pages to list kids.
            $pages = [];

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if ($this->hasObjectsByType('Page')) {
            // Search for 'page' (unordered pages).
            $pages = $this->getObjectsByType('Page');

            return array_values($pages);
        }

        throw new \Exception('Missing catalog.');
    }

    /**
     * Returns the trimmed text of every page, joined by a blank line.
     *
     * Null pages and pages whose trimmed text is empty are skipped.
     *
     * @throws \Exception when the pages cannot be resolved (see getPages())
     */
    public function getText(): string
    {
        $texts = [];

        foreach ($this->getPages() as $page) {
            // In some cases, the $page variable may be null.
            if (null === $page) {
                continue;
            }

            $text = trim($page->getText());
            // Compare against '' rather than relying on truthiness: a page
            // whose whole text is "0" is falsy but must not be dropped.
            if ('' !== $text) {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    /**
     * Returns the trailer header of the document.
     */
    public function getTrailer(): Header
    {
        return $this->trailer;
    }

    /**
     * Replaces the trailer header. Details are not rebuilt until init() runs.
     */
    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * Returns the details array built by buildDetails(); empty before init().
     */
    public function getDetails(): array
    {
        return $this->details;
    }
}
300