Passed
Pull Request — master (#435)
by
unknown
02:10
created

Document::getFonts()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
/**
34
 * Technical references :
35
 * - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
36
 * - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
37
 * - http://www.php.net/manual/en/ref.pdf.php#74211
38
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
39
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
40
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
41
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
42
 *
43
 * Class Document
44
 */
45
class Document
{
    /**
     * Objects of the document, keyed by object id.
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * Index of the objects by their 'Type' header, then by their 'Subtype' header.
     *
     * Shape: [type => ['all' => [id => object], 'subtype' => [subtype => [id => object]]]].
     *
     * @var array
     */
    protected $dictionary = [];

    /**
     * @var Header
     */
    protected $trailer = null;

    /**
     * Document details; stays null until buildDetails() has run (see init()).
     *
     * @var array|null
     */
    protected $details = null;

    /**
     * Creates an empty document whose trailer is an empty Header bound to it.
     */
    public function __construct()
    {
        $this->trailer = new Header([], $this);
    }

    /**
     * Rebuilds the type dictionary and the details array, then initialises
     * the header of every object followed by the object itself.
     */
    public function init()
    {
        $this->buildDictionary();

        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->getHeader()->init();
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     *
     * Objects whose 'Type' content loosely equals null are not indexed at
     * all; objects whose 'Subtype' content loosely equals null are indexed
     * under 'all' only.
     */
    protected function buildDictionary()
    {
        // Start from scratch so objects from a previous build do not linger.
        $this->dictionary = [];

        foreach ($this->objects as $id => $object) {
            // Cache objects by type and subtype
            $type = $object->getHeader()->get('Type')->getContent();

            if (null == $type) {
                continue;
            }

            if (!isset($this->dictionary[$type])) {
                $this->dictionary[$type] = [
                    'all' => [],
                    'subtype' => [],
                ];
            }

            $this->dictionary[$type]['all'][$id] = $object;

            $subtype = $object->getHeader()->get('Subtype')->getContent();
            if (null == $subtype) {
                continue;
            }

            if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
                $this->dictionary[$type]['subtype'][$subtype] = [];
            }
            $this->dictionary[$type]['subtype'][$subtype][$id] = $object;
        }
    }

    /**
     * Build details array.
     *
     * The details come from the header of the trailer's 'Info' entry when it
     * is available, and always contain a 'Pages' key holding the page count
     * (0 when the pages cannot be retrieved).
     */
    protected function buildDetails()
    {
        // Build details array.
        $details = [];

        // Extract document info
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');
            // This could be an ElementMissing object, so we need to check for
            // the getHeader method first.
            if (null !== $info && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count
        try {
            $pages = $this->getPages();
            $details['Pages'] = \count($pages);
        } catch (\Exception $e) {
            // getPages() throws when no page source exists; report zero pages.
            $details['Pages'] = 0;
        }

        $this->details = $details;
    }

    /**
     * @return array the type/subtype index built by buildDictionary()
     */
    public function getDictionary()
    {
        return $this->dictionary;
    }

    /**
     * Replaces the objects of the document and re-runs init().
     *
     * @param PDFObject[] $objects objects keyed by id; the value is cast to array
     */
    public function setObjects($objects = [])
    {
        $this->objects = (array) $objects;

        $this->init();
    }

    /**
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * @param string $id
     *
     * @return PDFObject|Font|Page|Element|null the object stored under $id, or null when there is none
     */
    public function getObjectById($id)
    {
        return $this->objects[$id] ?? null;
    }

    /**
     * Tells whether at least one object matches the given type (and subtype).
     *
     * @param string  $type
     * @param ?string $subtype
     *
     * @return bool
     */
    public function hasObjectsByType(string $type, ?string $subtype = null): bool
    {
        return 0 < \count($this->getObjectsByType($type, $subtype));
    }

    /**
     * Returns the indexed objects of a type, optionally narrowed to a subtype.
     *
     * @param string      $type
     * @param string|null $subtype null or an empty string disables the subtype filter
     *
     * @return array objects keyed by id; empty when the type or subtype is unknown
     */
    public function getObjectsByType($type, $subtype = null)
    {
        if (!isset($this->dictionary[$type])) {
            return [];
        }

        // The comparison is deliberately loose: an empty subtype means "no
        // subtype filter", matching buildDictionary(), which never indexes
        // objects under an empty subtype.
        if (null == $subtype) {
            return $this->dictionary[$type]['all'];
        }

        return $this->dictionary[$type]['subtype'][$subtype] ?? [];
    }

    /**
     * @return PDFObject[] every object indexed under the 'Font' type, keyed by id
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * @return ?PDFObject the first font of the document, or null when it has none
     */
    public function getFirstFont(): ?PDFObject
    {
        $fonts = $this->getFonts();

        // reset() yields false on an empty array, which would violate the
        // declared return type, so the "no font" case is handled explicitly.
        if ([] === $fonts) {
            return null;
        }

        return reset($fonts);
    }

    /**
     * Lists the pages, trying the 'Catalog' object first, then the 'Pages'
     * objects, and finally the bare 'Page' objects.
     *
     * @return Page[]
     *
     * @throws \Exception when none of those object types yields pages
     */
    public function getPages()
    {
        if ($this->hasObjectsByType('Catalog')) {
            // Search for catalog to list pages.
            $catalogues = $this->getObjectsByType('Catalog');
            $catalogue = reset($catalogues);

            /** @var Pages $object */
            $object = $catalogue->get('Pages');
            // Fall through to the next strategy when the entry cannot list pages.
            if (method_exists($object, 'getPages')) {
                return $object->getPages(true);
            }
        }

        if ($this->hasObjectsByType('Pages')) {
            // Search for pages to list kids.
            $pages = [];

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if ($this->hasObjectsByType('Page')) {
            // Search for 'page' (unordered pages).
            $pages = $this->getObjectsByType('Page');

            return array_values($pages);
        }

        throw new \Exception('Missing catalog.');
    }

    /**
     * Returns the trimmed text of every page, joined by a blank line.
     *
     * @param ?Page $page unused; kept for backward compatibility of the signature
     *
     * @return string
     *
     * @throws \Exception when getPages() cannot find any page source
     */
    public function getText(?Page $page = null)
    {
        $texts = [];

        foreach ($this->getPages() as $currentPage) {
            // In some cases, the page entry may be null.
            if (null === $currentPage) {
                continue;
            }

            $text = trim($currentPage->getText());
            // Compare against '' rather than relying on truthiness, so that a
            // page whose whole text is "0" is not discarded.
            if ('' !== $text) {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    /**
     * @return Header
     */
    public function getTrailer()
    {
        return $this->trailer;
    }

    /**
     * Replaces the trailer; the details are not rebuilt until init() runs again.
     */
    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * @param bool $deep unused; kept for backward compatibility of the signature
     *
     * @return array|null the details built by buildDetails(), or null if it has not run yet
     */
    public function getDetails($deep = true)
    {
        return $this->details;
    }
}
328