Passed
Pull Request — master (#455)
by
unknown
01:56
created

Document::setTrailer()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
ccs 2
cts 2
cp 1
cc 1
nc 1
nop 1
crap 1
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
/**
34
 * Technical references :
35
 * - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
36
 * - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
37
 * - http://www.php.net/manual/en/ref.pdf.php#74211
38
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
39
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
40
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
41
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
42
 *
43
 * Class Document
44
 */
45
class Document
{
    /**
     * Objects of the document, keyed by object id (see getObjectById()).
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * Object ids grouped by the content of their "Type" header field,
     * stored as $dictionary[$type][$id] = $id. Rebuilt by buildDictionary().
     *
     * @var array
     */
    protected $dictionary = [];

    /**
     * Trailer of the document; an empty Header until setTrailer() is called.
     *
     * @var Header
     */
    protected $trailer = null;

    /**
     * Details computed by buildDetails(); stays null until init() has run.
     *
     * @var array|null
     */
    protected $details = null;

    /**
     * Creates a document with no objects and an empty trailer header.
     */
    public function __construct()
    {
        $this->trailer = new Header([], $this);
    }

    /**
     * Rebuilds the type dictionary and the details array, then initialises
     * the header of every object followed by the object itself.
     */
    public function init()
    {
        $this->buildDictionary();

        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->getHeader()->init();
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     *
     * Objects whose "Type" content is empty are left out of the dictionary.
     */
    protected function buildDictionary()
    {
        // Start from scratch so ids of a previous object set do not linger.
        $this->dictionary = [];

        foreach ($this->objects as $id => $object) {
            $type = $object->getHeader()->get('Type')->getContent();

            if (!empty($type)) {
                $this->dictionary[$type][$id] = $id;
            }
        }
    }

    /**
     * Build details array.
     *
     * The array is seeded with the details of the trailer's "Info" entry when
     * that entry exists and exposes getHeader(); a "Pages" key holding the
     * page count is then added (0 when the pages cannot be resolved).
     */
    protected function buildDetails()
    {
        $details = [];

        // Extract document info
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');
            // This could be an ElementMissing object, so we need to check for
            // the getHeader method first.
            if (null !== $info && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count
        try {
            $pages = $this->getPages();
            $details['Pages'] = \count($pages);
        } catch (\Exception $e) {
            // getPages() throws when no page source is found; report zero
            // pages instead of failing the whole initialisation.
            $details['Pages'] = 0;
        }

        $this->details = $details;
    }

    /**
     * Returns the object ids grouped by type, as built by buildDictionary().
     */
    public function getDictionary(): array
    {
        return $this->dictionary;
    }

    /**
     * Replaces the objects of the document and re-runs init().
     *
     * @param PDFObject[] $objects
     */
    public function setObjects($objects = [])
    {
        $this->objects = (array) $objects;

        $this->init();
    }

    /**
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * Returns the object stored under the given id, or null when there is none.
     *
     * @return PDFObject|Font|Page|Element|null
     */
    public function getObjectById(string $id)
    {
        return $this->objects[$id] ?? null;
    }

    /**
     * Returns the objects whose "Type" header field matches $type and, when
     * $subtype is given, whose "Subtype" header field matches it as well.
     * The ids used as keys in the object list are preserved.
     */
    public function getObjectsByType(string $type, ?string $subtype = null): array
    {
        $objects = [];

        foreach ($this->objects as $id => $object) {
            $header = $object->getHeader();

            // NOTE(review): the loose comparison is kept on purpose; the header
            // values are presumably objects that compare equal to their string
            // content - confirm before tightening this to ===.
            if ($header->get('Type') == $type &&
                (null === $subtype || $header->get('Subtype') == $subtype)
            ) {
                $objects[$id] = $object;
            }
        }

        return $objects;
    }

    /**
     * @return Font[]
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * Resolves the pages of the document, trying in order: the "Pages" entry
     * of the first "Catalog" object, every object of type "Pages", and finally
     * every object of type "Page".
     *
     * @return Page[]
     *
     * @throws \Exception when none of the three sources is available
     */
    public function getPages()
    {
        if (isset($this->dictionary['Catalog'])) {
            // Search for catalog to list pages.
            $id = reset($this->dictionary['Catalog']);

            /** @var Pages $object */
            $object = $this->objects[$id]->get('Pages');
            // Falls through to the next strategies when the entry cannot list pages.
            if (method_exists($object, 'getPages')) {
                return $object->getPages(true);
            }
        }

        if (isset($this->dictionary['Pages'])) {
            // Search for pages to list kids.
            $pages = [];

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if (isset($this->dictionary['Page'])) {
            // Search for 'page' (unordered pages).
            $pages = $this->getObjectsByType('Page');

            return array_values($pages);
        }

        throw new \Exception('Missing catalog.');
    }

    /**
     * Returns the trimmed text of every page, joined by a blank line.
     * Null pages and pages whose trimmed text is empty are skipped.
     *
     * @throws \Exception when getPages() cannot resolve the pages
     */
    public function getText(): string
    {
        $texts = [];
        $pages = $this->getPages();

        foreach ($pages as $page) {
            /**
             * In some cases, the $page variable may be null.
             */
            if (null === $page) {
                continue;
            }

            $text = trim($page->getText());
            // Compare against '' explicitly: a truthiness test would also
            // discard a page whose whole text is "0".
            if ('' !== $text) {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    /**
     * Returns the trailer header of the document.
     */
    public function getTrailer(): Header
    {
        return $this->trailer;
    }

    /**
     * Replaces the trailer header; takes effect on details at the next init().
     */
    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * Returns the details built by the last init(), or an empty array when
     * init() has not run yet (the property is still null at that point and
     * returning it as-is would violate the array return type).
     */
    public function getDetails(): array
    {
        return $this->details ?? [];
    }
}
266