Passed
Pull Request — master (#312)
by
unknown
02:14
created

Document   A

Complexity

Total Complexity 35

Size/Duplication

Total Lines 240
Duplicated Lines 0 %

Test Coverage

Coverage 93.83%

Importance

Changes 8
Bugs 2 Features 0
Metric Value
eloc 66
dl 0
loc 240
ccs 76
cts 81
cp 0.9383
rs 9.6
c 8
b 2
f 0
wmc 35

15 Methods

Rating   Name   Duplication   Size   Complexity  
A getObjectsByType() 0 13 5
A buildDictionary() 0 10 3
A init() 0 9 2
A getObjects() 0 3 1
A __construct() 0 3 1
A setObjects() 0 5 1
A getFonts() 0 3 1
A buildDetails() 0 25 5
A getDictionary() 0 3 1
A getPages() 0 34 6
A getDetails() 0 3 1
A getObjectById() 0 7 2
A getTrailer() 0 3 1
A setTrailer() 0 3 1
A getText() 0 18 4
1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <sebastien@malot.fr>
8
 * @date    2017-01-03
9
 *
10
 * @license LGPLv3
11
 * @url     <https://github.com/smalot/pdfparser>
12
 *
13
 *  PdfParser is a pdf library written in PHP, extraction oriented.
14
 *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
15
 *
16
 *  This program is free software: you can redistribute it and/or modify
17
 *  it under the terms of the GNU Lesser General Public License as published by
18
 *  the Free Software Foundation, either version 3 of the License, or
19
 *  (at your option) any later version.
20
 *
21
 *  This program is distributed in the hope that it will be useful,
22
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
23
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
 *  GNU Lesser General Public License for more details.
25
 *
26
 *  You should have received a copy of the GNU Lesser General Public License
27
 *  along with this program.
28
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
29
 */
30
31
namespace Smalot\PdfParser;
32
33
/**
34
 * Technical references :
35
 * - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
36
 * - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
37
 * - http://www.php.net/manual/en/ref.pdf.php#74211
38
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
39
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
40
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
41
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
42
 *
43
 * Class Document
44
 */
45
class Document
{
    /**
     * Objects of the document, indexed by object id.
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * Object ids grouped by the content of their "Type" header entry,
     * shaped as [type => [id => id]]. Rebuilt by buildDictionary().
     *
     * @var array
     */
    protected $dictionary = [];

    /**
     * @var Header
     */
    protected $trailer = null;

    /**
     * Document details; filled by buildDetails(). Starts as an empty array so
     * that getDetails() honours its "array" contract even before init().
     *
     * @var array
     */
    protected $details = [];

    /**
     * Creates an empty document whose trailer is an empty Header bound to it.
     */
    public function __construct()
    {
        $this->trailer = new Header([], $this);
    }

    /**
     * Rebuilds the type dictionary and the details array, then calls init()
     * on every object currently held by the document.
     */
    public function init()
    {
        $this->buildDictionary();

        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     *
     * Objects whose "Type" content is empty are left out of the dictionary.
     */
    protected function buildDictionary()
    {
        // Start from scratch so stale ids from a previous object set vanish.
        $this->dictionary = [];

        foreach ($this->objects as $id => $object) {
            $type = $object->getHeader()->get('Type')->getContent();

            if (!empty($type)) {
                $this->dictionary[$type][$id] = $id;
            }
        }
    }

    /**
     * Build details array.
     *
     * Takes the details exposed by the header of the trailer's "Info" entry
     * (when present and usable) and adds a "Pages" key holding the page count.
     */
    protected function buildDetails()
    {
        $details = [];

        // Extract document info
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');
            // This could be an ElementMissing object, so we need to check for
            // the getHeader method first.
            if (null !== $info && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count; a document whose pages cannot be listed
        // (getPages() throws) is reported as having zero pages.
        try {
            $pages = $this->getPages();
            $details['Pages'] = \count($pages);
        } catch (\Exception $e) {
            $details['Pages'] = 0;
        }

        $this->details = $details;
    }

    /**
     * @return array object ids grouped by type, as built by buildDictionary()
     */
    public function getDictionary()
    {
        return $this->dictionary;
    }

    /**
     * Replaces the document objects and re-initialises the document.
     *
     * @param PDFObject[] $objects objects indexed by id; cast to array
     */
    public function setObjects($objects = [])
    {
        $this->objects = (array) $objects;

        $this->init();
    }

    /**
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * Looks an object up by its id.
     *
     * @param string $id
     *
     * @return PDFObject|Font|Page|Element|null the object, or null when the id is unknown
     */
    public function getObjectById($id)
    {
        if (isset($this->objects[$id])) {
            return $this->objects[$id];
        }

        return null;
    }

    /**
     * Returns the objects whose header "Type" (and, optionally, "Subtype")
     * loosely equals the requested value. Original ids are kept as keys.
     *
     * @param string      $type
     * @param string|null $subtype when null, the subtype is not checked
     *
     * @return array
     */
    public function getObjectsByType($type, $subtype = null)
    {
        $objects = [];

        foreach ($this->objects as $id => $object) {
            $header = $object->getHeader();

            // Loose comparison is intentional: header entries are element
            // objects that are compared against plain strings here.
            if ($header->get('Type') != $type) {
                continue;
            }

            if (null === $subtype || $header->get('Subtype') == $subtype) {
                $objects[$id] = $object;
            }
        }

        return $objects;
    }

    /**
     * @return PDFObject[] every object whose type is "Font"
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * Lists the pages of the document, trying in order: the "Pages" entry of
     * the first catalog, every "Pages" object, then every "Page" object.
     *
     * @return Page[]
     *
     * @throws \Exception when none of the three lookups applies
     */
    public function getPages()
    {
        if (isset($this->dictionary['Catalog'])) {
            // Search for catalog to list pages.
            $id = reset($this->dictionary['Catalog']);

            /** @var Pages $object */
            $object = $this->objects[$id]->get('Pages');
            // The entry may be something other than a Pages object, in which
            // case we fall through to the next lookups.
            if (method_exists($object, 'getPages')) {
                // NOTE(review): the `true` flag presumably requests a deep
                // traversal - confirm against Pages::getPages().
                return $object->getPages(true);
            }
        }

        if (isset($this->dictionary['Pages'])) {
            // Search for pages to list kids.
            $pages = [];

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if (isset($this->dictionary['Page'])) {
            // Search for 'page' (unordered pages).
            $pages = $this->getObjectsByType('Page');

            // Drop the object ids so callers get a plain list.
            return array_values($pages);
        }

        throw new \Exception('Missing catalog.');
    }

    /**
     * Returns the text of every page, separated by a blank line. Pages whose
     * trimmed text is empty are skipped.
     *
     * @param Page $page unused: the text of the whole document is always
     *                   returned; kept only for backward compatibility
     *
     * @return string
     *
     * @throws \Exception when the pages cannot be listed (see getPages())
     */
    public function getText(Page $page = null)
    {
        $texts = [];

        foreach ($this->getPages() as $currentPage) {
            // In some cases, the page entry may be null.
            if (null === $currentPage) {
                continue;
            }

            $text = trim($currentPage->getText());

            // Compare against '' explicitly: a truthiness test would also
            // discard a page whose whole text is "0".
            if ('' !== $text) {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    /**
     * @return Header
     */
    public function getTrailer()
    {
        return $this->trailer;
    }

    /**
     * Replaces the trailer. Details are not rebuilt here; they are refreshed
     * on the next init() / setObjects() call.
     */
    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * Returns the details computed by the last init().
     *
     * @param bool $deep unused; kept only for backward compatibility
     *
     * @return array empty until the document has been initialised
     */
    public function getDetails($deep = true)
    {
        return $this->details;
    }
}
287