Passed
Branch main (d5171d)
by Andreas
26:52 queued 08:26
created

PdfcpuWrapper::importMetadata()   B

Complexity

Conditions 7
Paths 6

Size

Total Lines 34
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 18
CRAP Score 7.0071

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 19
c 1
b 0
f 0
dl 0
loc 34
ccs 18
cts 19
cp 0.9474
rs 8.8333
cc 7
nc 6
nop 2
crap 7.0071
1
<?php
2
/**
3
 * pdfcpu wrapper
4
 *
5
 * @copyright 2014-2024 Institute of Legal Medicine, Medical University of Innsbruck
6
 * @author Andreas Erhard <[email protected]>
7
 * @license LGPL-3.0-only
8
 * @link http://www.gerichtsmedizin.at/
9
 *
10
 * @package pdftk
11
 */
12
13
namespace Gmi\Toolkit\Pdftk;
14
15
use Symfony\Component\Process\Process;
16
17
use Gmi\Toolkit\Pdftk\Exception\FileNotFoundException;
18
use Gmi\Toolkit\Pdftk\Exception\PdfException;
19
use Gmi\Toolkit\Pdftk\Util\Escaper;
20
use Gmi\Toolkit\Pdftk\Util\FileChecker;
21
use Gmi\Toolkit\Pdftk\Util\ProcessFactory;
22
23
use Exception;
24
25
/**
 * Wrapper for pdfcpu.
 *
 * Builds pdfcpu command lines, runs them through processes created by the ProcessFactory and converts
 * process failures into PdfException instances carrying the process output.
 *
 * @internal Only the methods exposed by the interfaces should be accessed from outside.
 *
 * @psalm-suppress PropertyNotSetInConstructor as $binaryPath is defined and set in the BinaryPathAwareTrait
 */
class PdfcpuWrapper implements WrapperInterface, BinaryPathAwareInterface
{
    use BinaryPathAwareTrait;

    /**
     * Metadata attributes imported by importMetadata().
     *
     * Each name is looked up in the pdfcpu JSON output with its first letter lower-cased.
     */
    private const SUPPORTED_METADATA_ATTRIBUTES = [
        'Title', 'Keywords', 'Subject', 'Author', 'Creator', 'Producer', 'CreationDate', 'ModificationDate',
    ];

    /**
     * @var ProcessFactory
     */
    private $processFactory;

    /**
     * @var Escaper
     */
    private $escaper;

    /**
     * @var FileChecker
     */
    private $fileChecker;

    /**
     * @var PdfcpuWrapperBookmarksHelper
     */
    private $bookmarksHelper;

    /**
     * Constructor.
     *
     * @param string|null         $pdftkBinary    Path to the pdfcpu binary, guessed from PHP_OS when empty.
     * @param ProcessFactory|null $processFactory Factory used to create processes, a default one when omitted.
     *
     * @throws FileNotFoundException
     */
    public function __construct(?string $pdftkBinary = null, ?ProcessFactory $processFactory = null)
    {
        // `?:` is intentional: an empty string falls back to the guessed binary as well.
        $this->setBinary($pdftkBinary ?: $this->guessBinary(PHP_OS));
        $this->processFactory = $processFactory ?: new ProcessFactory();
        $this->escaper = new Escaper();
        $this->fileChecker = new FileChecker();
        $this->bookmarksHelper = new PdfcpuWrapperBookmarksHelper($this->getBinary(false), $this->processFactory);
    }

    /**
     * Guesses the pdfcpu binary path based on the operating system.
     *
     * @param string $operatingSystemString Operating system name, e.g. the value of PHP_OS.
     *
     * @return string The Windows default path if the name starts with "WIN" (case-insensitive),
     *                "/usr/bin/pdfcpu" otherwise.
     */
    public function guessBinary(string $operatingSystemString): string
    {
        if (strtoupper(substr($operatingSystemString, 0, 3)) === 'WIN') {
            return 'C:\\Program Files\\pdfcpu\\pdfcpu.exe';
        }

        return '/usr/bin/pdfcpu';
    }

    /**
     * {@inheritDoc}
     *
     * Runs `pdfcpu merge <outfile> <files...>` with every path escaped by the Escaper.
     *
     * @throws PdfException if the pdfcpu process fails
     */
    public function join(array $filePaths, string $outfile): void
    {
        $esc = $this->escaper;

        $filePathsEscaped = array_map(function (string $filePath) use ($esc) {
            return $esc->shellArg($filePath);
        }, $filePaths);

        $commandLine = sprintf(
            '%s merge %s %s',
            $this->getBinary(),
            $esc->shellArg($outfile),
            implode(' ', $filePathsEscaped)
        );

        $this->runCommand($commandLine);
    }

    /**
     * {@inheritDoc}
     *
     * Runs one `pdfcpu collect` call per mapping entry; the entry key is the target filename and the value
     * the list of pages. Processing stops at the first failing call.
     *
     * @throws PdfException if a pdfcpu process fails
     */
    public function split(string $infile, array $mapping, ?string $outputFolder = null): void
    {
        $esc = $this->escaper;

        foreach ($mapping as $filename => $pages) {
            $target = $outputFolder ? sprintf('%s/%s', $outputFolder, $filename) : $filename;

            // NOTE(review): the page list is placed on the command line unescaped - callers must pass
            // trusted page specifications only.
            $commandLine = sprintf(
                '%s collect -pages %s %s %s',
                $this->getBinary(),
                implode(',', $pages),
                $esc->shellArg($infile),
                $esc->shellArg($target)
            );

            $this->runCommand($commandLine);
        }
    }

    /**
     * {@inheritDoc}
     *
     * Runs `pdfcpu collect` with the given page order. When no outfile is given (or it equals the infile)
     * the result is written to a temporary file which then replaces the infile.
     *
     * @throws PdfException if the pdfcpu process fails or no temporary file can be created
     */
    public function reorder(string $infile, array $order, ?string $outfile = null): void
    {
        $placeholder = null;

        if ($outfile === null || $infile === $outfile) {
            $placeholder = $this->createTemporaryPlaceholder();
            $outfile = $placeholder . '.pdf';
        }

        $esc = $this->escaper;

        // NOTE(review): the page order is placed on the command line unescaped - callers must pass
        // trusted page specifications only.
        $commandLine = sprintf(
            '%s collect -pages %s %s %s',
            $this->getBinary(),
            implode(',', $order),
            $esc->shellArg($infile),
            $esc->shellArg($outfile)
        );

        try {
            $this->runCommand($commandLine, sprintf('Failed to reorder PDF "%s"!', $infile));
        } catch (PdfException $e) {
            if ($placeholder !== null) {
                // do not leave temporary files behind when pdfcpu failed
                $this->discardTemporaryFiles($placeholder, $outfile);
            }

            throw $e;
        }

        if ($placeholder !== null) {
            $this->replaceWithTemporaryResult($infile, $outfile, $placeholder);
        }
    }

    /**
     * {@inheritDoc}
     *
     * Delegates to the PdfcpuWrapperBookmarksHelper.
     */
    public function applyBookmarks(Bookmarks $bookmarks, string $infile, ?string $outfile = null): self
    {
        $this->bookmarksHelper->applyBookmarks($bookmarks, $infile, $outfile);

        return $this;
    }

    /**
     * {@inheritDoc}
     *
     * Delegates to the PdfcpuWrapperBookmarksHelper.
     */
    public function importBookmarks(Bookmarks $bookmarks, string $infile): self
    {
        $this->bookmarksHelper->importBookmarks($bookmarks, $infile);

        return $this;
    }

    /**
     * {@inheritDoc}
     *
     * Reads the page boundaries from `pdfcpu info -pages 1- -j` and adds one Page per entry, in natural
     * page number order.
     *
     * @throws PdfException if the pdfcpu process fails or its output does not contain page boundaries
     */
    public function importPages(Pages $pages, string $infile): self
    {
        $this->fileChecker->checkPdfFileExists($infile);

        $cmd = sprintf('%s info -pages 1- -j %s', $this->getBinary(), $this->escaper->shellArg($infile));
        $process = $this->runCommand($cmd, sprintf('Failed to read pages data from "%s"!', $infile));

        /**
         * Remove invalid JSON (useless line with the page numbers at the beginning)
         * @todo Remove when pdfcpu does not emit the extra pages line before JSON anymore
         */
        $outputCleaned = preg_replace('/^pages: (\d,?)+$/mu', '', $process->getOutput());

        // preg_replace() yields null on a PCRE error; the cast turns that into invalid JSON handled below
        $infoRaw = json_decode((string) $outputCleaned, true);
        $pageBoundaries = is_array($infoRaw) ? ($infoRaw['infos'][0]['pageBoundaries'] ?? null) : null;

        if (!is_array($pageBoundaries)) {
            throw new PdfException(
                sprintf('Failed to read pages data from "%s"! Error: Unexpected output from pdfcpu.', $infile),
                0,
                null,
                $process->getErrorOutput(),
                $process->getOutput()
            );
        }

        // the page numbers in the JSON are strings, not numbers and sorted as strings, ensure natural sort
        ksort($pageBoundaries, SORT_NATURAL);

        foreach ($pageBoundaries as $pageNumber => $pageInfo) {
            // NOTE(review): width/height use the upper right corner only, which presumably assumes a media
            // box starting at (0, 0) - confirm against the pdfcpu output format.
            $upperRight = $pageInfo['mediaBox']['rect']['ur'];

            $page = new Page();
            $page
                ->setPageNumber((int) $pageNumber)
                ->setRotation((int) $pageInfo['rot'])
                ->setWidth((float) $upperRight['x'])
                ->setHeight((float) $upperRight['y'])
            ;

            $pages->add($page);
        }

        return $this;
    }

    /**
     * {@inheritDoc}
     *
     * Copies the infile to the outfile and runs `pdfcpu properties add` on the copy. When no outfile is
     * given (or it equals the infile) a temporary copy is used which then replaces the infile.
     *
     * @throws PdfException if the pdfcpu process fails or no temporary file can be created
     */
    public function applyMetadata(Metadata $metadata, string $infile, ?string $outfile = null): self
    {
        $this->fileChecker->checkPdfFileExists($infile);

        // NOTE(review): only the values are escaped, the keys are placed on the command line as they are.
        $properties = [];
        foreach ($metadata->all() as $key => $value) {
            $properties[] = sprintf('%s=%s', $key, $this->escaper->shellArg($value));
        }

        $propArgs = implode(' ', $properties);

        $placeholder = null;
        if ($outfile === null || $infile === $outfile) {
            $placeholder = $this->createTemporaryPlaceholder();
            $outfile = $placeholder . '.pdf';
        }

        // pdfcpu modifies the given file, so it has to work on a copy of the infile
        copy($infile, $outfile);

        $cmd = sprintf('%s properties add %s %s', $this->getBinary(), $this->escaper->shellArg($outfile), $propArgs);

        try {
            $this->runCommand($cmd, sprintf('Failed to write PDF metadata to "%s"!', $outfile));
        } catch (PdfException $e) {
            if ($placeholder !== null) {
                // do not leave temporary files behind when pdfcpu failed
                $this->discardTemporaryFiles($placeholder, $outfile);
            }

            throw $e;
        }

        if ($placeholder !== null) {
            $this->replaceWithTemporaryResult($infile, $outfile, $placeholder);
        }

        return $this;
    }

    /**
     * {@inheritDoc}
     *
     * Reads the first entry of "infos" from `pdfcpu info -j` and sets every supported, non-empty attribute
     * on the given metadata object. A keywords list is joined with ", ". Output without usable metadata
     * leaves the metadata object untouched.
     *
     * @throws PdfException if the pdfcpu process fails
     */
    public function importMetadata(Metadata $metadata, string $infile): self
    {
        $cmd = sprintf('%s info -j %s', $this->getBinary(), $this->escaper->shellArg($infile));
        $process = $this->runCommand($cmd, sprintf('Failed to read metadata data from "%s"!', $infile));

        $raw = json_decode($process->getOutput(), true);
        $metadataArray = is_array($raw) ? ($raw['infos'][0] ?? []) : [];

        if (!is_array($metadataArray)) {
            // unexpected structure: behave as if the PDF had no metadata
            $metadataArray = [];
        }

        foreach (self::SUPPORTED_METADATA_ATTRIBUTES as $attribute) {
            $attributeNormalized = lcfirst($attribute);
            $value = $metadataArray[$attributeNormalized] ?? null;

            if ($attributeNormalized === 'keywords' && is_array($value)) {
                $value = implode(', ', $value);
            }

            // skip missing, non-scalar and blank values
            if (is_scalar($value) && '' !== trim((string) $value)) {
                $metadata->set($attribute, $value);
            }
        }

        return $this;
    }

    /**
     * Creates a process for the command line, runs it and converts a failure into a PdfException.
     *
     * @param string      $commandLine   Complete command line to execute.
     * @param string|null $failurePrefix Text put in front of " Error: <message>" in the exception message,
     *                                   null to use the message of the original exception unchanged.
     *
     * @return Process The successfully finished process.
     *
     * @throws PdfException if the process fails; carries the error output and output of the process
     */
    private function runCommand(string $commandLine, ?string $failurePrefix = null): Process
    {
        /**
         * @var Process
         */
        $process = $this->processFactory->createProcess($commandLine);

        try {
            $process->mustRun();
        } catch (Exception $e) {
            $message = $failurePrefix === null
                ? $e->getMessage()
                : sprintf('%s Error: %s', $failurePrefix, $e->getMessage());

            throw new PdfException($message, 0, $e, $process->getErrorOutput(), $process->getOutput());
        }

        return $process;
    }

    /**
     * Creates a unique placeholder file in the system temporary directory.
     *
     * The placeholder reserves the name; the actual temporary PDF is "<placeholder>.pdf".
     *
     * @return string Path of the created placeholder file.
     *
     * @throws PdfException if the placeholder cannot be created
     */
    private function createTemporaryPlaceholder(): string
    {
        $placeholder = tempnam(sys_get_temp_dir(), 'pdf');

        if ($placeholder === false) {
            throw new PdfException('Failed to create a temporary file!', 0, null, '', '');
        }

        return $placeholder;
    }

    /**
     * Replaces the infile with the temporary result and removes the placeholder.
     */
    private function replaceWithTemporaryResult(string $infile, string $temporaryResult, string $placeholder): void
    {
        $this->discardTemporaryFiles($placeholder);

        // remove the target first, rename() onto an existing file is not reliable on every platform
        unlink($infile);
        rename($temporaryResult, $infile);
    }

    /**
     * Deletes the given files if they exist.
     */
    private function discardTemporaryFiles(string ...$paths): void
    {
        foreach ($paths as $path) {
            if (is_file($path)) {
                unlink($path);
            }
        }
    }
}
353