Issues (3936)

Classes/Controller/GetFileController.php (9 issues)

1
<?php
2
3
namespace EWW\Dpf\Controller;
4
5
/*
6
 * This file is part of the TYPO3 CMS project.
7
 *
8
 * It is free software; you can redistribute it and/or modify it under
9
 * the terms of the GNU General Public License, either version 2
10
 * of the License, or any later version.
11
 *
12
 * For the full copyright and license information, please read the
13
 * LICENSE.txt file that was distributed with this source code.
14
 *
15
 * The TYPO3 project - inspiring people to share!
16
 */
17
18
/**
19
 * API to return METS dissemination and Attachments from Fedora.
20
 * Also renders METS XML for preview. Structure of the URIs totally
21
 * depends on proper RealURL configuration.
22
 *
23
 * Example:
24
 *
25
 * 1. METS from Fedora
26
 *   http://localhost/api/qucosa:1234/mets/
27
 *
28
 *   This always returns METS which is supplemented with additional information.
29
 *   The embedded MODS record is not the original MODS as it is stored in the
30
 *   repository datastream.
31
 *
32
 * 2. Attachment from Fedora
33
 *   http://localhost/api/qucosa:1234/attachment/ATT-0/
34
 *
35
 * 3. METS from Kitodo.Publication (this extension)
36
 *   http://localhost/api/3/preview/
37
 *
38
 * 4. DataCite from Kitodo.Publication (this extension)
39
 *
40
 * @author Alexander Bigga <[email protected]>
41
 * @author Ralf Claussnitzer <[email protected]>
42
 * @author Florian Rügamer <[email protected]>
43
 */
44
45
use EWW\Dpf\Domain\Model\File;
46
use EWW\Dpf\Helper\XSLTransformator;
47
use EWW\Dpf\Services\ParserGenerator;
48
use TYPO3\CMS\Core\Utility\GeneralUtility;
49
50
/**
 * GetFileController
 *
 * Serves the file API of this extension. Depending on the request parameter
 * tx_dpf[action] it either renders XML for a locally stored document
 * ("preview", "dataCite") or passes a remote resource of the configured
 * Fedora host through to the client ("mets", "attachment", "zip").
 */
class GetFileController extends \EWW\Dpf\Controller\AbstractController
{

    /**
     * documentRepository
     *
     * @var \EWW\Dpf\Domain\Repository\DocumentRepository
     * @TYPO3\CMS\Extbase\Annotation\Inject
     */
    protected $documentRepository;

    /**
     * clientConfigurationManager
     *
     * @var \EWW\Dpf\Configuration\ClientConfigurationManager
     * @TYPO3\CMS\Extbase\Annotation\Inject
     */
    protected $clientConfigurationManager;

    /**
     * Handles a file API request.
     *
     * Reads "action", "qid", "attachment" and "deliverInactive" from the
     * tx_dpf request parameters. Actions that are not listed in
     * settings.allowedActions are answered with 403. For pass-through actions
     * the remote resource is streamed to the client and the script exits;
     * in every other case the response body is returned as a string.
     *
     * @return string|void XML or a short error message; nothing when the
     *                     remote stream was delivered (script exits).
     */
    public function attachmentAction()
    {
        // Request parameters of this extension (namespace "tx_dpf").
        $piVars = GeneralUtility::_GP('tx_dpf');
        if (!is_array($piVars)) {
            $piVars = [];
        }

        $action     = self::readParameter($piVars, 'action');
        $qid        = self::readParameter($piVars, 'qid');
        $attachment = self::readParameter($piVars, 'attachment');

        if ($this->isForbidden($action)) {
            $this->response->setStatus(403);
            return 'Forbidden';
        }

        $fedoraBaseUrl   = rtrim('http://' . $this->clientConfigurationManager->getFedoraHost(), "/");
        $fedoraNamespace = $this->clientConfigurationManager->getFedoraNamespace();

        // A numeric qid is the uid of a local document, anything else is
        // treated as a Fedora object identifier.
        $isRepositoryObject = !is_numeric($qid);

        // qid and attachment are untrusted input: encode them before they
        // become part of a URL. Valid identifiers are left unchanged.
        $encodedQid        = self::encodeIdentifier($qid);
        $encodedAttachment = self::encodeIdentifier($attachment);

        $path        = null;
        $contentType = null;

        switch ($action) {
            case 'mets':
                $path = $fedoraBaseUrl . '/fedora/objects/' . $encodedQid . '/methods/'
                    . $fedoraNamespace . ':SDef/getMETSDissemination?supplement=yes';
                break;

            case 'preview':
                // Fixme: Can be removed due to the details page.
                $document = $this->documentRepository->findByUid($qid);

                if (!$document) {
                    $this->response->setStatus(404);
                    return 'No such document';
                }

                $metsXml = $this->buildMetsXml($document);
                $this->response->setHeader('Content-Type', 'text/xml; charset=UTF-8');
                return $metsXml;

            case 'attachment':
                if (is_numeric($qid)) {
                    // qid is local uid
                    $document = $this->documentRepository->findByUid($qid);
                    $file     = $document ? self::findDownloadableFile($document, $attachment) : null;

                    if (null !== $file) {
                        $path        = $file->getUrl();
                        $contentType = $file->getContentType();
                    }
                } else {
                    $path = $fedoraBaseUrl . '/fedora/objects/' . $encodedQid
                        . '/datastreams/' . $encodedAttachment . '/content';
                }

                if (empty($path)) {
                    $this->response->setStatus(404);
                    return 'No file found';
                }

                break;

            case 'dataCite':
                $source = explode(':', $qid);

                if ($source[0] === (string) $fedoraNamespace) {
                    $metsUrl = $fedoraBaseUrl . '/fedora/objects/' . $encodedQid . '/methods/'
                        . $fedoraNamespace . ':SDef/getMETSDissemination?supplement=yes';
                    $metsSource = file_get_contents($metsUrl);

                    if (false === $metsSource) {
                        // Remote METS could not be fetched; do not convert an empty document.
                        $this->response->setStatus(500);
                        return 'Internal Server Error';
                    }

                    $metsXml = str_replace('&', '&amp;', $metsSource);
                } elseif ($document = $this->documentRepository->findByUid($qid)) {
                    $metsXml = str_replace('&', '&amp;', $this->buildMetsXml($document));
                } else {
                    $this->response->setStatus(404);
                    return 'No such document';
                }

                $dataCiteXml = \EWW\Dpf\Helper\DataCiteXml::convertFromMetsXml($metsXml);

                // The first <title> element provides the download file name.
                $title = '';
                $dom   = new \DOMDocument('1.0', 'UTF-8');
                if (!empty($dataCiteXml) && $dom->loadXML($dataCiteXml)) {
                    $titleNode = $dom->getElementsByTagName('title')->item(0);
                    if (null !== $titleNode) {
                        $title = $titleNode->nodeValue;
                    }
                }

                $this->response->setHeader('Content-Disposition',
                    'attachment; filename="' . self::sanitizeFilename($title) . '.DataCite.xml"');
                $this->response->setHeader('Content-Type', 'text/xml; charset=UTF-8');
                return $dataCiteXml;

            case 'zip':
                // FIXME Service locations on Fedora host are hard coded
                $metsUrl = $fedoraBaseUrl . '/mets?pid=' . $encodedQid;
                $path    = $fedoraBaseUrl . '/zip?xmdpfilter=true&metsurl=' . rawurlencode($metsUrl);
                break;

            default:
                $this->response->setStatus(404);
                return 'No such action';
        }

        // stop here, if inactive Fedora objects are not allowed to be disseminated
        // allow dissemination if a request parameter 'deliverInactive' has the secret
        // TYPOScript configuration value 'deliverInactiveSecretKey'
        $restrictToActiveDocuments = !$this->isInactiveDeliveryAuthorized($piVars);

        if ($isRepositoryObject && $restrictToActiveDocuments) {
            // if restriction applies, check object state before dissemination
            $objectProfileURI = $fedoraBaseUrl . '/fedora/objects/' . $encodedQid . '?format=XML';
            $objectProfileXML = file_get_contents($objectProfileURI);

            if (false === $objectProfileXML) {
                $this->response->setStatus(500);
                return 'Internal Server Error';
            }

            // As before, a profile that cannot be parsed or carries no
            // objState does not block the delivery.
            $objectProfileDOM = new \DOMDocument('1.0', 'UTF-8');
            if ('' !== $objectProfileXML && true === $objectProfileDOM->loadXML($objectProfileXML)) {
                $stateNode   = $objectProfileDOM->getElementsByTagName('objState')->item(0);
                $objectState = (null !== $stateNode) ? $stateNode->nodeValue : null;

                if ('I' === $objectState) {
                    $this->response->setStatus(403);
                    return 'Forbidden';
                }
                if ('D' === $objectState) {
                    $this->response->setStatus(404);
                    return 'Not Found';
                }
            }
        }

        // get remote header and set it before passtrough
        $headers = get_headers($path);

        if (false === $headers) {
            $this->response->setStatus(500);
            return 'Error while fetching headers';
        }

        $contentDispFlag = false;
        $contentTypeFlag = false;

        foreach ($headers as $value) {
            // Match the header name at the start of the line only, so that a
            // header merely mentioning one of these names is not forwarded.
            if (0 === stripos($value, 'Content-Disposition:')) {
                header($value);
                $contentDispFlag = true;
            } elseif (0 === stripos($value, 'Content-Type:')) {
                header($value);
                $contentTypeFlag = true;
            } elseif (0 === stripos($value, 'Content-Length:')) {
                header($value);
            }
        }

        if (!$contentDispFlag) {
            header('Content-Disposition: attachment');
        }

        if (!$contentTypeFlag) {
            // Only local attachments know their content type; use a generic
            // binary type otherwise instead of sending an empty header value.
            header('Content-Type: ' . (empty($contentType) ? 'application/octet-stream' : $contentType));
        }

        if ($stream = fopen($path, 'r')) {
            // close active session if any
            session_write_close();

            // stop output buffering (only if a buffer is active)
            if (ob_get_level() > 0) {
                ob_end_clean();
            }

            fpassthru($stream);

            fclose($stream);

            // Hard exit PHP script to avoid sending TYPO3 framework HTTP artifacts
            exit;
        }

        $this->response->setStatus(500);
        return 'Error while opening stream';
    }

    /**
     * Reads one request parameter as string.
     *
     * @param array  $parameters Request parameters of this extension
     * @param string $name       Parameter name
     * @return string The value, or an empty string if it is missing or not scalar
     */
    private static function readParameter(array $parameters, string $name): string
    {
        if (isset($parameters[$name]) && is_scalar($parameters[$name])) {
            return (string) $parameters[$name];
        }

        return '';
    }

    /**
     * Percent-encodes an identifier for use inside a URL while keeping the
     * colon of identifiers like "qucosa:1234" literal.
     *
     * @param string $identifier Raw identifier from the request
     * @return string Encoded identifier
     */
    private static function encodeIdentifier(string $identifier): string
    {
        return str_replace('%3A', ':', rawurlencode($identifier));
    }

    /**
     * Finds the first file of a document that is offered for download, whose
     * file group is not deleted and whose datastream identifier equals the
     * requested attachment.
     *
     * @param object $document   Document providing getFile()
     * @param string $attachment Requested datastream identifier
     * @return File|null The matching file or null if there is none
     */
    private static function findDownloadableFile($document, string $attachment)
    {
        $files = $document->getFile();

        if (!($files instanceof \TYPO3\CMS\Extbase\Persistence\ObjectStorage)) {
            return null;
        }

        /** @var File $file */
        foreach ($files as $file) {
            if ($file->isFileGroupDeleted() || !$file->getDownload()) {
                continue;
            }
            if ((string) $file->getDatastreamIdentifier() === $attachment) {
                return $file;
            }
        }

        return null;
    }

    /**
     * Tells whether the request carries the configured secret that lifts the
     * restriction to active documents.
     *
     * An unset or empty 'deliverInactiveSecretKey' never authorizes anything,
     * so a missing configuration cannot be matched by a missing parameter.
     *
     * @param array $piVars Request parameters of this extension
     * @return bool True if inactive objects may be delivered
     */
    private function isInactiveDeliveryAuthorized(array $piVars): bool
    {
        $secret = isset($this->settings['deliverInactiveSecretKey'])
            ? $this->settings['deliverInactiveSecretKey']
            : null;
        $given = isset($piVars['deliverInactive']) ? $piVars['deliverInactive'] : null;

        return is_string($secret)
            && '' !== $secret
            && is_string($given)
            && hash_equals($secret, $given);
    }

    /**
     * Turns an arbitrary title into an ASCII file name.
     *
     * @param string $filename Raw name
     * @return string Sanitized name with whitespace replaced by underscores
     */
    private static function sanitizeFilename($filename)
    {
        // remove anything which isn't a word, whitespace, number or any of the following characters -_~,;[]().
        $filename = trim((string) mb_ereg_replace("([^\w\s\d\-_~,;\[\]\(\).])", '', (string) $filename));

        // turn diacritical characters to ASCII
        setlocale(LC_ALL, 'en_US.utf8');
        $ascii = iconv('utf-8', 'us-ascii//TRANSLIT', $filename);
        if (false === $ascii) {
            // Transliteration failed: drop every non-ASCII byte instead of
            // losing the whole name.
            $ascii = (string) preg_replace('/[^\x20-\x7E\s]/', '', $filename);
        }

        // replace whitespaces with underscore
        return preg_replace('/\s+/', '_', $ascii);
    }

    /**
     * Builds the METS XML of a local document.
     *
     * The object id written into the XML data is the document's object
     * identifier, or its uid when no object identifier is set.
     *
     * @param object $document Local document
     * @return mixed Output of XSLTransformator::getTransformedOutputXML()
     */
    private function buildMetsXml($document)
    {
        $parserGenerator = new ParserGenerator();
        $parserGenerator->setXML($document->getXmlData());

        $objectIdentifier = $document->getObjectIdentifier();
        $parserGenerator->setObjId(empty($objectIdentifier) ? $document->getUid() : $objectIdentifier);

        $document->setXmlData($parserGenerator->getXMLData());

        $XSLTransformator = new XSLTransformator();
        return $XSLTransformator->getTransformedOutputXML($document);
    }

    /**
     * Checks an action against the 'allowedActions' setting.
     *
     * @param string $action Requested action
     * @return bool True unless 'allowedActions' is an array containing the action
     */
    private function isForbidden($action)
    {
        $allowed =
            isset($this->settings['allowedActions'])
            && is_array($this->settings['allowedActions'])
            && in_array($action, $this->settings['allowedActions'], true);

        return !$allowed;
    }
}
335
336