Passed
Pull Request — master (#108)
by Ralf
05:26
created

GetFileController::isForbidden()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 7
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 5
nc 3
nop 1
dl 0
loc 7
rs 10
c 0
b 0
f 0
1
<?php
2
namespace EWW\Dpf\Controller;
3
4
/*
5
 * This file is part of the TYPO3 CMS project.
6
 *
7
 * It is free software; you can redistribute it and/or modify it under
8
 * the terms of the GNU General Public License, either version 2
9
 * of the License, or any later version.
10
 *
11
 * For the full copyright and license information, please read the
12
 * LICENSE.txt file that was distributed with this source code.
13
 *
14
 * The TYPO3 project - inspiring people to share!
15
 */
16
17
/**
18
 * API to return METS dissemination and Attachments from Fedora.
19
 * Also renders METS XML for preview. The structure of the URIs depends
20
 * entirely on a proper RealURL configuration.
21
 *
22
 * Example:
23
 *
24
 * 1. METS from Fedora
25
 *   http://localhost/api/qucosa:1234/mets/
26
 *
27
 *   This always returns METS which is supplemented with additional information.
28
 *   The embedded MODS record is not the original MODS as it is stored in the
29
 *   repository datastream.
30
 *
31
 * 2. Attachment from Fedora
32
 *   http://localhost/api/qucosa:1234/attachment/ATT-0/
33
 *
34
 * 3. METS from Kitodo.Publication (this extension)
35
 *   http://localhost/api/3/preview/
36
 *
37
 * 4. DataCite from Kitodo.Publication (this extension)
38
 *
39
 * @author    Alexander Bigga <[email protected]>
40
 * @author    Ralf Claussnitzer <[email protected]>
41
 * @author    Florian Rügamer <[email protected]>
42
 */
43
44
use TYPO3\CMS\Core\Utility\GeneralUtility;
0 ignored issues
show
Bug introduced by
The type TYPO3\CMS\Core\Utility\GeneralUtility was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
45
46
/**
47
 * GetFileController
48
 */
49
class GetFileController extends \EWW\Dpf\Controller\AbstractController
{

    /**
     * documentRepository
     *
     * @var \EWW\Dpf\Domain\Repository\DocumentRepository
     * @inject
     */
    protected $documentRepository;

    /**
     * clientConfigurationManager
     *
     * @var \EWW\Dpf\Configuration\ClientConfigurationManager
     * @inject
     */
    protected $clientConfigurationManager;

    /**
     * Delivers METS, DataCite, ZIP or attachment data depending on the
     * requested tx_dpf[action].
     *
     * The actions "preview" and "dataCite" return their XML as the action
     * result. All other actions resolve a source location and pass its
     * content straight through to the client; on success the script is
     * terminated with exit so that no framework output follows the payload.
     *
     * @return string Response body (XML or a short error message)
     */
    public function attachmentAction()
    {
        // Incoming request parameters in the tx_dpf namespace.
        $piVars = GeneralUtility::_GP('tx_dpf');
        if (!is_array($piVars)) {
            $piVars = [];
        }

        $action = isset($piVars['action']) ? $piVars['action'] : null;

        // NOTE(review): qid and attachment come from the request and are
        // concatenated into Fedora URLs unescaped. Restricting them to the
        // expected identifier syntax should be considered.
        $qid = (isset($piVars['qid']) && is_scalar($piVars['qid'])) ? (string) $piVars['qid'] : '';

        if ($this->isForbidden($action)) {
            $this->response->setStatus(403);
            return 'Forbidden';
        }

        $fedoraBaseUrl = $this->getFedoraBaseUrl();

        $path = null;
        $contentType = null;

        switch ($action) {
            case 'mets':
                $path = $fedoraBaseUrl . '/fedora/objects/' . $qid
                    . '/methods/qucosa:SDef/getMETSDissemination?supplement=yes';
                break;

            case 'preview':
                $document = $this->documentRepository->findByUid($qid);

                if (!$document) {
                    $this->response->setStatus(404);
                    return 'No such document';
                }

                $metsXml = $this->buildMetsXml($document);
                $this->response->setHeader('Content-Type', 'text/xml; charset=UTF-8');
                return $metsXml;

            case 'attachment':
                $attachment = (isset($piVars['attachment']) && is_scalar($piVars['attachment']))
                    ? (string) $piVars['attachment']
                    : '';

                if (is_numeric($qid)) {
                    // qid is a local uid
                    $document = $this->documentRepository->findByUid($qid);

                    if (!$document) {
                        $this->response->setStatus(404);
                        return 'No such document';
                    }

                    $files = $document->getCurrentFileData();
                    $downloads = (isset($files['download']) && is_array($files['download']))
                        ? $files['download']
                        : [];

                    foreach ($downloads as $file) {
                        if ((string) $file['id'] === $attachment) {
                            $path = $file['path'];
                            $contentType = $file['type'];
                            break;
                        }
                    }
                } else {
                    $path = $fedoraBaseUrl . '/fedora/objects/' . $qid
                        . '/datastreams/' . $attachment . '/content';
                }

                if (empty($path)) {
                    $this->response->setStatus(404);
                    return 'No file found';
                }

                break;

            case 'dataCite':
                $source = explode(':', $qid);

                if ('qucosa' === $source[0]) {
                    $metsXml = file_get_contents(
                        $fedoraBaseUrl . '/fedora/objects/' . $qid
                        . '/methods/qucosa:SDef/getMETSDissemination?supplement=yes'
                    );

                    // A failed fetch must not be converted as if it were an empty document.
                    if (false === $metsXml) {
                        $this->response->setStatus(404);
                        return 'No such document';
                    }
                } elseif ($document = $this->documentRepository->findByUid($qid)) {
                    $metsXml = $this->buildMetsXml($document);
                } else {
                    $this->response->setStatus(404);
                    return 'No such document';
                }

                // NOTE(review): this also re-escapes entities that are already
                // escaped in the METS; kept as the converter is fed this way today.
                $dataCiteXml = \EWW\Dpf\Helper\DataCiteXml::convertFromMetsXml(
                    str_replace('&', '&amp;', $metsXml)
                );

                // Derive the download file name from the first <title>; fall
                // back to the qid when the XML is unusable or has no title.
                $title = $qid;
                if (is_string($dataCiteXml) && '' !== $dataCiteXml) {
                    $dom = new \DOMDocument('1.0', 'UTF-8');
                    if (true === $dom->loadXML($dataCiteXml)) {
                        $titleNode = $dom->getElementsByTagName('title')->item(0);
                        if (null !== $titleNode) {
                            $title = $titleNode->nodeValue;
                        }
                    }
                }

                $this->response->setHeader(
                    'Content-Disposition',
                    'attachment; filename="' . self::sanitizeFilename($title) . '.DataCite.xml"'
                );
                $this->response->setHeader('Content-Type', 'text/xml; charset=UTF-8');
                return $dataCiteXml;

            case 'zip':
                // FIXME Service locations on Fedora host are hard coded
                $metsUrl = $fedoraBaseUrl . '/mets?pid=' . $qid;
                $path = $fedoraBaseUrl . '/zip?metsurl=' . rawurlencode($metsUrl);
                break;

            default:
                $this->response->setStatus(404);
                return 'No such action';
        }

        // Stop here if inactive Fedora objects are not allowed to be disseminated.
        // NOTE(review): deliverInactive is read from the request itself; confirm
        // that only trusted routing can set it.
        $deliverInactive = isset($piVars['deliverInactive']) && 'yes' === $piVars['deliverInactive'];

        if (!$deliverInactive) {
            $objectState = $this->fetchFedoraObjectState($fedoraBaseUrl, $qid);

            if ('I' === $objectState) {
                $this->response->setStatus(403);
                return 'Forbidden';
            }

            if ('D' === $objectState) {
                $this->response->setStatus(404);
                return 'Not Found';
            }
        }

        // Get the remote headers so the relevant ones can be forwarded.
        $headers = get_headers($path);

        if (false === $headers) {
            $this->response->setStatus(500);
            return 'Error while fetching headers';
        }

        // Match on the header name at the start of the line only, so that
        // e.g. "X-Content-Type-Options" is not mistaken for "Content-Type".
        $forwardable = ['Content-Disposition', 'Content-Type', 'Content-Length'];
        $forwardLines = [];
        $seen = [];

        foreach ($headers as $headerLine) {
            foreach ($forwardable as $headerName) {
                if (0 === stripos($headerLine, $headerName . ':')) {
                    $forwardLines[] = $headerLine;
                    $seen[$headerName] = true;
                    break;
                }
            }
        }

        // Open the stream before emitting any header, so a failure does not
        // leave upstream headers (e.g. Content-Length) on the error response.
        $stream = fopen($path, 'r');

        if (!$stream) {
            $this->response->setStatus(500);
            return 'Error while opening stream';
        }

        foreach ($forwardLines as $headerLine) {
            header($headerLine);
        }

        if (!isset($seen['Content-Disposition'])) {
            header('Content-Disposition: attachment');
        }

        if (!isset($seen['Content-Type'])) {
            // Only local files carry a known type; use a generic one otherwise.
            header('Content-Type: ' . ($contentType ?: 'application/octet-stream'));
        }

        // close active session if any
        session_write_close();

        // stop output buffering (only if a buffer is actually active)
        if (ob_get_level() > 0) {
            ob_end_clean();
        }

        fpassthru($stream);
        fclose($stream);

        // Hard exit PHP script to avoid sending TYPO3 framework HTTP artifacts
        exit;
    }

    /**
     * Reduces a string to a conservative ASCII file name.
     *
     * @param string $filename Raw name, e.g. a document title
     * @return string Name with disallowed characters removed, diacritics
     *                transliterated and whitespace runs replaced by "_"
     */
    private static function sanitizeFilename($filename)
    {
        // remove anything which isn't a word, whitespace, number or any of the following characters -_~,;[]().
        $filename = trim((string) mb_ereg_replace("([^\w\s\d\-_~,;\[\]\(\).])", '', (string) $filename));

        // Turn diacritical characters to ASCII. Only the character type locale
        // is switched, and it is restored afterwards so the rest of the
        // request is not affected.
        $previousLocale = setlocale(LC_CTYPE, '0');
        setlocale(LC_CTYPE, 'en_US.utf8');
        $ascii = iconv('utf-8', 'us-ascii//TRANSLIT', $filename);
        if (false !== $previousLocale) {
            setlocale(LC_CTYPE, $previousLocale);
        }

        if (false === $ascii) {
            // Transliteration failed: drop everything outside printable ASCII instead.
            $ascii = preg_replace('/[^\x20-\x7E]/', '', $filename);
        }

        // replace whitespaces with underscore
        return preg_replace('/\s+/', '_', $ascii);
    }

    /**
     * Builds the METS XML for a local document with the MetsExporter.
     *
     * The object identifier of the document is used as METS object id; if
     * the document has none, its uid is used instead.
     *
     * @param object $document Document providing file, MODS and SLUB info data
     * @return mixed METS data as returned by the exporter
     */
    private function buildMetsXml($document)
    {
        $exporter = new \EWW\Dpf\Services\MetsExporter();
        $exporter->setFileData($document->getCurrentFileData());
        $exporter->setMods($document->getXmlData());
        $exporter->setSlubInfo($document->getSlubInfoData());

        $objectIdentifier = $document->getObjectIdentifier();
        $exporter->setObjId(empty($objectIdentifier) ? $document->getUid() : $objectIdentifier);

        $exporter->buildMets();

        return $exporter->getMetsData();
    }

    /**
     * Tells whether the requested action is missing from the configured
     * settings['allowedActions'] list.
     *
     * @param mixed $action Requested action name
     * @return bool TRUE unless $action is a string listed in allowedActions
     */
    private function isForbidden($action)
    {
        if (!is_string($action)
            || !is_array($this->settings)
            || !isset($this->settings['allowedActions'])
            || !is_array($this->settings['allowedActions'])
        ) {
            return true;
        }

        return !in_array($action, $this->settings['allowedActions'], true);
    }

    /**
     * Returns the HTTP base URL of the configured Fedora host without a
     * trailing slash.
     *
     * @return string
     */
    private function getFedoraBaseUrl()
    {
        return rtrim('http://' . $this->clientConfigurationManager->getFedoraHost(), '/');
    }

    /**
     * Reads the objState value from the Fedora object profile of $qid.
     *
     * @param string $fedoraBaseUrl Base URL as returned by getFedoraBaseUrl()
     * @param string $qid Object identifier
     * @return string|null State value, or NULL if the profile could not be
     *                     fetched, parsed, or has no objState element
     */
    private function fetchFedoraObjectState($fedoraBaseUrl, $qid)
    {
        $profileXml = file_get_contents($fedoraBaseUrl . '/fedora/objects/' . $qid . '?format=XML');

        if (false === $profileXml || '' === $profileXml) {
            return null;
        }

        $profileDom = new \DOMDocument('1.0', 'UTF-8');

        if (true !== $profileDom->loadXML($profileXml)) {
            return null;
        }

        $stateNode = $profileDom->getElementsByTagName('objState')->item(0);

        return (null !== $stateNode) ? $stateNode->nodeValue : null;
    }
}
320
321