Completed
Push — master ( 8f3881...b07378 )
by Timo
16s
created

ServerService::getLogData()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 2
dl 0
loc 4
ccs 3
cts 3
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
namespace ApacheSolrForTypo3\Tika\Service\Tika;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Tika\Process;
28
use ApacheSolrForTypo3\Tika\Utility\FileUtility;
29
use TYPO3\CMS\Core\Registry;
30
use TYPO3\CMS\Core\Resource\FileInterface;
31
use TYPO3\CMS\Core\Utility\CommandUtility;
32
use TYPO3\CMS\Core\Utility\GeneralUtility;
33
34
/**
35
 * A Tika service implementation using the tika-server.jar
36
 *
37
 */
38
class ServerService extends AbstractService
39
{
40
41
    /**
     * Base URL of the Tika server. Assembled in initializeService() from
     * the configured scheme, host and (optional) port.
     *
     * @var string
     */
    protected $tikaUrl;

    /**
     * Mime types reported by the Tika server. Filled lazily by
     * getSupportedMimeTypes(); static, so shared by all instances.
     *
     * @var array
     */
    protected static $supportedMimeTypes = [];
52
53
    /**
     * Service initialization: assembles the Tika server base URL from the
     * service configuration.
     *
     * The scheme falls back to "http" when "tikaServerScheme" is empty, and
     * the port is only appended when "tikaServerPort" is configured.
     *
     * @return void
     */
    protected function initializeService()
    {
        $scheme = !empty($this->configuration['tikaServerScheme'])
            ? $this->configuration['tikaServerScheme']
            : 'http';

        $url = $scheme . '://' . $this->configuration['tikaServerHost'];

        if (!empty($this->configuration['tikaServerPort'])) {
            $url .= ':' . $this->configuration['tikaServerPort'];
        }

        $this->tikaUrl = $url;
    }
73
74
    /**
     * Creates a Process object wrapping the "java" command.
     *
     * @param string $arguments Arguments handed to the Process together with the java command
     * @return \ApacheSolrForTypo3\Tika\Process
     */
    protected function getProcess($arguments = '')
    {
        $javaCommand = CommandUtility::getCommand('java');

        return GeneralUtility::makeInstance(Process::class, $javaCommand, $arguments);
    }
86
87
    /**
     * Builds the java argument string used to start the Tika server:
     * "-jar <absolute path of tikaServerPath> -p <tikaServerPort>".
     *
     * NOTE(review): the arguments are quoted with escapeshellarg() and the
     * joined string is then passed through escapeshellcmd() as well. Shell
     * meta characters inside the quoted jar path would presumably end up
     * with literal backslashes - confirm before changing, since
     * getServerPid() hands this very string to Process::findPid().
     *
     * @return string
     */
    protected function getStartCommand()
    {
        $jarPath = FileUtility::getAbsoluteFilePath($this->configuration['tikaServerPath']);

        $arguments = implode(' ', [
            '-jar',
            escapeshellarg($jarPath),
            '-p',
            escapeshellarg($this->configuration['tikaServerPort']),
        ]);

        return escapeshellcmd($arguments);
    }
102
103
    /**
     * Starts the Tika server and stores the pid of the started process in
     * the TYPO3 registry (namespace "tx_tika", key "server.pid").
     *
     * @return void
     */
    public function startServer()
    {
        $serverProcess = $this->getProcess($this->getStartCommand());
        $serverProcess->start();
        $serverPid = $serverProcess->getPid();

        GeneralUtility::makeInstance(Registry::class)
            ->set('tx_tika', 'server.pid', $serverPid);
    }
117
118
    /**
     * Stops the Tika server process identified by getServerPid() and
     * removes the stored pid from the TYPO3 registry afterwards.
     *
     * @return void
     */
    public function stopServer()
    {
        $serverPid = $this->getServerPid();

        $serverProcess = $this->getProcess();
        $serverProcess->setPid($serverPid);
        $serverProcess->stop();

        // The server is gone, so the registry must not keep pointing at its pid.
        GeneralUtility::makeInstance(Registry::class)
            ->remove('tx_tika', 'server.pid');
    }
135
136
    /**
     * Gets the Tika server pid.
     *
     * The pid stored in the TYPO3 registry wins; only when the registry
     * has none is a Process built from the start command asked to find it.
     *
     * @return int|null Null if the pid can't be found, otherwise the pid
     */
    public function getServerPid()
    {
        $storedPid = GeneralUtility::makeInstance(Registry::class)
            ->get('tx_tika', 'server.pid');

        if (!empty($storedPid)) {
            return $storedPid;
        }

        return $this->getProcess($this->getStartCommand())->findPid();
    }
155
156
    /**
     * Tells whether a Tika server process is known, i.e. whether
     * getServerPid() yields a non-empty pid.
     *
     * @return bool
     */
    public function isServerRunning()
    {
        return !empty($this->getServerPid());
    }
167
168
    /**
     * Pings the Tika server by querying its "/tika" endpoint and checking
     * that the answer starts with "This is Tika Server".
     *
     * @return bool true if the Tika server can be reached, false if not
     * @throws \Exception
     */
    public function ping()
    {
        $pingResponse = $this->queryTika('/tika');

        return GeneralUtility::isFirstPartOfStr($pingResponse, 'This is Tika Server');
    }
181
182
    /**
     * Availability check: the service counts as available exactly when
     * the Tika server answers the ping.
     *
     * @return bool
     */
    public function isAvailable()
    {
        $serverReachable = $this->ping();

        return $serverReachable;
    }
191
192
    /**
     * Returns the Tika server base URL that initializeService() assembled.
     *
     * @return string Tika server URL
     */
    public function getTikaServerUrl()
    {
        return $this->tikaUrl;
    }
201
202
    /**
     * Gets the Tika server version from the "/version" endpoint.
     *
     * The server is only queried when it is running; otherwise the
     * literal string "unknown" is returned.
     *
     * @return string Tika server version string
     * @throws \Exception
     */
    public function getTikaVersion()
    {
        if (!$this->isServerRunning()) {
            return 'unknown';
        }

        return $this->queryTika('/version');
    }
218
219
    /**
     * Sends a request to a Tika server endpoint and returns the raw answer.
     *
     * Exceptions whose message contains "Connection refused" or
     * "HTTP request failed" are swallowed and an empty string is returned
     * instead; any other exception is re-thrown.
     *
     * NOTE(review): file_get_contents() itself reports failures with a
     * warning and a false return value, so the catch branch presumably
     * relies on an error handler turning warnings into exceptions - confirm.
     * Without one, callers may receive false (they compare against it).
     *
     * @param string $endpoint Path appended to the Tika server URL, e.g. "/tika"
     * @param resource $context optional stream context
     * @return string Tika output
     * @throws \Exception
     */
    protected function queryTika($endpoint, $context = null)
    {
        $requestUrl = $this->getTikaServerUrl() . $endpoint;

        // Not a dead store: when the request throws and the exception is
        // swallowed below, this empty string is the value that is returned.
        $output = '';

        try {
            $output = file_get_contents($requestUrl, false, $context);
        } catch (\Exception $exception) {
            $reason = $exception->getMessage();
            $serverUnavailable = strpos($reason, 'Connection refused') !== false
                || strpos($reason, 'HTTP request failed') !== false;

            if (!$serverUnavailable) {
                // Not a plain "server is down" situation, so something else
                // went wrong and the caller needs to know about it.
                throw $exception;
            }
        }

        return $output;
    }
246
247
    /**
     * Extracts the plain text of a file by PUTting its contents to the
     * Tika server's "/tika" endpoint. The outcome is logged either way.
     *
     * @param \TYPO3\CMS\Core\Resource\FileInterface $file
     * @return string
     */
    public function extractText(FileInterface $file)
    {
        $requestHeaders = [
            $this->getUserAgent(),
            'Accept: text/plain',
            'Content-Type: application/octet-stream',
            'Connection: close',
        ];

        $httpOptions = [
            'protocol_version' => 1.1,
            'method' => 'PUT',
            'header' => implode(CRLF, $requestHeaders),
            'content' => $file->getContents(),
        ];
        $context = stream_context_create(['http' => $httpOptions]);

        $response = $this->queryTika('/tika', $context);

        if ($response !== false) {
            $this->log('Text Extraction using Tika Server', $this->getLogData($file, $response));
        } else {
            $this->log('Text Extraction using Tika Server failed', $this->getLogData($file, $response), 2);
        }

        return $response;
    }
269
270
    /**
     * Extracts the meta data of a file by PUTting its contents to the
     * Tika server's "/meta" endpoint and decoding the JSON answer.
     *
     * A request counts as failed when no string came back from the server
     * or when the answer could not be decoded as JSON; this is logged with
     * severity 2 and an empty array is returned.
     *
     * @param \TYPO3\CMS\Core\Resource\FileInterface $file
     * @return array Decoded meta data, empty if the extraction failed
     */
    public function extractMetaData(FileInterface $file)
    {
        $headers = [
            $this->getUserAgent(),
            'Accept: application/json',
            'Content-Type: application/octet-stream',
            'Connection: close',
        ];

        $context = stream_context_create([
            'http' => [
                'protocol_version' => 1.1,
                'method' => 'PUT',
                'header' => implode(CRLF, $headers),
                'content' => $file->getContents(),
            ],
        ]);

        $rawResponse = $this->queryTika('/meta', $context);

        // Failure has to be detected BEFORE the array cast: a cast result is
        // always an array and can therefore never be identical to false.
        $decoded = is_string($rawResponse) ? json_decode($rawResponse) : null;

        if ($decoded === null) {
            // Log the raw answer; it is the only hint at what went wrong.
            $this->log(
                'Meta Data Extraction using Tika Server failed',
                $this->getLogData($file, $rawResponse),
                2
            );

            return [];
        }

        $response = (array)$decoded;
        $this->log('Meta Data Extraction using Tika Server', $this->getLogData($file, $response));

        return $response;
    }
294
295
    /**
     * Detects the language of a file's content by PUTting the content to
     * the Tika server's "/language/stream" endpoint. The outcome is logged
     * either way.
     *
     * @param \TYPO3\CMS\Core\Resource\FileInterface $file
     * @return string Language ISO code
     */
    public function detectLanguageFromFile(FileInterface $file)
    {
        $requestHeaders = [
            $this->getUserAgent(),
            'Content-Type: application/octet-stream',
            'Connection: close',
        ];

        $httpOptions = [
            'protocol_version' => 1.1,
            'method' => 'PUT',
            'header' => implode(CRLF, $requestHeaders),
            'content' => $file->getContents(),
        ];
        $context = stream_context_create(['http' => $httpOptions]);

        $response = $this->queryTika('/language/stream', $context);

        if ($response !== false) {
            $this->log('Language Detection using Tika Server', $this->getLogData($file, $response));
        } else {
            $this->log('Language Detection using Tika Server failed', $this->getLogData($file, $response), 2);
        }

        return $response;
    }
317
318
    /**
     * Detects the language of a string by PUTting it to the Tika server's
     * "/language/string" endpoint.
     *
     * @param string $input
     * @return string Language ISO code
     */
    public function detectLanguageFromString($input)
    {
        $requestHeaders = [
            $this->getUserAgent(),
            'Content-Type: application/octet-stream',
            'Connection: close',
        ];

        $httpOptions = [
            'protocol_version' => 1.1,
            'method' => 'PUT',
            'header' => implode(CRLF, $requestHeaders),
            'content' => $input,
        ];

        return $this->queryTika(
            '/language/string',
            stream_context_create(['http' => $httpOptions])
        );
    }
334
335
    /**
     * Returns the mime types supported by the Tika server.
     *
     * The list is built once and kept in the static $supportedMimeTypes
     * property; an empty list is not treated as cached and is rebuilt.
     *
     * @return array
     */
    public function getSupportedMimeTypes()
    {
        $alreadyBuilt = is_array(self::$supportedMimeTypes) && count(self::$supportedMimeTypes) > 0;

        if (!$alreadyBuilt) {
            self::$supportedMimeTypes = $this->buildSupportedMimeTypes();
        }

        return self::$supportedMimeTypes;
    }
348
349
    /**
     * Fetches the raw answer of the Tika server's "/mime-types" endpoint,
     * requesting JSON via the Accept header.
     *
     * @return string
     */
    protected function getMimeTypeJsonFromTikaServer()
    {
        $requestHeaders = [
            $this->getUserAgent(),
            'Content-Type: application/octet-stream',
            'Accept: application/json',
            'Connection: close',
        ];

        $httpOptions = [
            'protocol_version' => 1.1,
            'method' => 'GET',
            'header' => implode(CRLF, $requestHeaders),
        ];

        return $this->queryTika(
            '/mime-types',
            stream_context_create(['http' => $httpOptions])
        );
    }
361
362
    /**
     * Builds the sorted list of mime types supported by the Tika server.
     *
     * The server's "/mime-types" answer is decoded as a JSON object whose
     * property names are the core mime types; each entry may carry an
     * "alias" array with further names for the same type. Core types and
     * aliases are combined into one list.
     *
     * @return array Sorted list of mime type strings, empty if the server
     *               answer is missing or is not a JSON object
     */
    protected function buildSupportedMimeTypes()
    {
        $response = $this->getMimeTypeJsonFromTikaServer();
        $result = is_string($response) ? json_decode($response) : null;

        if (!is_object($result)) {
            // Nothing usable came back; get_object_vars() must not be
            // called on a non-object.
            return [];
        }

        $coreTypes = [];
        $aliasTypes = [];
        foreach (get_object_vars($result) as $coreMimeType => $configuration) {
            $coreTypes[] = $coreMimeType;

            if (isset($configuration->alias) && is_array($configuration->alias)) {
                // Append one by one. The array union operator (+) keeps the
                // first value per numeric key, so it would drop nearly
                // every alias from these zero-based lists.
                foreach ($configuration->alias as $aliasMimeType) {
                    $aliasTypes[] = $aliasMimeType;
                }
            }
        }

        // array_merge() renumbers, so no alias collides with a core type's key.
        $supportedTypes = array_filter(array_merge($coreTypes, $aliasTypes));
        sort($supportedTypes);

        return $supportedTypes;
    }
385
386
    /**
     * Builds the "User-Agent" request header line.
     *
     * Uses the agent configured in
     * $GLOBALS['TYPO3_CONF_VARS']['HTTP']['headers']['User-Agent'] and
     * falls back to "TYPO3" when none is configured.
     *
     * @return string Header line in the form "User-Agent: <agent>"
     */
    protected function getUserAgent()
    {
        // Resolve the fallback first: "." binds tighter than "??", so in a
        // single expression the concatenated string (never null) would be
        // the left operand and 'TYPO3' could never be chosen.
        $userAgent = $GLOBALS['TYPO3_CONF_VARS']['HTTP']['headers']['User-Agent'] ?? 'TYPO3';

        return 'User-Agent: ' . $userAgent;
    }
393
394
    /**
     * Collects the context that is logged along with a Tika request: the
     * file's name and public URL, the Tika server URL and the response.
     *
     * @param \TYPO3\CMS\Core\Resource\FileInterface $file
     * @param string $response Response as obtained by the caller; passed through untouched
     * @return array
     */
    protected function getLogData($file, $response)
    {
        return [
            'file' => $file->getName(),
            'file_path' => $file->getPublicUrl(),
            'tika_url' => $this->getTikaServerUrl(),
            'response' => $response,
        ];
    }
404
}
405