1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace ApacheSolrForTypo3\Tika\Service\Tika; |
6
|
|
|
|
7
|
|
|
/* |
8
|
|
|
* This file is part of the TYPO3 CMS project. |
9
|
|
|
* |
10
|
|
|
* It is free software; you can redistribute it and/or modify it under |
11
|
|
|
* the terms of the GNU General Public License, either version 2 |
12
|
|
|
* of the License, or any later version. |
13
|
|
|
* |
14
|
|
|
* For the full copyright and license information, please read the |
15
|
|
|
* LICENSE.txt file that was distributed with this source code. |
16
|
|
|
* |
17
|
|
|
* The TYPO3 project - inspiring people to share! |
18
|
|
|
*/ |
19
|
|
|
|
20
|
|
|
use ApacheSolrForTypo3\Tika\Process; |
21
|
|
|
use ApacheSolrForTypo3\Tika\Utility\FileUtility; |
22
|
|
|
use GuzzleHttp\Exception\BadResponseException; |
23
|
|
|
use Psr\Container\ContainerExceptionInterface; |
24
|
|
|
use Psr\Container\NotFoundExceptionInterface; |
25
|
|
|
use Psr\Http\Client\ClientExceptionInterface; |
26
|
|
|
use Psr\Http\Client\ClientInterface; |
27
|
|
|
use Psr\Http\Message\RequestInterface; |
28
|
|
|
use Psr\Http\Message\UriInterface; |
29
|
|
|
use Psr\Log\LogLevel; |
30
|
|
|
use Throwable; |
31
|
|
|
use TYPO3\CMS\Core\Http\RequestFactory; |
32
|
|
|
use TYPO3\CMS\Core\Http\Stream; |
33
|
|
|
use TYPO3\CMS\Core\Http\Uri; |
34
|
|
|
use TYPO3\CMS\Core\Registry; |
35
|
|
|
use TYPO3\CMS\Core\Resource\FileInterface; |
36
|
|
|
use TYPO3\CMS\Core\Utility\CommandUtility; |
37
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
38
|
|
|
|
39
|
|
|
use function str_starts_with; |
40
|
|
|
|
41
|
|
|
/** |
42
|
|
|
* A Tika service implementation using the tika-server.jar |
43
|
|
|
* |
44
|
|
|
* @copyright (c) 2015 Ingo Renner <[email protected]> |
45
|
|
|
*/ |
46
|
|
|
class ServerService extends AbstractService |
47
|
|
|
{ |
48
|
|
|
/** |
49
|
|
|
* @var ClientInterface |
50
|
|
|
*/ |
51
|
|
|
protected ClientInterface $psr7Client; |
52
|
|
|
|
53
|
|
|
/** |
54
|
|
|
* List of valid status codes |
55
|
|
|
* |
56
|
|
|
* @var int[] |
57
|
|
|
*/ |
58
|
|
|
protected array $validStatusCodes = [200, 202, 204]; |
59
|
|
|
|
60
|
|
|
/** |
61
|
|
|
* Tika server URL |
62
|
|
|
* |
63
|
|
|
* @var Uri |
64
|
|
|
*/ |
65
|
|
|
protected Uri $tikaUri; |
66
|
|
|
|
67
|
|
|
/** |
68
|
|
|
* @var array |
69
|
|
|
*/ |
70
|
|
|
protected static array $supportedMimeTypes = []; |
71
|
|
|
|
72
|
|
|
/**
 * Service initialization
 *
 * Fetches the HTTP client from the container and assembles the Tika server
 * URI from the "tikaServerHost", "tikaServerScheme" and "tikaServerPort"
 * configuration values.
 *
 * @throws ContainerExceptionInterface
 * @throws NotFoundExceptionInterface
 *
 * @noinspection PhpUnused
 */
protected function initializeService(): void
{
    $this->psr7Client = GeneralUtility::getContainer()->get(ClientInterface::class);

    // Start with plain http; a configured scheme replaces it below
    $this->tikaUri = new Uri('http://' . $this->configuration['tikaServerHost']);

    $configuredScheme = $this->configuration['tikaServerScheme'] ?? '';
    if (!empty($configuredScheme)) {
        $this->tikaUri = $this->tikaUri->withScheme($configuredScheme);
    }

    // The port is optional and only applied when it is configured
    $configuredPort = $this->configuration['tikaServerPort'] ?? '';
    if (!empty($configuredPort)) {
        $this->tikaUri = $this->tikaUri->withPort((int)$configuredPort);
    }
}
97
|
|
|
|
98
|
|
|
/**
 * Creates a process object for the java binary.
 *
 * The additional command options are placed in front of the given
 * arguments; surrounding whitespace of the combined string is trimmed.
 *
 * @param string $arguments Arguments appended after the additional command options
 * @return Process
 */
protected function getProcess(string $arguments = ''): Process
{
    $combinedArguments = trim($this->getAdditionalCommandOptions() . ' ' . $arguments);
    $javaBinary = CommandUtility::getCommand('java');

    return GeneralUtility::makeInstance(Process::class, $javaBinary, $combinedArguments);
}
110
|
|
|
|
111
|
|
|
/**
 * Creates the command arguments used to start the Tika server.
 *
 * The result has the form "-jar <tikaServerPath> -p <tikaServerPort>" with
 * both values shell-escaped.
 *
 * @return string
 */
protected function getStartCommand(): string
{
    $tikaJar = FileUtility::getAbsoluteFilePath($this->configuration['tikaServerPath']);

    // The port may be configured as an integer; with strict_types enabled
    // escapeshellarg() only accepts strings, hence the explicit cast.
    $port = (string)$this->configuration['tikaServerPort'];

    $command = '-jar ' . escapeshellarg($tikaJar)
        . ' -p ' . escapeshellarg($port);

    // NOTE(review): escapeshellcmd() additionally backslash-escapes shell meta
    // characters inside the already quoted arguments. Kept as-is because the
    // same string is also used to look up the running process - confirm
    // before changing.
    return escapeshellcmd($command);
}
124
|
|
|
|
125
|
|
|
/**
 * Starts the Tika server and stores the pid of the started process in the
 * registry (namespace "tx_tika", key "server.pid").
 */
public function startServer(): void
{
    $serverProcess = $this->getProcess($this->getStartCommand());
    $serverProcess->start();
    $serverPid = $serverProcess->getPid();

    GeneralUtility::makeInstance(Registry::class)
        ->set('tx_tika', 'server.pid', $serverPid);
}
138
|
|
|
|
139
|
|
|
/**
 * Stops the Tika server.
 *
 * Does nothing when no server pid can be determined. Otherwise the process
 * is stopped and the pid entry is removed from the registry.
 */
public function stopServer(): void
{
    $serverPid = $this->getServerPid();
    if ($serverPid === null) {
        // No known server process, nothing to stop
        return;
    }

    $serverProcess = $this->getProcess();
    $serverProcess->setPid($serverPid);
    $serverProcess->stop();

    // Forget the stored pid
    GeneralUtility::makeInstance(Registry::class)
        ->remove('tx_tika', 'server.pid');
}
158
|
|
|
|
159
|
|
|
/**
 * Gets the Tika server pid.
 *
 * The pid stored in the TYPO3 registry is preferred; when the registry holds
 * no value, the pid is looked up through a process object created for the
 * start command.
 *
 * @return int|null Null if the pid can't be found, otherwise the pid
 */
public function getServerPid(): ?int
{
    $serverPid = GeneralUtility::makeInstance(Registry::class)
        ->get('tx_tika', 'server.pid');

    if (empty($serverPid)) {
        // Nothing stored, ask the process object to find the pid
        $serverPid = $this->getProcess($this->getStartCommand())->findPid();
    }

    return empty($serverPid) ? null : (int)$serverPid;
}
183
|
|
|
|
184
|
|
|
/**
 * Check if the Tika server is running, i.e. whether a server pid can be
 * determined.
 *
 * @return bool
 */
public function isServerRunning(): bool
{
    return !empty($this->getServerPid());
}
195
|
|
|
|
196
|
|
|
/**
 * Ping the Tika server
 *
 * Requests the "/tika" endpoint and checks that the answer starts with the
 * Tika server greeting. Any error raised while querying counts as
 * "not reachable".
 *
 * @return bool true if the Tika server can be reached, false if not
 */
public function ping(): bool
{
    try {
        $greeting = $this->queryTika($this->createRequestForEndpoint('/tika'));
    } catch (Throwable $exception) {
        return false;
    }

    return str_starts_with($greeting, 'This is Tika Server');
}
210
|
|
|
|
211
|
|
|
/**
 * Reports the availability of the Tika server, which is the result of
 * pinging it.
 *
 * @return bool
 */
public function isAvailable(): bool
{
    $reachable = $this->ping();

    return $reachable;
}
220
|
|
|
|
221
|
|
|
/**
 * Returns the Tika server URI in its string representation.
 *
 * @return string Tika server URL
 */
public function getTikaServerUrl(): string
{
    return $this->tikaUri->__toString();
}
230
|
|
|
|
231
|
|
|
/**
 * Returns the Tika server Uri object built during service initialization.
 *
 * @return Uri Tika server Uri
 */
public function getTikaServerUri(): Uri
{
    return $this->tikaUri;
}
240
|
|
|
|
241
|
|
|
/**
 * Gets the Tika server version
 *
 * The "/version" endpoint is only queried when the server is running;
 * otherwise "unknown" is returned.
 *
 * @return string Tika server version string
 * @throws ClientExceptionInterface
 * @throws Throwable
 */
public function getTikaVersion(): string
{
    if (!$this->isServerRunning()) {
        return 'unknown';
    }

    return $this->queryTika($this->createRequestForEndpoint('/version'));
}
258
|
|
|
|
259
|
|
|
/**
 * Query a Tika server endpoint
 *
 * Sends the request and returns the response body. A response whose status
 * code is not listed in $validStatusCodes is turned into a
 * BadResponseException. Errors whose message contains "Connection refused"
 * or "HTTP request failed" are only logged and result in an empty string;
 * every other error is re-thrown.
 *
 * @param RequestInterface $request
 * @return string Tika output, empty string if the server could not be reached
 * @throws ClientExceptionInterface
 * @throws Throwable
 */
protected function queryTika(RequestInterface $request): string
{
    $tikaOutput = '';
    try {
        $response = $this->psr7Client->sendRequest($request);
        $statusCode = $response->getStatusCode();
        if (!in_array($statusCode, $this->validStatusCodes, true)) {
            throw new BadResponseException(
                'Invalid status code ' . $statusCode,
                $request,
                $response
            );
        }

        $tikaOutput = $response->getBody()->getContents();
    } catch (Throwable $exception) {
        $message = $exception->getMessage();
        $serverUnavailable = str_contains($message, 'Connection refused')
            || str_contains($message, 'HTTP request failed');

        if (!$serverUnavailable) {
            // If the server is simply not available it would say Connection refused
            // since that is not the case something else went wrong
            throw $exception;
        }

        $this->log($message, [], LogLevel::ERROR);
    }

    return $tikaOutput;
}
297
|
|
|
|
298
|
|
|
/**
 * Takes a file reference and extracts the text from it.
 *
 * The file content is sent with a PUT request to the "/tika" endpoint,
 * asking for a plain text answer. The outcome is logged; an empty answer is
 * logged as an error.
 *
 * @param FileInterface $file
 * @return string Extracted text as returned by the Tika server
 * @throws ClientExceptionInterface
 * @throws Throwable
 */
public function extractText(FileInterface $file): string
{
    $request = $this->createRequestForEndpoint('/tika', 'PUT')
        ->withAddedHeader('Content-Type', 'application/octet-stream')
        ->withAddedHeader('Accept', 'text/plain')
        ->withAddedHeader('Connection', 'close')
        ->withProtocolVersion('1.1')
        ->withBody($this->convertFileIntoStream($file));

    $extractedText = $this->queryTika($request);

    if (empty($extractedText)) {
        $this->log(
            'Text Extraction using Tika Server failed',
            $this->getLogData($file, $extractedText),
            LogLevel::ERROR
        );

        return $extractedText;
    }

    $this->log(
        'Text Extraction using Tika Server',
        $this->getLogData($file, $extractedText)
    );

    return $extractedText;
}
332
|
|
|
|
333
|
|
|
/**
 * Takes a file reference and extracts its meta-data.
 *
 * The file content is sent with a PUT request to the "/meta" endpoint,
 * asking for a JSON answer. An answer that does not decode to an array is
 * logged as an error and yields an empty array.
 *
 * @param FileInterface $file
 * @return array Decoded meta data, empty array on failure
 * @throws ClientExceptionInterface
 * @throws Throwable
 */
public function extractMetaData(FileInterface $file): array
{
    $request = $this->createRequestForEndpoint('/meta', 'PUT')
        ->withAddedHeader('Content-Type', 'application/octet-stream')
        ->withAddedHeader('Accept', 'application/json')
        ->withAddedHeader('Connection', 'close')
        ->withProtocolVersion('1.1')
        ->withBody($this->convertFileIntoStream($file));

    $rawMetaData = $this->queryTika($request);
    $metaData = json_decode($rawMetaData, true);
    $logData = $this->getLogData($file, $rawMetaData);

    if (is_array($metaData)) {
        $this->log('Meta Data Extraction using Tika Server', $logData);

        return $metaData;
    }

    $this->log(
        'Meta Data Extraction using Tika Server failed',
        $logData,
        LogLevel::ERROR
    );

    return [];
}
368
|
|
|
|
369
|
|
|
/**
 * Takes a file reference and detects its content's language.
 *
 * The file content is sent with a PUT request to the "/language/stream"
 * endpoint. The outcome is logged; an empty answer is logged as an error.
 *
 * @param FileInterface $file
 * @return string Answer of the Tika server
 * @throws ClientExceptionInterface
 * @throws Throwable
 */
public function detectLanguageFromFile(FileInterface $file): string
{
    $request = $this->createRequestForEndpoint('/language/stream', 'PUT')
        ->withAddedHeader('Content-Type', 'application/octet-stream')
        ->withAddedHeader('Connection', 'close')
        ->withProtocolVersion('1.1')
        ->withBody($this->convertFileIntoStream($file));

    $detectedLanguage = $this->queryTika($request);

    if (empty($detectedLanguage)) {
        $this->log(
            'Language Detection using Tika Server failed',
            $this->getLogData($file, $detectedLanguage),
            LogLevel::ERROR
        );

        return $detectedLanguage;
    }

    $this->log(
        'Language Detection using Tika Server',
        $this->getLogData($file, $detectedLanguage)
    );

    return $detectedLanguage;
}
402
|
|
|
|
403
|
|
|
/**
 * Takes a string as input and detects its language.
 *
 * The input is sent with a PUT request to the "/language/string" endpoint.
 *
 * @param string $input Text to detect the language of
 * @return string Answer of the Tika server
 * @throws ClientExceptionInterface
 * @throws Throwable
 */
public function detectLanguageFromString(string $input): string
{
    $stream = new Stream('php://temp', 'rw');
    $stream->write($input);
    // Writing leaves the pointer at the end of the stream; rewind so that a
    // client reading from the current position sends the whole input.
    $stream->rewind();

    $request = $this->createRequestForEndpoint('/language/string', 'PUT')
        ->withAddedHeader('Content-Type', 'application/octet-stream')
        ->withAddedHeader('Connection', 'close')
        ->withProtocolVersion('1.1')
        ->withBody($stream);

    return $this->queryTika($request);
}
423
|
|
|
|
424
|
|
|
/**
 * List of supported mime types
 *
 * The list is kept in a static property and only built while that property
 * is still empty.
 *
 * @return array
 * @throws ClientExceptionInterface
 * @throws Throwable
 */
public function getSupportedMimeTypes(): array
{
    if (self::$supportedMimeTypes === []) {
        self::$supportedMimeTypes = $this->buildSupportedMimeTypes();
    }

    return self::$supportedMimeTypes;
}
441
|
|
|
|
442
|
|
|
/**
 * Queries the "/mime-types" endpoint of the tika server, asking for a JSON
 * answer, and returns the raw response body.
 *
 * @return string
 * @throws ClientExceptionInterface
 * @throws Throwable
 */
protected function getMimeTypeJsonFromTikaServer(): string
{
    $mimeTypesRequest = $this->createRequestForEndpoint('/mime-types')
        ->withAddedHeader('Content-Type', 'application/octet-stream')
        ->withAddedHeader('Accept', 'application/json')
        ->withAddedHeader('Connection', 'close')
        ->withProtocolVersion('1.1');

    $mimeTypesJson = $this->queryTika($mimeTypesRequest);

    return $mimeTypesJson;
}
459
|
|
|
|
460
|
|
|
/**
 * Build the list of supported mime types
 *
 * The JSON answer of the mime type endpoint is decoded into an object whose
 * property names are the core mime types. Each entry may carry an "alias"
 * list; those aliases are added to the result as well. Empty values and
 * duplicates are removed and the list is sorted by value.
 *
 * @return array Supported mime types, empty array if the answer is empty or not a JSON object
 * @throws ClientExceptionInterface
 * @throws Throwable
 */
protected function buildSupportedMimeTypes(): array
{
    $response = $this->getMimeTypeJsonFromTikaServer();
    // In case the response is empty, no further processing is needed
    if (empty($response)) {
        return [];
    }

    $result = json_decode($response);

    // In case the result is not an object, no further processing is needed
    if (!is_object($result)) {
        return [];
    }

    $coreTypes = [];
    $aliasTypes = [];
    foreach (get_object_vars($result) as $coreMimeType => $configuration) {
        $coreTypes[] = $coreMimeType;
        if (isset($configuration->alias) && is_array($configuration->alias)) {
            foreach ($configuration->alias as $alias) {
                // Only plain strings are usable as mime type names
                if (is_string($alias)) {
                    $aliasTypes[] = $alias;
                }
            }
        }
    }

    // Both lists are integer-indexed, so the array union operator (+) would
    // drop every entry whose index already exists; array_merge() appends.
    $supportedTypes = array_merge($coreTypes, $aliasTypes);
    $supportedTypes = array_unique(array_filter($supportedTypes));
    asort($supportedTypes);

    return $supportedTypes;
}
497
|
|
|
|
498
|
|
|
/**
 * Creates a new request with given method for the given endpoint of the
 * Tika server.
 * This method is a wrapper for createRequest()
 *
 * @param string $endpoint Path of the endpoint
 * @param string $method HTTP method
 * @return RequestInterface
 */
protected function createRequestForEndpoint(string $endpoint, string $method = 'GET'): RequestInterface
{
    $endpointUri = $this->createEndpoint($endpoint);

    return $this->createRequest($endpointUri, $method);
}
510
|
|
|
|
511
|
|
|
/**
 * Creates a new request with given method and uri and adds the User-Agent
 * header to it.
 *
 * @param UriInterface $uri
 * @param string $method HTTP method
 * @return RequestInterface
 */
protected function createRequest(UriInterface $uri, string $method = 'GET'): RequestInterface
{
    return GeneralUtility::makeInstance(RequestFactory::class)
        ->createRequest($method, $uri)
        ->withAddedHeader('User-Agent', $this->getUserAgent());
}
528
|
|
|
|
529
|
|
|
/**
 * Creates a new URI by setting the given endpoint as path on the Tika
 * server Uri.
 *
 * @param string $endpoint Path of the endpoint
 * @return Uri
 */
protected function createEndpoint(string $endpoint): Uri
{
    $serverUri = $this->getTikaServerUri();

    return $serverUri->withPath($endpoint);
}
540
|
|
|
|
541
|
|
|
/**
 * Convert a file into a stream
 *
 * The file contents are copied into a temporary stream, which is rewound
 * before it is returned.
 *
 * @param FileInterface $file
 * @return Stream Stream holding the file contents, positioned at its start
 */
protected function convertFileIntoStream(FileInterface $file): Stream
{
    $stream = new Stream('php://temp', 'rw');
    $stream->write($file->getContents());
    // Writing leaves the pointer at the end of the stream; rewind so that a
    // client reading from the current position sends the whole file.
    $stream->rewind();

    return $stream;
}
553
|
|
|
|
554
|
|
|
/**
 * Returns the user agent that should be used for the requests: the
 * User-Agent header from the TYPO3 HTTP configuration, or "TYPO3" when none
 * is set.
 *
 * @return string
 */
protected function getUserAgent(): string
{
    $configuredUserAgent = $GLOBALS['TYPO3_CONF_VARS']['HTTP']['headers']['User-Agent'] ?? null;

    return $configuredUserAgent ?? 'TYPO3';
}
563
|
|
|
|
564
|
|
|
/**
 * Build the log information: file name, public URL of the file, Tika server
 * URL and the given response.
 *
 * @param FileInterface $file
 * @param string $response Raw answer of the Tika server
 * @return array
 */
protected function getLogData(FileInterface $file, string $response): array
{
    $logData = [];
    $logData['file'] = $file->getName();
    $logData['file_path'] = $file->getPublicUrl();
    $logData['tika_url'] = $this->getTikaServerUrl();
    $logData['response'] = $response;

    return $logData;
}
580
|
|
|
} |
581
|
|
|
|