1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace ApacheSolrForTypo3\Tika\Report; |
6
|
|
|
|
7
|
|
|
/*
 * This file is part of the TYPO3 CMS project.
 *
 * It is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License, either version 2
 * of the License, or any later version.
 *
 * For the full copyright and license information, please read the
 * LICENSE.txt file that was distributed with this source code.
 *
 * The TYPO3 project - inspiring people to share!
 */
19
|
|
|
|
20
|
|
|
use ApacheSolrForTypo3\Solr\ConnectionManager; |
21
|
|
|
use ApacheSolrForTypo3\Solr\System\Solr\SolrConnection; |
22
|
|
|
use ApacheSolrForTypo3\Tika\Service\Tika\ServerService; |
23
|
|
|
use ApacheSolrForTypo3\Tika\Util; |
24
|
|
|
use ApacheSolrForTypo3\Tika\Utility\FileUtility; |
25
|
|
|
use Exception; |
26
|
|
|
use Psr\Log\LoggerAwareInterface; |
27
|
|
|
use Psr\Log\LoggerAwareTrait; |
28
|
|
|
use Solarium\QueryType\Extract\Query; |
29
|
|
|
use TYPO3\CMS\Core\Configuration\Exception\ExtensionConfigurationExtensionNotConfiguredException; |
30
|
|
|
use TYPO3\CMS\Core\Configuration\Exception\ExtensionConfigurationPathDoesNotExistException; |
31
|
|
|
use TYPO3\CMS\Core\Utility\CommandUtility; |
32
|
|
|
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility; |
33
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
34
|
|
|
use TYPO3\CMS\Reports\Status; |
35
|
|
|
use TYPO3\CMS\Reports\StatusProviderInterface; |
36
|
|
|
|
37
|
|
|
/**
 * Provides a status report about whether Tika is properly configured
 *
 * Depending on the configured extractor ("jar"/"tika", "server" or "solr")
 * a different set of checks is executed, each producing one Status entry.
 *
 * @author Ingo Renner <[email protected]>
 * @copyright (c) 2010-2015 Ingo Renner <[email protected]>
 */
class TikaStatus implements StatusProviderInterface, LoggerAwareInterface
{
    use LoggerAwareTrait;

    /**
     * EXT:tika configuration.
     *
     * @var array
     */
    protected array $tikaConfiguration = [];

    /**
     * Constructor, reads the extension's configuration.
     *
     * When no configuration is passed, it is loaded through
     * Util::getTikaExtensionConfiguration().
     *
     * @param array|null $extensionConfiguration Configuration to use instead of the stored one
     * @throws ExtensionConfigurationExtensionNotConfiguredException
     * @throws ExtensionConfigurationPathDoesNotExistException
     */
    public function __construct(?array $extensionConfiguration = null)
    {
        $this->tikaConfiguration = $extensionConfiguration ?? Util::getTikaExtensionConfiguration();
    }

    /**
     * Checks whether Tika is properly configured.
     *
     * @return array List of Status objects, one per executed check; empty
     *               when the configured extractor is missing or unknown
     *
     * @throws Exception
     */
    public function getStatus(): array
    {
        $checks = [];

        // A missing "extractor" key is treated like an unknown extractor
        // (no checks) instead of triggering an undefined-key warning.
        switch ($this->tikaConfiguration['extractor'] ?? '') {
            case 'jar':
            case 'tika': // backwards compatibility only
                // for the app java is required
                $checks[] = $this->getJavaInstalledStatus(Status::ERROR);
                $checks[] = $this->getAppConfigurationStatus();
                break;
            case 'server':
                // for the server only recommended since it could also run on another node
                $checks[] = $this->getJavaInstalledStatus(Status::WARNING);
                $checks[] = $this->getServerConfigurationStatus();
                break;
            case 'solr':
                $checks[] = $this->getSolrCellConfigurationStatus();
                break;
        }

        return $checks;
    }

    /**
     * Creates a configuration OK status.
     *
     * @return Status
     */
    protected function getOkStatus(): Status
    {
        return GeneralUtility::makeInstance(
            Status::class,
            'Apache Tika',
            'Configuration OK'
        );
    }

    /**
     * Creates a system status report status checking whether Java is installed.
     *
     * @param int $severity Severity used for the status when Java is not found
     * @return Status "Java OK" status, or a "Java Not Found" status with the given severity
     */
    protected function getJavaInstalledStatus(int $severity = Status::ERROR): Status
    {
        if ($this->isJavaInstalled()) {
            return GeneralUtility::makeInstance(
                Status::class,
                'Apache Tika',
                'Java OK'
            );
        }

        return GeneralUtility::makeInstance(
            Status::class,
            'Apache Tika',
            'Java Not Found',
            '<p>Please install Java.</p>',
            $severity
        );
    }

    /**
     * Checks configuration for use with Tika app jar.
     *
     * @return Status OK status when the configured "tikaPath" points to an
     *                existing file, an ERROR status otherwise
     */
    protected function getAppConfigurationStatus(): Status
    {
        // An unset "tikaPath" is checked as an empty path so that the report
        // shows a status instead of failing with a TypeError under strict
        // types; the empty path is expected not to resolve to a file.
        $tikaPath = (string)($this->tikaConfiguration['tikaPath'] ?? '');

        if ($this->isFilePresent($tikaPath)) {
            return $this->getOkStatus();
        }

        return GeneralUtility::makeInstance(
            Status::class,
            'Apache Tika',
            'Configuration Incomplete',
            '<p>Could not find Tika app jar.</p>',
            Status::ERROR
        );
    }

    /**
     * Checks configuration for use with Tika server jar.
     *
     * @return Status OK status when the Tika server service reports itself
     *                as available, an ERROR status otherwise
     * @throws Exception
     */
    protected function getServerConfigurationStatus(): Status
    {
        $tikaServer = $this->getTikaServiceFromTikaConfiguration();

        if ($tikaServer->isAvailable()) {
            return $this->getOkStatus();
        }

        return GeneralUtility::makeInstance(
            Status::class,
            'Apache Tika',
            'Configuration Incomplete',
            '<p>Could not connect to Tika server.</p>',
            Status::ERROR
        );
    }

    /**
     * Checks configuration for use with Solr.
     *
     * Sends this extension's ext_emconf.php to Solr as an extract-only query
     * and regards the configuration as working when content (non-null) and
     * non-empty metadata come back. Exceptions raised during the attempt are
     * logged and result in an ERROR status.
     *
     * @return Status
     */
    protected function getSolrCellConfigurationStatus(): Status
    {
        $solrCellConfigurationOk = false;

        try {
            $solr = $this->getSolrConnectionFromTikaConfiguration();

            // try to extract text & meta data
            /** @var Query $query */
            $query = GeneralUtility::makeInstance(Query::class);
            $query->setExtractOnly(true);
            $query->setFile(ExtensionManagementUtility::extPath('tika', 'ext_emconf.php'));
            $query->addParam('extractFormat', 'text');

            [$extractedContent, $extractedMetadata] = $solr->getWriteService()->extractByQuery($query);

            $solrCellConfigurationOk = $extractedContent !== null && !empty($extractedMetadata);
        } catch (Exception $e) {
            $this->writeDevLog(
                'Exception while trying to extract file content',
                'tika',
                [
                    'configuration' => $this->tikaConfiguration,
                    'exception' => $e,
                ]
            );
        }

        if ($solrCellConfigurationOk) {
            return $this->getOkStatus();
        }

        return GeneralUtility::makeInstance(
            Status::class,
            'Apache Tika',
            'Configuration Incomplete',
            '<p>Could not extract file contents with Solr Cell.</p>',
            Status::ERROR
        );
    }

    /**
     * Builds a Solr connection from the solrHost, solrPort, solrPath and
     * solrScheme settings; the same endpoint is used for reading and writing.
     *
     * @return SolrConnection
     */
    protected function getSolrConnectionFromTikaConfiguration(): SolrConnection
    {
        $solrConfig = [
            'host' => $this->tikaConfiguration['solrHost'],
            'port' => (int)$this->tikaConfiguration['solrPort'],
            'path' => $this->tikaConfiguration['solrPath'],
            'scheme' => $this->tikaConfiguration['solrScheme'],
        ];

        $config = [
            'read' => $solrConfig,
            'write' => $solrConfig,
        ];

        return GeneralUtility::makeInstance(ConnectionManager::class)->getConnectionFromConfiguration($config);
    }

    /**
     * Creates the Tika server service, handing it the extension configuration.
     *
     * @return ServerService
     * @noinspection PhpIncompatibleReturnTypeInspection
     */
    protected function getTikaServiceFromTikaConfiguration(): ServerService
    {
        return GeneralUtility::makeInstance(
            ServerService::class,
            $this->tikaConfiguration
        );
    }

    /**
     * Checks if java is installed.
     *
     * @return bool Result of CommandUtility::checkCommand('java')
     */
    protected function isJavaInstalled(): bool
    {
        return CommandUtility::checkCommand('java');
    }

    /**
     * Checks if a certain file name is present.
     *
     * @param string $fileName File name, resolved through FileUtility::getAbsoluteFilePath()
     * @return bool TRUE when the resolved path is an existing regular file
     */
    protected function isFilePresent(string $fileName): bool
    {
        return is_file(FileUtility::getAbsoluteFilePath($fileName));
    }

    /**
     * Writes a debug log entry, used during testing to mock logging.
     *
     * Does nothing when no logger has been set on this instance, so that a
     * missing logger cannot turn a handled failure into a fatal error.
     *
     * @param string $message message
     * @param string $extKey extension key
     * @param array $data data
     */
    protected function writeDevLog(string $message, string $extKey, array $data = []): void
    {
        // LoggerAwareTrait leaves $logger unset until setLogger() is called.
        if ($this->logger === null) {
            return;
        }

        $this->logger->debug(
            $message,
            [
                'extension' => $extKey,
                'data' => $data,
            ]
        );
    }
}
302
|
|
|
|