|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/** |
|
4
|
|
|
* (c) Kitodo. Key to digital objects e.V. <[email protected]> |
|
5
|
|
|
* |
|
6
|
|
|
* This file is part of the Kitodo and TYPO3 projects. |
|
7
|
|
|
* |
|
8
|
|
|
* @license GNU General Public License version 3 or later. |
|
9
|
|
|
* For the full copyright and license information, please read the |
|
10
|
|
|
* LICENSE.txt file that was distributed with this source code. |
|
11
|
|
|
*/ |
|
12
|
|
|
|
|
13
|
|
|
namespace Kitodo\Dlf\Command; |
|
14
|
|
|
|
|
15
|
|
|
use Symfony\Component\Console\Command\Command; |
|
16
|
|
|
use Symfony\Component\Console\Input\InputInterface; |
|
17
|
|
|
use Symfony\Component\Console\Input\InputOption; |
|
18
|
|
|
use Symfony\Component\Console\Output\OutputInterface; |
|
19
|
|
|
use Symfony\Component\Console\Style\SymfonyStyle; |
|
20
|
|
|
use TYPO3\CMS\Core\Core\Bootstrap; |
|
21
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
|
22
|
|
|
use TYPO3\CMS\Core\Database\ConnectionPool; |
|
23
|
|
|
use TYPO3\CMS\Core\Database\Connection; |
|
24
|
|
|
use Kitodo\Dlf\Command\BaseCommand; |
|
25
|
|
|
use Kitodo\Dlf\Common\Document; |
|
26
|
|
|
|
|
27
|
|
|
/** |
|
28
|
|
|
* CLI Command for re-indexing collections into database and Solr. |
|
29
|
|
|
* |
|
30
|
|
|
* @author Alexander Bigga <[email protected]> |
|
31
|
|
|
* @package TYPO3 |
|
32
|
|
|
* @subpackage dlf |
|
33
|
|
|
* @access public |
|
34
|
|
|
*/ |
|
35
|
|
|
class ReindexCommand extends BaseCommand
{
    /**
     * Configure the command by defining its description, help text and options.
     *
     * Registered options:
     *  - --dry-run     (flag)     only list what would be indexed
     *  - --coll|-c     (required) collection UID or comma-separated list of UIDs
     *  - --pid|-p      (required) UID of the page the documents belong to
     *  - --solr|-s     (required) UID or index name of the target Solr core
     *  - --all|-a      (flag)     reindex every document on the given page
     *
     * @return void
     */
    public function configure()
    {
        $this
            ->setDescription('Reindex a collection into database and Solr.')
            ->setHelp('')
            ->addOption(
                'dry-run',
                null,
                InputOption::VALUE_NONE,
                'If this option is set, the files will not actually be processed but the location URI is shown.'
            )
            ->addOption(
                'coll',
                'c',
                InputOption::VALUE_REQUIRED,
                'UID of the collection.'
            )
            ->addOption(
                'pid',
                'p',
                InputOption::VALUE_REQUIRED,
                'UID of the page the documents should be added to.'
            )
            ->addOption(
                'solr',
                's',
                InputOption::VALUE_REQUIRED,
                '[UID|index_name] of the Solr core the document should be added to.'
            )
            ->addOption(
                'all',
                'a',
                InputOption::VALUE_NONE,
                'Reindex all documents on the given page.'
            );
    }

    /**
     * Executes the command to index the selected documents to db and solr.
     *
     * Validation failures are reported on the console and signalled through a
     * non-zero return value instead of terminating the PHP process, so that a
     * caller running this command programmatically is not killed with it.
     *
     * @param InputInterface $input The input parameters
     * @param OutputInterface $output The Symfony interface for outputs on console
     *
     * @return int 0 when the run completed, 1 when the given options were invalid
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // Make sure the _cli_ user is loaded
        // NOTE(review): Bootstrap::getInstance() is reported as deprecated in newer
        // TYPO3 versions - confirm the targeted TYPO3 version before replacing it.
        Bootstrap::getInstance()->initializeBackendAuthentication();

        $dryRun = (bool) $input->getOption('dry-run');

        $io = new SymfonyStyle($input, $output);
        $io->title($this->getDescription());

        $pidOption = $input->getOption('pid');
        $startingPoint = $this->getStartingPoint($pidOption);

        if ($startingPoint == 0) {
            // Report what the user actually passed; $startingPoint is always 0 here.
            $io->error('ERROR: No valid PID (' . $pidOption . ') given.');
            return 1;
        }

        $solrOption = $input->getOption('solr');
        if (empty($solrOption) || is_array($solrOption)) {
            $io->error('ERROR: Required parameter --solr|-s is missing or array.');
            return 1;
        }

        $allSolrCores = $this->getSolrCores($startingPoint);
        $solrCoreUid = $this->getSolrCoreUid($allSolrCores, $solrOption);

        // Abort if solrCoreUid is empty or not in the array of allowed solr cores.
        if (empty($solrCoreUid) || !in_array($solrCoreUid, $allSolrCores)) {
            $availableCores = [];
            foreach ($allSolrCores as $indexName => $uid) {
                $availableCores[] = $uid . ' : ' . $indexName;
            }
            if (empty($availableCores)) {
                $io->error(
                    'ERROR: No valid Solr core ("' . $solrOption . '") given. '
                    . 'No valid cores found on PID ' . $startingPoint . ".\n"
                );
            } else {
                $io->error(
                    'ERROR: No valid Solr core ("' . $solrOption . '") given. '
                    . "Valid cores are (<uid>:<index_name>):\n"
                    . implode("\n", $availableCores) . "\n"
                );
            }
            return 1;
        }

        $collOption = $input->getOption('coll');
        if (!empty($input->getOption('all'))) {
            // Get all documents.
            $documents = $this->getAllDocuments($startingPoint);
        } elseif (!empty($collOption) && !is_array($collOption)) {
            // "coll" may be a single integer or a comma-separated list of integers.
            if (empty(array_filter(GeneralUtility::intExplode(',', $collOption, true)))) {
                $io->error('ERROR: Parameter --coll|-c is not a valid comma-separated list of collection UIDs.');
                return 1;
            }
            $documents = $this->getDocumentsToProceed($collOption, $startingPoint);
        } else {
            $io->error('ERROR: One of parameters --all|-a or --coll|-c must be given.');
            return 1;
        }

        $total = count($documents);
        foreach ($documents as $index => $documentUid) {
            // $documents is a 0-based list; show a 1-based position to the user.
            $position = $index + 1;
            $doc = Document::getInstance($documentUid, $startingPoint, true);
            if ($doc->ready) {
                $details = $position . '/' . $total . ' ' . $doc->uid
                    . ' ("' . $doc->location . '") on UID ' . $startingPoint
                    . ' and Solr core ' . $solrCoreUid . '.';
                if ($dryRun) {
                    $io->writeln('DRY RUN: Would index ' . $details);
                } else {
                    if ($io->isVerbose()) {
                        $io->writeln(date('Y-m-d H:i:s') . ' Indexing ' . $details);
                    }
                    // ...and save it to the database...
                    if (!$doc->save($startingPoint, $solrCoreUid)) {
                        $io->error('ERROR: Document "' . $documentUid . '" not saved and indexed.');
                    }
                }
            } else {
                $io->error('ERROR: Document "' . $documentUid . '" could not be loaded.');
            }
            // Clear document registry to prevent memory exhaustion.
            Document::clearRegistry();
        }

        $io->success('All done!');

        return 0;
    }

    /**
     * Fetches the UIDs of all documents related to the given collections.
     *
     * Documents are joined to collections through tx_dlf_relations rows whose
     * ident is "docs_colls"; only collections stored on the given page are
     * considered. Each document UID is returned once, in ascending order.
     *
     * @param string $collIds A comma separated list of collection UIDs
     * @param int $pageId The PID of the collections' documents
     *
     * @return array Array of document UIDs to index
     */
    protected function getDocumentsToProceed(string $collIds, int $pageId): array
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');

        $result = $queryBuilder
            ->select('tx_dlf_documents.uid')
            ->from('tx_dlf_documents')
            ->join(
                'tx_dlf_documents',
                'tx_dlf_relations',
                'tx_dlf_relations_joins',
                $queryBuilder->expr()->eq(
                    'tx_dlf_relations_joins.uid_local',
                    'tx_dlf_documents.uid'
                )
            )
            ->join(
                'tx_dlf_relations_joins',
                'tx_dlf_collections',
                'tx_dlf_collections_join',
                $queryBuilder->expr()->eq(
                    'tx_dlf_relations_joins.uid_foreign',
                    'tx_dlf_collections_join.uid'
                )
            )
            ->where(
                $queryBuilder->expr()->andX(
                    $queryBuilder->expr()->in(
                        'tx_dlf_collections_join.uid',
                        $queryBuilder->createNamedParameter(
                            GeneralUtility::intExplode(',', $collIds, true),
                            Connection::PARAM_INT_ARRAY
                        )
                    ),
                    $queryBuilder->expr()->eq(
                        'tx_dlf_collections_join.pid',
                        $queryBuilder->createNamedParameter($pageId, Connection::PARAM_INT)
                    ),
                    $queryBuilder->expr()->eq(
                        'tx_dlf_relations_joins.ident',
                        $queryBuilder->createNamedParameter('docs_colls')
                    )
                )
            )
            // A document may belong to several of the requested collections.
            ->groupBy('tx_dlf_documents.uid')
            ->orderBy('tx_dlf_documents.uid', 'ASC')
            ->execute();

        $documents = [];
        while ($record = $result->fetch()) {
            $documents[] = $record['uid'];
        }

        return $documents;
    }

    /**
     * Fetches the UIDs of all documents stored on the given page.
     *
     * @param int $pageId The documents' PID
     *
     * @return array Array of document UIDs to index, in ascending order
     */
    protected function getAllDocuments(int $pageId): array
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_documents');

        $result = $queryBuilder
            ->select('uid')
            ->from('tx_dlf_documents')
            ->where(
                $queryBuilder->expr()->eq(
                    'tx_dlf_documents.pid',
                    $queryBuilder->createNamedParameter($pageId, Connection::PARAM_INT)
                )
            )
            ->orderBy('tx_dlf_documents.uid', 'ASC')
            ->execute();

        $documents = [];
        while ($record = $result->fetch()) {
            $documents[] = $record['uid'];
        }

        return $documents;
    }
}
|
269
|
|
|
|
This function has been deprecated. The supplier of the function has supplied an explanatory message.
The explanatory message should give you some clue as to whether and when the function will be removed and what other function to use instead.