1
|
|
|
<?php |
2
|
|
|
namespace TYPO3\CMS\IndexedSearch\Domain\Repository; |
3
|
|
|
|
4
|
|
|
/* |
5
|
|
|
* This file is part of the TYPO3 CMS project. |
6
|
|
|
* |
7
|
|
|
* It is free software; you can redistribute it and/or modify it under |
8
|
|
|
* the terms of the GNU General Public License, either version 2 |
9
|
|
|
* of the License, or any later version. |
10
|
|
|
* |
11
|
|
|
* For the full copyright and license information, please read the |
12
|
|
|
* LICENSE.txt file that was distributed with this source code. |
13
|
|
|
* |
14
|
|
|
* The TYPO3 project - inspiring people to share! |
15
|
|
|
*/ |
16
|
|
|
|
17
|
|
|
use Doctrine\DBAL\Driver\Statement; |
18
|
|
|
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration; |
19
|
|
|
use TYPO3\CMS\Core\Database\Connection; |
20
|
|
|
use TYPO3\CMS\Core\Database\ConnectionPool; |
21
|
|
|
use TYPO3\CMS\Core\Database\Query\QueryHelper; |
22
|
|
|
use TYPO3\CMS\Core\Database\Query\Restriction\FrontendRestrictionContainer; |
23
|
|
|
use TYPO3\CMS\Core\TimeTracker\TimeTracker; |
24
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
25
|
|
|
use TYPO3\CMS\Core\Utility\MathUtility; |
26
|
|
|
use TYPO3\CMS\IndexedSearch\Indexer; |
27
|
|
|
use TYPO3\CMS\IndexedSearch\Utility; |
28
|
|
|
|
29
|
|
|
/** |
30
|
|
|
* Index search abstraction to search through the index |
31
|
|
|
*/ |
32
|
|
|
class IndexSearchRepository |
33
|
|
|
{ |
34
|
|
|
/** |
35
|
|
|
* Indexer object |
36
|
|
|
* |
37
|
|
|
* @var Indexer |
38
|
|
|
*/ |
39
|
|
|
protected $indexerObj; |
40
|
|
|
|
41
|
|
|
/** |
42
|
|
|
* External Parsers |
43
|
|
|
* |
44
|
|
|
* @var array |
45
|
|
|
*/ |
46
|
|
|
protected $externalParsers = []; |
47
|
|
|
|
48
|
|
|
/** |
49
|
|
|
* Frontend User Group List |
50
|
|
|
* |
51
|
|
|
* @var string |
52
|
|
|
*/ |
53
|
|
|
protected $frontendUserGroupList = ''; |
54
|
|
|
|
55
|
|
|
/** |
56
|
|
|
* Sections |
57
|
|
|
* formerly known as $this->piVars['sections'] |
58
|
|
|
* |
59
|
|
|
* @var string |
60
|
|
|
*/ |
61
|
|
|
protected $sections = null; |
62
|
|
|
|
63
|
|
|
/** |
64
|
|
|
* Search type |
65
|
|
|
* formerly known as $this->piVars['type'] |
66
|
|
|
* |
67
|
|
|
* @var string |
68
|
|
|
*/ |
69
|
|
|
protected $searchType = null; |
70
|
|
|
|
71
|
|
|
/** |
72
|
|
|
* Language uid |
73
|
|
|
* formerly known as $this->piVars['lang'] |
74
|
|
|
* |
75
|
|
|
* @var int |
76
|
|
|
*/ |
77
|
|
|
protected $languageUid = null; |
78
|
|
|
|
79
|
|
|
/** |
80
|
|
|
* Media type |
81
|
|
|
* formerly known as $this->piVars['media'] |
82
|
|
|
* |
83
|
|
|
* @var int |
84
|
|
|
*/ |
85
|
|
|
protected $mediaType = null; |
86
|
|
|
|
87
|
|
|
/** |
88
|
|
|
* Sort order |
89
|
|
|
* formerly known as $this->piVars['sort_order'] |
90
|
|
|
* |
91
|
|
|
* @var string |
92
|
|
|
*/ |
93
|
|
|
protected $sortOrder = null; |
94
|
|
|
|
95
|
|
|
/** |
96
|
|
|
* Descending sort order flag |
97
|
|
|
* formerly known as $this->piVars['desc'] |
98
|
|
|
* |
99
|
|
|
* @var bool |
100
|
|
|
*/ |
101
|
|
|
protected $descendingSortOrderFlag = null; |
102
|
|
|
|
103
|
|
|
/** |
104
|
|
|
* Result page pointer |
105
|
|
|
* formerly known as $this->piVars['pointer'] |
106
|
|
|
* |
107
|
|
|
* @var int |
108
|
|
|
*/ |
109
|
|
|
protected $resultpagePointer = 0; |
110
|
|
|
|
111
|
|
|
/** |
112
|
|
|
* Number of results |
113
|
|
|
* formerly known as $this->piVars['result'] |
114
|
|
|
* |
115
|
|
|
* @var int |
116
|
|
|
*/ |
117
|
|
|
protected $numberOfResults = 10; |
118
|
|
|
|
119
|
|
|
/** |
120
|
|
|
* list of all root pages that will be used |
121
|
|
|
* If this value is set to less than zero (eg. -1) searching will happen |
122
|
|
|
* in ALL of the page tree with no regard to branches at all. |
123
|
|
|
* |
124
|
|
|
* @var string |
125
|
|
|
*/ |
126
|
|
|
protected $searchRootPageIdList; |
127
|
|
|
|
128
|
|
|
/** |
129
|
|
|
* formerly known as $conf['search.']['searchSkipExtendToSubpagesChecking'] |
130
|
|
|
* enabled through settings.searchSkipExtendToSubpagesChecking |
131
|
|
|
* |
132
|
|
|
* @var bool |
133
|
|
|
*/ |
134
|
|
|
protected $joinPagesForQuery = false; |
135
|
|
|
|
136
|
|
|
/** |
137
|
|
|
* Select clauses for individual words, will be filled during the search |
138
|
|
|
* |
139
|
|
|
* @var array |
140
|
|
|
*/ |
141
|
|
|
protected $wSelClauses = []; |
142
|
|
|
|
143
|
|
|
/** |
144
|
|
|
* Flag for exact search count |
145
|
|
|
* formerly known as $conf['search.']['exactCount'] |
146
|
|
|
* |
147
|
|
|
* Continue counting and checking of results even if we are sure |
148
|
|
|
* they are not displayed in this request. This will slow down your |
149
|
|
|
* page rendering, but it allows precise search result counters. |
150
|
|
|
* enabled through settings.exactCount |
151
|
|
|
* |
152
|
|
|
* @var bool |
153
|
|
|
*/ |
154
|
|
|
protected $useExactCount = false; |
155
|
|
|
|
156
|
|
|
/** |
157
|
|
|
* Display forbidden records |
158
|
|
|
* formerly known as $this->conf['show.']['forbiddenRecords'] |
159
|
|
|
* |
160
|
|
|
* enabled through settings.displayForbiddenRecords |
161
|
|
|
* |
162
|
|
|
* @var bool |
163
|
|
|
*/ |
164
|
|
|
protected $displayForbiddenRecords = false; |
165
|
|
|
|
166
|
|
|
/**
 * Initialize all options that are necessary for the search.
 *
 * Creates the Indexer helper, stores the external parsers and the root page list,
 * reads the frontend user group list from the TypoScript frontend controller and
 * copies the relevant plugin settings / search data into this repository.
 *
 * Optional keys of $settings and $searchData are read defensively so that a missing
 * key does not raise an "undefined index" notice.
 *
 * @param array $settings the extbase plugin settings
 * @param array $searchData the search data
 * @param array $externalParsers
 * @param string $searchRootPageIdList
 */
public function initialize($settings, $searchData, $externalParsers, $searchRootPageIdList)
{
    // Initialize the indexer-class - just to use a few functions (for making hashes)
    $this->indexerObj = GeneralUtility::makeInstance(Indexer::class);
    $this->externalParsers = $externalParsers;
    $this->searchRootPageIdList = $searchRootPageIdList;
    $this->frontendUserGroupList = $this->getTypoScriptFrontendController()->gr_list;

    // The flags below are only ever switched on here; a falsy setting keeps the current value.
    // Should we use joinPagesForQuery instead of long lists of uids?
    if (!empty($settings['searchSkipExtendToSubpagesChecking'])) {
        // The property is a bool flag, so store TRUE rather than the integer 1.
        $this->joinPagesForQuery = true;
    }
    if (!empty($settings['exactCount'])) {
        $this->useExactCount = true;
    }
    if (!empty($settings['displayForbiddenRecords'])) {
        $this->displayForbiddenRecords = true;
    }

    $this->sections = $searchData['sections'] ?? null;
    $this->searchType = $searchData['searchType'] ?? null;
    $this->languageUid = $searchData['languageUid'] ?? null;
    // A missing media type is stored as FALSE (unchanged from the previous behaviour).
    $this->mediaType = $searchData['mediaType'] ?? false;
    $this->sortOrder = $searchData['sortOrder'] ?? null;
    $this->descendingSortOrderFlag = $searchData['desc'] ?? null;
    // Fall back to the first result page when no pointer was submitted.
    $this->resultpagePointer = $searchData['pointer'] ?? 0;
    if (isset($searchData['numberOfResults']) && is_numeric($searchData['numberOfResults'])) {
        $this->numberOfResults = (int)$searchData['numberOfResults'];
    }
}
202
|
|
|
|
203
|
|
|
/**
 * Get search result rows / data from database. Returned as data in array.
 *
 * The result statement is obtained from a registered hook object, from the MySQL
 * fulltext implementation or from the default word-index implementation (in that
 * order of precedence). The rows are then de-duplicated by phash grouping and by
 * "contentHash.data_page_id", and only the rows of the currently viewed result page
 * are collected.
 *
 * @param array $searchWords Search word array
 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
 * @return bool|array FALSE if no result, otherwise an array with keys for first row, result rows and total number of results found.
 */
public function doSearch($searchWords, $freeIndexUid = -1)
{
    $useMysqlFulltext = (bool)GeneralUtility::makeInstance(ExtensionConfiguration::class)->get('indexed_search', 'useMysqlFulltext');
    // Getting SQL result pointer:
    $this->getTimeTracker()->push('Searching result');
    // Plain assignment: objects are handles already, and assigning a function result
    // by reference raises a notice unless the function returns by reference.
    $hookObj = $this->hookRequest('getResultRows_SQLpointer');
    if ($hookObj) {
        $result = $hookObj->getResultRows_SQLpointer($searchWords, $freeIndexUid);
    } elseif ($useMysqlFulltext) {
        $result = $this->getResultRows_SQLpointerMysqlFulltext($searchWords, $freeIndexUid);
    } else {
        $result = $this->getResultRows_SQLpointer($searchWords, $freeIndexUid);
    }
    $this->getTimeTracker()->pull();

    if (!$result) {
        // No results found
        return false;
    }

    // Total search-result count
    // NOTE(review): rowCount() for SELECT statements is driver dependent - confirm for the DBMS in use.
    $count = $result->rowCount();
    // The pointer is set to the result page that is currently being viewed.
    // Guard against a page size of zero (division by zero) and hand an integer bound to forceIntegerInRange().
    $highestPointer = $this->numberOfResults > 0 ? (int)floor($count / $this->numberOfResults) : 0;
    $pointer = MathUtility::forceIntegerInRange($this->resultpagePointer, 0, $highestPointer);
    // Rows with a position in ]$firstVisible, $lastVisible] belong to the current result page.
    $firstVisible = $pointer * $this->numberOfResults;
    $lastVisible = $firstVisible + $this->numberOfResults;

    // Result pointer: Counts up the position in the current search-result
    $c = 0;
    // Used to filter out duplicates (phash_grouping => TRUE).
    $groupingPhashes = [];
    // Used to filter out duplicates BASED ON cHash ("contentHash.data_page_id" => TRUE).
    // Array keys are used instead of a loose in_array(): loosely compared, the numeric-looking
    // strings "123.10" and "123.1" are equal and would wrongly be treated as duplicates.
    $groupingChashes = [];
    // Will hold the first row in result - used to calculate relative hit-ratings.
    $firstRow = [];
    // Will hold the results rows for display.
    $resultRows = [];

    // Now, traverse result and put the rows to be displayed into an array
    // Each row should contain the fields from 'ISEC.*, IP.*' combined
    // + artificial fields "show_resume" (bool) and "result_number" (counter)
    while ($row = $result->fetch()) {
        // Set first row (overwritten until the first row has been accepted)
        if (!$c) {
            $firstRow = $row;
        }
        // Tells whether we can link directly to a document
        // or not (depends on possible right problems)
        $row['show_resume'] = $this->checkResume($row);

        $contentKey = $row['contentHash'] . '.' . $row['data_page_id'];
        if (isset($groupingPhashes[$row['phash_grouping']]) || isset($groupingChashes[$contentKey])) {
            // For each time a phash_grouping document is found
            // (which is thus not displayed) the search-result count is reduced,
            // so that it matches the number of rows displayed.
            $count--;
            continue;
        }
        // Only if the resume may be shown are we going to filter out duplicates...
        if (!$row['show_resume'] && !$this->displayForbiddenRecords) {
            // Skip this row if the user cannot
            // view it (missing permission)
            $count--;
            continue;
        }
        // Only on documents which are not multiple pages documents
        if (!$this->multiplePagesType($row['item_type'])) {
            $groupingPhashes[$row['phash_grouping']] = true;
        }
        $groupingChashes[$contentKey] = true;
        // Increase the result pointer
        $c++;
        // All rows for display is put into resultRows[]
        if ($c > $firstVisible && $c <= $lastVisible) {
            $row['result_number'] = $c;
            $resultRows[] = $row;
            // This may lead to a problem: If the result check is not stopped here, the search will take longer.
            // However the result counter will not filter out grouped cHashes/pHashes that were not processed yet.
            // You can change this behavior using the "search.exactCount" property (see above).
            if (!$this->useExactCount && $c + 1 > $lastVisible) {
                break;
            }
        }
    }

    $result->closeCursor();

    return [
        'resultRows' => $resultRows,
        'firstRow' => $firstRow,
        'count' => $count
    ];
}
298
|
|
|
|
299
|
|
|
/**
 * Gets a SQL result pointer to traverse for the search records.
 *
 * Collects the list of matching phash integers for the search words first and,
 * if that list is not empty, runs the final (sorted) query on it.
 *
 * @param array $searchWords Search words
 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
 * @return Statement|bool Result of the final query, FALSE if no phash matched the search words
 */
protected function getResultRows_SQLpointer($searchWords, $freeIndexUid = -1)
{
    // COMPLETE list of phash-integers matching the search words
    $phashList = $this->getPhashList($searchWords);
    if (!$phashList) {
        // Nothing matched, so there is no final query to run
        return false;
    }

    // Perform SQL Search / collection of result rows array
    $this->getTimeTracker()->push('execFinalQuery');
    $statement = $this->execFinalQuery($phashList, $freeIndexUid);
    $this->getTimeTracker()->pull();

    return $statement;
}
321
|
|
|
|
322
|
|
|
/**
 * Gets a SQL result pointer to traverse for the search records.
 *
 * MySQL fulltext specific version, triggered by the ext_conf_template setting 'useMysqlFulltext'.
 *
 * @param array $searchWordsArray Search words
 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
 * @return Statement|bool Result of the fulltext query, FALSE if no search data could be built
 * @throws \RuntimeException if table index_fulltext does not live on a MySQL connection
 */
protected function getResultRows_SQLpointerMysqlFulltext($searchWordsArray, $freeIndexUid = -1)
{
    $fulltextConnection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('index_fulltext');
    $serverVersion = $fulltextConnection->getServerVersion();
    // The version string has to START with "MySQL"
    if (strpos($serverVersion, 'MySQL') !== 0) {
        throw new \RuntimeException(
            'Extension indexed_search is configured to use mysql fulltext, but table \'index_fulltext\''
            . ' is running on a different DBMS.',
            1472585525
        );
    }

    // Build the search string, detect which fulltext index to use, and decide whether boolean search is needed or not
    $searchData = $this->getSearchString($searchWordsArray);
    if (!$searchData) {
        return false;
    }

    // Perform SQL Search / collection of result rows array
    /** @var TimeTracker $timeTracker */
    $timeTracker = GeneralUtility::makeInstance(TimeTracker::class);
    $timeTracker->push('execFinalQuery');
    $statement = $this->execFinalQuery_fulltext($searchData, $freeIndexUid);
    $timeTracker->pull();

    return $statement;
}
355
|
|
|
|
356
|
|
|
/**
 * Builds the search data for use with a MySQL FULLTEXT query.
 *
 * mysql fulltext specific helper method
 *
 * Each search word is prefixed with the operator matching its 'oper' value
 * ("+" for AND, "-" for AND NOT, nothing for OR). As soon as a search word contains
 * a space, the search type is switched to "sentence" (20) for this and all following
 * words, and the final search string becomes a quoted phrase.
 *
 * @param array $searchWordArray Search word array; every entry provides 'sword' and optionally 'oper'
 * @return array Array with the keys 'searchBoolean' (bool), 'searchString' (string) and 'fulltextIndex' (string)
 */
protected function getSearchString($searchWordArray)
{
    // Change this to TRUE to force BOOLEAN SEARCH MODE (useful if fulltext index is still empty)
    $searchBoolean = false;
    $fulltextIndex = 'index_fulltext.fulltextdata';
    // This holds the result if the search is natural (doesn't contain any boolean operators)
    $naturalSearchString = '';
    // This holds the result if the search is boolean (contains +/-/| operators)
    $booleanSearchString = '';

    $searchType = (string)$this->getSearchType();

    // Traverse searchwords and prefix them with corresponding operator
    foreach ($searchWordArray as $searchWordData) {
        // Making the query for a single search word based on the search-type
        $searchWord = $searchWordData['sword'];
        $wildcard = '';
        // A space inside the search word turns the search into a sentence search
        if (strpos($searchWord, ' ') !== false) {
            $searchType = '20';
        }
        switch ($searchType) {
            case '1':
            case '2':
            case '3':
                // First part of word
                $wildcard = '*';
                // Part-of-word search requires boolean mode!
                $searchBoolean = true;
                break;
            case '10':
                // Sounds like: search in the metaphone data instead of the plain fulltext
                /** @var Indexer $indexerObj */
                $indexerObj = GeneralUtility::makeInstance(Indexer::class);
                $searchWord = $indexerObj->metaphone($searchWord, $indexerObj->storeMetaphoneInfoAsWords);
                unset($indexerObj);
                $fulltextIndex = 'index_fulltext.metaphonedata';
                break;
            case '20':
                $searchBoolean = true;
                // Remove existing quotes and fix misplaced quotes.
                $searchWord = trim(str_replace('"', ' ', $searchWord));
                break;
        }
        // Perform search for word; a missing operator is treated like AND
        switch ($searchWordData['oper'] ?? '') {
            case 'AND NOT':
                $booleanSearchString .= ' -' . $searchWord . $wildcard;
                $searchBoolean = true;
                break;
            case 'OR':
                $booleanSearchString .= ' ' . $searchWord . $wildcard;
                $searchBoolean = true;
                break;
            default:
                $booleanSearchString .= ' +' . $searchWord . $wildcard;
                $naturalSearchString .= ' ' . $searchWord;
        }
    }

    if ($searchType == '20') {
        // Sentence search: match the words as one quoted phrase
        $searchString = '"' . trim($naturalSearchString) . '"';
    } elseif ($searchBoolean) {
        $searchString = trim($booleanSearchString);
    } else {
        $searchString = trim($naturalSearchString);
    }

    return [
        'searchBoolean' => $searchBoolean,
        'searchString' => $searchString,
        'fulltextIndex' => $fulltextIndex
    ];
}
438
|
|
|
|
439
|
|
|
/**
 * Execute the final fulltext query, joining index_fulltext with index_phash (IP) and index_section (ISEC).
 *
 * mysql fulltext specific helper method
 *
 * The page restriction is built in one of three ways: by a registered hook object,
 * by joining the pages table (if joinPagesForQuery is enabled) or by collecting the
 * page tree below every search root page.
 *
 * @param array $searchData Array with search string, boolean indicator, and fulltext index reference
 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
 * @return Statement
 */
protected function execFinalQuery_fulltext($searchData, $freeIndexUid = -1)
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_fulltext');
    $queryBuilder->getRestrictions()->removeAll();
    $queryBuilder->select('index_fulltext.*', 'ISEC.*', 'IP.*')
        ->from('index_fulltext')
        ->join(
            'index_fulltext',
            'index_phash',
            'IP',
            $queryBuilder->expr()->eq('index_fulltext.phash', $queryBuilder->quoteIdentifier('IP.phash'))
        )
        ->join(
            'IP',
            'index_section',
            'ISEC',
            $queryBuilder->expr()->eq('IP.phash', $queryBuilder->quoteIdentifier('ISEC.phash'))
        );

    // Calling hook for alternative creation of page ID list
    $searchRootPageIdList = $this->getSearchRootPageIdList();
    // Plain assignment: objects are handles already, and assigning a function result
    // by reference raises a notice unless the function returns by reference.
    $hookObj = $this->hookRequest('execFinalQuery_idList');
    if ($hookObj) {
        $pageWhere = $hookObj->execFinalQuery_idList('');
        $queryBuilder->andWhere(QueryHelper::stripLogicalOperatorPrefix($pageWhere));
    } elseif ($this->joinPagesForQuery) {
        // Alternative to getting all page ids by ->getTreeList() where "excludeSubpages" is NOT respected.
        $queryBuilder
            ->join(
                'ISEC',
                'pages',
                'pages',
                $queryBuilder->expr()->eq('ISEC.page_id', $queryBuilder->quoteIdentifier('pages.uid'))
            )
            ->andWhere(
                $queryBuilder->expr()->eq(
                    'pages.no_search',
                    $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT)
                )
            )
            ->andWhere(
                $queryBuilder->expr()->lt(
                    'pages.doktype',
                    $queryBuilder->createNamedParameter(200, \PDO::PARAM_INT)
                )
            );
        $queryBuilder->setRestrictions(GeneralUtility::makeInstance(FrontendRestrictionContainer::class));
    } elseif ($searchRootPageIdList[0] >= 0) {
        // Collecting all pages IDs in which to search;
        // filtering out ALL pages that are not accessible due to restriction containers. Does NOT look for "no_search" field!
        $idList = [];
        foreach ($searchRootPageIdList as $rootId) {
            /** @var \TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer $cObj */
            $cObj = GeneralUtility::makeInstance(\TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer::class);
            $idList[] = $cObj->getTreeList(-1 * $rootId, 9999);
        }
        // Every getTreeList() result is a comma separated list; flatten them into one integer array
        $idList = GeneralUtility::intExplode(',', implode(',', $idList));
        $queryBuilder->andWhere(
            $queryBuilder->expr()->in(
                'ISEC.page_id',
                $queryBuilder->createNamedParameter($idList, Connection::PARAM_INT_ARRAY)
            )
        );
    }

    $searchBoolean = '';
    if ($searchData['searchBoolean']) {
        $searchBoolean = ' IN BOOLEAN MODE';
    }
    // The search string is bound as named parameter, only the mode keyword is concatenated
    $queryBuilder->andWhere(
        'MATCH (' . $queryBuilder->quoteIdentifier($searchData['fulltextIndex']) . ')'
        . ' AGAINST (' . $queryBuilder->createNamedParameter($searchData['searchString'])
        . $searchBoolean
        . ')'
    );

    $queryBuilder->andWhere(
        QueryHelper::stripLogicalOperatorPrefix($this->mediaTypeWhere()),
        QueryHelper::stripLogicalOperatorPrefix($this->languageWhere()),
        QueryHelper::stripLogicalOperatorPrefix($this->freeIndexUidWhere($freeIndexUid)),
        QueryHelper::stripLogicalOperatorPrefix($this->sectionTableWhere())
    );

    $queryBuilder->groupBy(
        'IP.phash',
        'ISEC.phash',
        'ISEC.phash_t3',
        'ISEC.rl0',
        'ISEC.rl1',
        'ISEC.rl2',
        'ISEC.page_id',
        'ISEC.uniqid',
        'IP.phash_grouping',
        'IP.data_filename',
        'IP.data_page_id',
        'IP.data_page_reg1',
        'IP.data_page_type',
        'IP.data_page_mp',
        'IP.gr_list',
        'IP.item_type',
        'IP.item_title',
        'IP.item_description',
        'IP.item_mtime',
        'IP.tstamp',
        'IP.item_size',
        'IP.contentHash',
        'IP.crdate',
        'IP.parsetime',
        'IP.sys_language_uid',
        'IP.item_crdate',
        'IP.cHashParams',
        'IP.externalUrl',
        'IP.recordUid',
        'IP.freeIndexUid',
        'IP.freeIndexSetId'
    );

    return $queryBuilder->execute();
}
566
|
|
|
|
567
|
|
|
/*********************************** |
568
|
|
|
* |
569
|
|
|
* Helper functions on searching (SQL) |
570
|
|
|
* |
571
|
|
|
***********************************/ |
572
|
|
|
/**
 * Returns a COMPLETE list of phash-integers matching the search-result composed of the search-words in the $searchWords array.
 * The list of phash integers are unsorted and should be used for subsequent selection of index_phash records for display of the result.
 *
 * Resets the collected word select clauses and, for sentence searches, switches the sort order to 'mtime'.
 *
 * @param array $searchWords Search word array
 * @return string List of integers
 */
protected function getPhashList($searchWords)
{
    // Position of the current search word
    $wordPosition = 0;
    // Accumulates the phash-values over all search words
    $collectedPhashes = [];
    $this->wSelClauses = [];

    // For each search word, select all phash integers and merge/diff/intersect them
    // with the result of the previous words (based on operator)
    foreach ($searchWords as $wordData) {
        $word = $wordData['sword'];
        $wordSearchType = (string)$this->searchType;
        // If there are spaces in the search-word, make a full text search instead.
        if (strstr($word, ' ')) {
            $wordSearchType = '20';
        }
        $this->getTimeTracker()->push('SearchWord "' . $word . '" - $theType=' . $wordSearchType);

        // Making the query for a single search word based on the search-type
        switch ($wordSearchType) {
            case '1':
                // Part of word
                $statement = $this->searchWord($word, Utility\LikeWildcard::BOTH);
                break;
            case '2':
                // First part of word
                $statement = $this->searchWord($word, Utility\LikeWildcard::RIGHT);
                break;
            case '3':
                // Last part of word
                $statement = $this->searchWord($word, Utility\LikeWildcard::LEFT);
                break;
            case '10':
                // Sounds like: perform metaphone search
                /** @var Indexer $metaphoneIndexer */
                $metaphoneIndexer = GeneralUtility::makeInstance(Indexer::class);
                $metaphoneAsWords = !$this->isTableUsed('index_words');
                $statement = $this->searchMetaphone($metaphoneIndexer->metaphone($word, $metaphoneAsWords));
                unset($metaphoneIndexer);
                break;
            case '20':
                // Sentence
                $statement = $this->searchSentence($word);
                // If there is a fulltext search for a sentence there is
                // a likeliness that sorting cannot be done by the rankings
                // from the rel-table (because no relations will exist for the
                // sentence in the word-table). So therefore mtime is used instead.
                // It is not required, but otherwise some hits may be left out.
                $this->sortOrder = 'mtime';
                break;
            default:
                // Distinct word
                $statement = $this->searchDistinct($word);
        }

        // If there was a query to do, then select all phash-integers which resulted from this.
        if ($statement) {
            $wordPhashes = [];
            while ($hit = $statement->fetch()) {
                $wordPhashes[] = $hit['phash'];
            }
            if (!$wordPosition) {
                // First search word: its hits are the starting point
                $collectedPhashes = $wordPhashes;
            } else {
                // Merge with the existing result based on whether we are dealing with OR, NOT or AND operations.
                switch ($wordData['oper']) {
                    case 'OR':
                        $collectedPhashes = array_unique(array_merge($wordPhashes, $collectedPhashes));
                        break;
                    case 'AND NOT':
                        $collectedPhashes = array_diff($collectedPhashes, $wordPhashes);
                        break;
                    default:
                        // AND...
                        $collectedPhashes = array_intersect($collectedPhashes, $wordPhashes);
                }
            }
        }

        $this->getTimeTracker()->pull();
        $wordPosition++;
    }

    return implode(',', $collectedPhashes);
}
667
|
|
|
|
668
|
|
|
/**
 * Executes a query which selects the phash values of a search-word from the word/rel tables.
 *
 * The tables index_words (IW), index_rel (IR) and index_section (ISEC) are combined
 * through the WHERE clause; the result is grouped by IR.phash.
 *
 * @param string $wordSel WHERE clause selecting the word from phash
 * @param string $additionalWhereClause Additional AND clause in the end of the query.
 * @return Statement
 */
protected function execPHashListQuery($wordSel, $additionalWhereClause = '')
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_words');
    $expressionBuilder = $queryBuilder->expr();

    // All constraints are AND-combined by where()
    $constraints = [
        QueryHelper::stripLogicalOperatorPrefix($wordSel),
        $expressionBuilder->eq('IW.wid', $queryBuilder->quoteIdentifier('IR.wid')),
        $expressionBuilder->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IR.phash')),
        QueryHelper::stripLogicalOperatorPrefix($this->sectionTableWhere()),
        QueryHelper::stripLogicalOperatorPrefix($additionalWhereClause),
    ];

    $queryBuilder->select('IR.phash');
    $queryBuilder->from('index_words', 'IW');
    $queryBuilder->from('index_rel', 'IR');
    $queryBuilder->from('index_section', 'ISEC');
    $queryBuilder->where(...$constraints);
    $queryBuilder->groupBy('IR.phash');

    return $queryBuilder->execute();
}
693
|
|
|
|
694
|
|
|
/**
 * Search for a word using a LIKE comparison on IW.baseword.
 *
 * The generated select clause is remembered in $this->wSelClauses.
 *
 * @param string $sWord the search word
 * @param int $wildcard Bit-field of Utility\LikeWildcard
 * @return Statement
 */
protected function searchWord($sWord, $wildcard)
{
    $wordSelectClause = Utility\LikeWildcard::cast($wildcard)
        ->getLikeQueryPart('index_words', 'IW.baseword', $sWord);
    $this->wSelClauses[] = $wordSelectClause;

    return $this->execPHashListQuery($wordSelectClause, ' AND is_stopword=0');
}
712
|
|
|
|
713
|
|
|
/**
 * Search for one distinct word by comparing IW.wid with the integer hash of the word.
 *
 * The generated select clause is remembered in $this->wSelClauses.
 *
 * @param string $sWord the search word
 * @return Statement
 */
protected function searchDistinct($sWord)
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_words');
    $expr = $queryBuilder->expr();

    $wordSelectClause = $expr->eq('IW.wid', $this->md5inthash($sWord));
    $this->wSelClauses[] = $wordSelectClause;

    // Stop words are excluded from the result
    $noStopWords = $expr->eq('is_stopword', 0);

    return $this->execPHashListQuery($wordSelectClause, $noStopWords);
}
728
|
|
|
|
729
|
|
|
/**
 * Search for a sentence with a LIKE comparison (wildcards on both sides) on the fulltext data.
 *
 * As no single word is selected, the placeholder clause '1=1' is remembered in $this->wSelClauses.
 *
 * @param string $sWord the search word
 * @return Statement
 */
protected function searchSentence($sWord)
{
    $this->wSelClauses[] = '1=1';

    $sentenceConstraint = Utility\LikeWildcard::cast(Utility\LikeWildcard::BOTH)
        ->getLikeQueryPart('index_fulltext', 'IFT.fulltextdata', $sWord);

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_section');
    $queryBuilder->select('ISEC.phash');
    $queryBuilder->from('index_section', 'ISEC');
    $queryBuilder->from('index_fulltext', 'IFT');
    $queryBuilder->where(
        QueryHelper::stripLogicalOperatorPrefix($sentenceConstraint),
        // Connect the section rows with their fulltext rows
        $queryBuilder->expr()->eq('ISEC.phash', $queryBuilder->quoteIdentifier('IFT.phash')),
        QueryHelper::stripLogicalOperatorPrefix($this->sectionTableWhere())
    );
    $queryBuilder->groupBy('ISEC.phash');

    return $queryBuilder->execute();
}
757
|
|
|
|
758
|
|
|
/**
 * Search for a metaphone word by comparing IW.metaphone with the given value.
 *
 * The generated select clause is remembered in $this->wSelClauses.
 *
 * @param string $sWord the search word
 * @return Statement
 */
protected function searchMetaphone($sWord)
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_words');
    $expr = $queryBuilder->expr();

    // The metaphone value is embedded as a quoted literal
    $metaphoneSelectClause = $expr->eq('IW.metaphone', $expr->literal($sWord));
    $this->wSelClauses[] = $metaphoneSelectClause;

    // Stop words are excluded from the result
    $noStopWords = $expr->eq('is_stopword', 0);

    return $this->execPHashListQuery($metaphoneSelectClause, $noStopWords);
}
773
|
|
|
|
774
|
|
|
/**
 * Returns AND statement for selection of section in database. (rootlevel 0-2 + page_id)
 *
 * Conditions are collected from $this->searchRootPageIdList (ISEC.rl0) and
 * from $this->sections, which is either a prefixed id list ("rl1_...",
 * "rl2_..." or "<configured field>_...") or one of the static values
 * "-1", "-2" and "-3".
 *
 * @return string AND clause for selection of section in database.
 */
public function sectionTableWhere()
{
    $expr = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_section')
        ->expr();

    $conditions = $expr->andX();
    $sectionMatched = false;

    if (!($this->searchRootPageIdList < 0)) {
        $rootPageIds = GeneralUtility::intExplode(',', $this->searchRootPageIdList, true);
        $conditions->add($expr->in('ISEC.rl0', $rootPageIds));
    }

    $sectionPrefix = substr($this->sections, 0, 4);
    if (in_array($sectionPrefix, ['rl1_', 'rl2_'], true)) {
        // "rl1_" maps to ISEC.rl1, "rl2_" to ISEC.rl2; the remainder is the id list.
        $conditions->add(
            $expr->in(
                'ISEC.' . substr($sectionPrefix, 0, 3),
                GeneralUtility::intExplode(',', substr($this->sections, 4))
            )
        );
        $sectionMatched = true;
    } else {
        // Traversing user configured fields to see if any of those are used to limit search to a section:
        $configuredFields = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] ?? [];
        foreach ($configuredFields as $fieldName => $rootLineLevel) {
            $prefixLength = strlen($fieldName) + 1;
            if (substr($this->sections, 0, $prefixLength) == $fieldName . '_') {
                $conditions->add(
                    $expr->in(
                        'ISEC.' . $fieldName,
                        GeneralUtility::intExplode(',', substr($this->sections, $prefixLength))
                    )
                );
                $sectionMatched = true;
                break;
            }
        }
    }

    // If no match above, test the static types:
    if (!$sectionMatched) {
        switch ((string)$this->sections) {
            case '-1':
                // Restrict to the page id of the current frontend controller
                $currentPageId = (int)$this->getTypoScriptFrontendController()->id;
                $conditions->add($expr->eq('ISEC.page_id', $currentPageId));
                break;
            case '-2':
                $conditions->add($expr->eq('ISEC.rl2', 0));
                break;
            case '-3':
                $conditions->add($expr->gt('ISEC.rl2', 0));
                break;
        }
    }

    if ($conditions->count()) {
        return ' AND ' . $conditions;
    }

    return '';
}
836
|
|
|
|
837
|
|
|
/**
 * Returns AND statement for selection of media type
 *
 * Depends on $this->mediaType: '0' limits to IP.item_type = '0', '-2' to
 * everything except item_type '0', false / '-1' adds no restriction, and
 * any other value is matched literally against IP.item_type.
 *
 * @return string AND statement for selection of media type
 */
public function mediaTypeWhere()
{
    $expr = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_phash')
        ->expr();

    // NOTE: switch compares loosely, which the "case false" label relies on.
    switch ($this->mediaType) {
        case '0':
            // '0' => 'only TYPO3 pages',
            return ' AND ' . $expr->eq('IP.item_type', $expr->literal('0'));
        case '-2':
            // All external documents
            return ' AND ' . $expr->neq('IP.item_type', $expr->literal('0'));
        case false:
            // Intentional fall-through
        case '-1':
            // All content
            return '';
        default:
            return ' AND ' . $expr->eq('IP.item_type', $expr->literal($this->mediaType));
    }
}
867
|
|
|
|
868
|
|
|
/**
 * Returns AND statement for selection of language
 *
 * A negative $this->languageUid yields no restriction; otherwise the
 * result compares IP.sys_language_uid with the integer language uid.
 *
 * @return string AND statement for selection of language
 */
public function languageWhere()
{
    // -1 is the same as ALL language.
    if ($this->languageUid < 0) {
        return '';
    }

    $languageCondition = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('index_phash')
        ->expr()
        ->eq('IP.sys_language_uid', (int)$this->languageUid);

    return ' AND ' . $languageCondition;
}
886
|
|
|
|
887
|
|
|
/**
 * Where-clause for free index-uid value.
 *
 * A negative value yields no restriction. Otherwise the uid is first looked
 * up as an index_config record of type 5 whose "indexcfgs" field holds a
 * comma separated list of "<table>_<uid>" references ("index_config" and
 * "pages" are handled). If no such record exists, the uid itself is used.
 *
 * @param int $freeIndexUid Free Index UID value to limit search to.
 * @return string WHERE SQL clause part.
 */
public function freeIndexUidWhere($freeIndexUid)
{
    $freeIndexUid = (int)$freeIndexUid;
    if ($freeIndexUid < 0) {
        return '';
    }

    $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

    // First, look if the freeIndexUid is a meta configuration:
    $queryBuilder = $connectionPool->getQueryBuilderForTable('index_config');
    $indexCfgRec = $queryBuilder->select('indexcfgs')
        ->from('index_config')
        ->where(
            $queryBuilder->expr()->eq('type', $queryBuilder->createNamedParameter(5, \PDO::PARAM_INT)),
            $queryBuilder->expr()->eq(
                'uid',
                $queryBuilder->createNamedParameter($freeIndexUid, \PDO::PARAM_INT)
            )
        )
        ->execute()
        ->fetch();

    if (is_array($indexCfgRec)) {
        // Default value to protect against empty array.
        $list = [-99];
        // Empty entries are dropped: they match no table below and would only
        // trigger an undefined offset notice when split.
        $refs = GeneralUtility::trimExplode(',', $indexCfgRec['indexcfgs'], true);
        foreach ($refs as $ref) {
            // Pad the result so a reference without "_" still yields two parts.
            list($table, $uid) = array_pad(GeneralUtility::revExplode('_', $ref, 2), 2, '');
            $uid = (int)$uid;
            // A fresh query builder per reference keeps named parameters separate.
            $queryBuilder = $connectionPool->getQueryBuilderForTable('index_config');
            $queryBuilder->select('uid')
                ->from('index_config');
            switch ($table) {
                case 'index_config':
                    // Direct reference: keep the uid if the record exists.
                    $idxRec = $queryBuilder
                        ->where(
                            $queryBuilder->expr()->eq(
                                'uid',
                                $queryBuilder->createNamedParameter($uid, \PDO::PARAM_INT)
                            )
                        )
                        ->execute()
                        ->fetch();
                    if ($idxRec) {
                        $list[] = $uid;
                    }
                    break;
                case 'pages':
                    // Page reference: collect all index_config records stored on that page.
                    $indexCfgRecordsFromPid = $queryBuilder
                        ->where(
                            $queryBuilder->expr()->eq(
                                'pid',
                                $queryBuilder->createNamedParameter($uid, \PDO::PARAM_INT)
                            )
                        )
                        ->execute();
                    while ($idxRec = $indexCfgRecordsFromPid->fetch()) {
                        $list[] = $idxRec['uid'];
                    }
                    break;
            }
        }
        $list = array_unique($list);
    } else {
        $list = [$freeIndexUid];
    }

    $expressionBuilder = $connectionPool
        ->getQueryBuilderForTable('index_phash')
        ->expr();

    return ' AND ' . $expressionBuilder->in('IP.freeIndexUid', array_map('intval', $list));
}
965
|
|
|
|
966
|
|
|
/**
 * Execute final query, based on phash integer list. The main point is sorting the result in the right order.
 *
 * Selects ISEC.* and IP.* for the given phash values, applies the media
 * type, language and free index restrictions, optionally limits the pages
 * (hook, pages join or page tree of the root pages) and finally adds the
 * ordering selected by $this->sortOrder.
 *
 * @param string $list List of phash integers which match the search.
 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
 * @return Statement
 */
protected function execFinalQuery($list, $freeIndexUid = -1)
{
    $query = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_words');
    $expr = $query->expr();

    // Every selected column is listed, as the result is grouped per phash.
    $groupByColumns = [
        'IP.phash',
        'ISEC.phash',
        'ISEC.phash_t3',
        'ISEC.rl0',
        'ISEC.rl1',
        'ISEC.rl2',
        'ISEC.page_id',
        'ISEC.uniqid',
        'IP.phash_grouping',
        'IP.data_filename',
        'IP.data_page_id',
        'IP.data_page_reg1',
        'IP.data_page_type',
        'IP.data_page_mp',
        'IP.gr_list',
        'IP.item_type',
        'IP.item_title',
        'IP.item_description',
        'IP.item_mtime',
        'IP.tstamp',
        'IP.item_size',
        'IP.contentHash',
        'IP.crdate',
        'IP.parsetime',
        'IP.sys_language_uid',
        'IP.item_crdate',
        'IP.cHashParams',
        'IP.externalUrl',
        'IP.recordUid',
        'IP.freeIndexUid',
        'IP.freeIndexSetId',
    ];

    $query
        ->select('ISEC.*', 'IP.*')
        ->from('index_phash', 'IP')
        ->from('index_section', 'ISEC')
        ->where(
            $expr->in(
                'IP.phash',
                $query->createNamedParameter(
                    GeneralUtility::intExplode(',', $list, true),
                    Connection::PARAM_INT_ARRAY
                )
            ),
            QueryHelper::stripLogicalOperatorPrefix($this->mediaTypeWhere()),
            QueryHelper::stripLogicalOperatorPrefix($this->languageWhere()),
            QueryHelper::stripLogicalOperatorPrefix($this->freeIndexUidWhere($freeIndexUid)),
            $expr->eq('ISEC.phash', $query->quoteIdentifier('IP.phash'))
        )
        ->groupBy(...$groupByColumns);

    // Setting up methods of filtering results
    // based on page types, access, etc.
    $idListHook = $this->hookRequest('execFinalQuery_idList');
    if ($idListHook) {
        // Calling hook for alternative creation of page ID list
        $hookCondition = QueryHelper::stripLogicalOperatorPrefix($idListHook->execFinalQuery_idList($list));
        if (!empty($hookCondition)) {
            $query->andWhere($hookCondition);
        }
    } elseif ($this->joinPagesForQuery) {
        // Alternative to getting all page ids by ->getTreeList() where
        // "excludeSubpages" is NOT respected.
        $query->setRestrictions(GeneralUtility::makeInstance(FrontendRestrictionContainer::class));
        $query->from('pages');
        $query->andWhere(
            $expr->eq('pages.uid', $query->quoteIdentifier('ISEC.page_id')),
            $expr->eq('pages.no_search', $query->createNamedParameter(0, \PDO::PARAM_INT)),
            $expr->lt('pages.doktype', $query->createNamedParameter(200, \PDO::PARAM_INT))
        );
    } elseif ($this->searchRootPageIdList >= 0) {
        // Collecting all pages IDs in which to search;
        // filtering out ALL pages that are not accessible due to restriction containers.
        // Does NOT look for "no_search" field!
        $treeLists = array_map(
            function ($rootId) {
                return $this->getTypoScriptFrontendController()->cObj->getTreeList(-1 * $rootId, 9999);
            },
            GeneralUtility::intExplode(',', $this->searchRootPageIdList)
        );
        $searchablePageIds = array_unique(GeneralUtility::intExplode(',', implode(',', $treeLists), true));
        $query->andWhere(
            $expr->in(
                'ISEC.page_id',
                $query->createNamedParameter($searchablePageIds, Connection::PARAM_INT_ARRAY)
            )
        );
    }
    // otherwise select all / disable everything

    if (substr($this->sortOrder, 0, 5) === 'rank_') {
        // If any of the ranking sortings are selected, we must make a
        // join with the word/rel-table again, because we need to
        // calculate ranking based on all search-words found.
        $query
            ->from('index_words', 'IW')
            ->from('index_rel', 'IR')
            ->andWhere(
                $expr->eq('IW.wid', $query->quoteIdentifier('IR.wid')),
                $expr->eq('ISEC.phash', $query->quoteIdentifier('IR.phash'))
            );

        switch ($this->sortOrder) {
            case 'rank_flag':
                // This gives priority to word-position (max-value) so that words in title, keywords, description counts more than in content.
                // The ordering is refined with the frequency sum as well.
                $query->addSelectLiteral(
                    $expr->max('IR.flags', 'order_val1'),
                    $expr->sum('IR.freq', 'order_val2')
                );
                $query->orderBy('order_val1', $this->getDescendingSortOrderFlag());
                $query->addOrderBy('order_val2', $this->getDescendingSortOrderFlag());
                break;
            case 'rank_first':
                // Results in average position of search words on page.
                // Must be inversely sorted (low numbers are closer to top)
                $query->addSelectLiteral($expr->avg('IR.first', 'order_val'));
                $query->orderBy('order_val', $this->getDescendingSortOrderFlag(true));
                break;
            case 'rank_count':
                // Number of words found
                $query->addSelectLiteral($expr->sum('IR.count', 'order_val'));
                $query->orderBy('order_val', $this->getDescendingSortOrderFlag());
                break;
            default:
                // Frequency sum. I'm not sure if this is the best way to do
                // it (make a sum...). Or should it be the average?
                $query->addSelectLiteral($expr->sum('IR.freq', 'order_val'));
                $query->orderBy('order_val', $this->getDescendingSortOrderFlag());
        }

        if (!empty($this->wSelClauses)) {
            // So, words are combined in an OR statement
            // (no "sentence search" should be done here - may deselect results)
            $anyWordCondition = $expr->orX();
            foreach ($this->wSelClauses as $wordClause) {
                $anyWordCondition->add(QueryHelper::stripLogicalOperatorPrefix($wordClause));
            }
            $query->andWhere($anyWordCondition);
        }
    } else {
        // Otherwise, if sorting are done with the pages table or other fields,
        // there is no need for joining with the rel/word tables:
        $sortColumns = [
            'title' => 'IP.item_title',
            'crdate' => 'IP.item_crdate',
            'mtime' => 'IP.item_mtime',
        ];
        $sortKey = (string)$this->sortOrder;
        if (isset($sortColumns[$sortKey])) {
            $query->orderBy($sortColumns[$sortKey], $this->getDescendingSortOrderFlag());
        }
    }

    return $query->execute();
}
1141
|
|
|
|
1142
|
|
|
/**
 * Checking if the resume can be shown for the search result
 * (depending on whether the rights are OK)
 * ? Should it also check for gr_list "0,-1"?
 *
 * @param array $row Result row array.
 * @return bool Returns TRUE if resume can safely be shown
 */
protected function checkResume($row)
{
    // If the record is indexed by an indexing configuration, just show it.
    // At least this is needed for external URLs and files.
    // For records we might need to extend this - for instance block display if record is access restricted.
    if ($row['freeIndexUid']) {
        return true;
    }

    $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable('index_grlist');

    // Evaluate regularly indexed pages based on item_type:
    if ($row['item_type']) {
        // External media: check the access of the parent page on which the media was linked from.
        // "phash_t3" is the phash of the parent TYPO3 page row which initiated the indexing of the
        // documents in this section. If no grlist record with the current users gr_list is found
        // for it, there is still a theoretical possibility that another user accessible page would
        // display a link, so the resume may be unjustified hidden. But better safe than sorry.
        $phashField = 'phash_t3';
    } elseif ((string)$row['gr_list'] !== (string)$this->frontendUserGroupList) {
        // Ordinary TYPO3 pages indexed with another gr_list: a grlist record for the phash-row
        // carrying the current users gr_list is proof that this user has direct access to the
        // content although he did not himself initiate the indexing...
        $phashField = 'phash';
    } else {
        // Ordinary TYPO3 page indexed with exactly the current gr_list.
        return true;
    }

    if (!$this->isTableUsed('index_grlist')) {
        return false;
    }

    $matchingRecords = $connection->count(
        'phash',
        'index_grlist',
        [
            'phash' => (int)$row[$phashField],
            'gr_list' => $this->frontendUserGroupList
        ]
    );

    return (bool)$matchingRecords;
}
1201
|
|
|
|
1202
|
|
|
/**
 * Returns " DESC" or "" depending on the settings of the incoming
 * highest/lowest result order (piVars['desc'])
 *
 * " DESC" is returned when the (optionally inverted) value of
 * $this->descendingSortOrderFlag is falsy, "" otherwise.
 *
 * @param bool $inverse If TRUE, inverse the order which is defined by piVars['desc']
 * @return string " DESC" or formerly known as tx_indexedsearch_pi->isDescending
 */
protected function getDescendingSortOrderFlag($inverse = false)
{
    $flag = (bool)$this->descendingSortOrderFlag;
    if ($inverse) {
        $flag = !$flag;
    }

    if ($flag) {
        return '';
    }

    return ' DESC';
}
1217
|
|
|
|
1218
|
|
|
/**
 * Returns if an item type is a multipage item type
 *
 * The parser registered for the item type in $this->externalParsers decides;
 * item types without a registered parser object are never multipage.
 *
 * @param string $itemType Item type
 * @return bool TRUE if multipage capable
 */
protected function multiplePagesType($itemType)
{
    // Null-coalescing avoids an "undefined index" notice for item types
    // that have no external parser registered.
    /** @var \TYPO3\CMS\IndexedSearch\FileContentParser|null $fileContentParser */
    $fileContentParser = $this->externalParsers[$itemType] ?? null;

    return is_object($fileContentParser) && $fileContentParser->isMultiplePageExtension($itemType);
}
1230
|
|
|
|
1231
|
|
|
/**
 * md5 integer hash
 * Using 7 instead of 8 just because that makes the integers lower than
 * 32 bit (28 bit) and so they do not interfere with UNSIGNED integers
 * or PHP-versions which has varying output from the hexdec function.
 *
 * Delegates to Utility\IndexedSearchUtility::md5inthash().
 *
 * @param string $str String to hash
 * @return int Integer interpretation of the md5 hash of input string.
 */
protected function md5inthash($str)
{
    $hash = Utility\IndexedSearchUtility::md5inthash($str);

    return $hash;
}
1244
|
|
|
|
1245
|
|
|
/**
 * Check if the tables provided are configured for usage.
 * This becomes necessary for extensions that provide additional database
 * functionality like indexed_search_mysql.
 *
 * Delegates to Utility\IndexedSearchUtility::isTableUsed().
 *
 * @param string $table_list Comma-separated list of tables
 * @return bool TRUE if given tables are enabled
 */
protected function isTableUsed($table_list)
{
    $tablesEnabled = Utility\IndexedSearchUtility::isTableUsed($table_list);

    return $tablesEnabled;
}
1257
|
|
|
|
1258
|
|
|
/**
 * Returns an object reference to the hook object if any
 *
 * The hook class is read from
 * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['pi1_hooks'][$functionName].
 * The instance is only returned if it provides a method named like the
 * hook key; in that case its "pObj" property is set to this repository.
 *
 * @param string $functionName Name of the function you want to call / hook key
 * @return object|null Hook object, if any. Otherwise NULL.
 */
public function hookRequest($functionName)
{
    // Null-coalescing avoids "undefined index" notices when no hook is configured.
    $hookClassName = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['pi1_hooks'][$functionName] ?? null;
    if (empty($hookClassName)) {
        return null;
    }

    $hookObj = GeneralUtility::makeInstance($hookClassName);
    if (!method_exists($hookObj, $functionName)) {
        return null;
    }

    // Give the hook object access to the calling repository.
    $hookObj->pObj = $this;

    return $hookObj;
}
1276
|
|
|
|
1277
|
|
|
/**
 * Search type
 * e.g. sentence (20), any part of the word (1)
 *
 * @return int The value of $this->searchType cast to integer
 */
public function getSearchType()
{
    $searchType = (int)$this->searchType;

    return $searchType;
}
1287
|
|
|
|
1288
|
|
|
/**
 * A list of integer which should be root-pages to search from
 *
 * Splits the comma separated $this->searchRootPageIdList into integers.
 *
 * @return int[]
 */
public function getSearchRootPageIdList()
{
    $rootPageIds = GeneralUtility::intExplode(',', $this->searchRootPageIdList);

    return $rootPageIds;
}
1297
|
|
|
|
1298
|
|
|
/**
 * Getter for joinPagesForQuery flag
 * enabled through TypoScript 'settings.skipExtendToSubpagesChecking'
 *
 * @return bool The current value of $this->joinPagesForQuery
 */
public function getJoinPagesForQuery()
{
    $joinPages = $this->joinPagesForQuery;

    return $joinPages;
}
1308
|
|
|
|
1309
|
|
|
/**
 * Returns the frontend controller stored in $GLOBALS['TSFE'].
 *
 * @return \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController
 */
protected function getTypoScriptFrontendController()
{
    $frontendController = $GLOBALS['TSFE'];

    return $frontendController;
}
1316
|
|
|
|
1317
|
|
|
/**
 * Returns the TimeTracker instance obtained through GeneralUtility::makeInstance().
 *
 * @return TimeTracker
 */
protected function getTimeTracker()
{
    $timeTracker = GeneralUtility::makeInstance(TimeTracker::class);

    return $timeTracker;
}
1324
|
|
|
} |
1325
|
|
|
|
This check looks for assignments to scalar types that may be of the wrong type.
To ensure the code behaves as expected, it may be a good idea to add an explicit type cast.