1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace AOE\Crawler\Service; |
6
|
|
|
|
7
|
|
|
/* |
8
|
|
|
 * (c) 2020 AOE GmbH <dev@aoe.com>
9
|
|
|
* |
10
|
|
|
* This file is part of the TYPO3 Crawler Extension. |
11
|
|
|
* |
12
|
|
|
* It is free software; you can redistribute it and/or modify it under |
13
|
|
|
* the terms of the GNU General Public License, either version 2 |
14
|
|
|
* of the License, or any later version. |
15
|
|
|
* |
16
|
|
|
* For the full copyright and license information, please read the |
17
|
|
|
* LICENSE.txt file that was distributed with this source code. |
18
|
|
|
* |
19
|
|
|
* The TYPO3 project - inspiring people to share! |
20
|
|
|
*/ |
21
|
|
|
|
22
|
|
|
use Doctrine\DBAL\Connection;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
use TYPO3\CMS\Core\Database\QueryGenerator;
use TYPO3\CMS\Core\Utility\GeneralUtility;
26
|
|
|
|
27
|
|
|
/**
 * Builds crawler configurations from page TSconfig ("tx_crawler.crawlerCfg.paramSets.")
 * and expands parameter definitions into concrete lists of values.
 *
 * @internal since v9.2.5
 */
class ConfigurationService
{
    /**
     * Maximum number of values a single integer range ("[1-5000]") is expanded to.
     */
    private const MAX_RANGE_VALUES = 1000;

    /**
     * @var UrlService
     */
    private $urlService;

    /**
     * When truthy, its value is appended as "&MP=" to the base URL handed to the URL service.
     * Nothing in this class assigns it, so the plain "?id=" form is what is used here.
     *
     * @var bool
     */
    private $MP = false;

    public function __construct()
    {
        $this->urlService = GeneralUtility::makeInstance(UrlService::class);
    }

    /**
     * Keeps only the configurations whose key is listed in $allowedConfigurations.
     *
     * An empty allow-list means "no restriction": $configurations is returned untouched.
     * Keys are compared strictly (type and value) and the keys of the result are preserved.
     *
     * @param array $allowedConfigurations list of configuration keys that may stay
     * @param array $configurations configurations indexed by configuration key
     * @return array the filtered configurations
     */
    public static function removeDisallowedConfigurations(array $allowedConfigurations, array $configurations): array
    {
        if ($allowedConfigurations === []) {
            return $configurations;
        }

        return array_filter(
            $configurations,
            static function ($confKey) use ($allowedConfigurations): bool {
                return in_array($confKey, $allowedConfigurations, true);
            },
            ARRAY_FILTER_USE_KEY
        );
    }

    /**
     * Reads all parameter sets from the given page TSconfig and adds one entry per set to $res.
     *
     * Each entry holds the keys "subCfg", "paramParsed", "paramExpanded", "origin" (always
     * "pagets") and "URLs". A set that declares "pidsOnly" is only added when $pageId is
     * part of that list.
     *
     * @param array $pageTSConfig page TSconfig; "tx_crawler.crawlerCfg.paramSets." is evaluated
     * @param int $pageId page the configurations are compiled for
     * @param array $res already collected configurations; entries with the same key are overwritten
     * @return array $res extended by the configurations found in the page TSconfig
     */
    public function getConfigurationFromPageTS(array $pageTSConfig, int $pageId, array $res): array
    {
        $crawlerCfg = $pageTSConfig['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];
        foreach ($crawlerCfg as $key => $values) {
            // Only the "<key>." sub arrays describe a parameter set; the plain "<key>" string is read below.
            if (! is_array($values)) {
                continue;
            }
            $key = str_replace('.', '', (string) $key);

            // Sub configuration for a single configuration string:
            $subCfg = (array) ($crawlerCfg[$key . '.'] ?? []);
            $subCfg['key'] = $key;

            // Normalise the filter list ("a , b" becomes "a,b"); an empty or missing filter is left alone.
            $procInstrFilter = (string) ($subCfg['procInstrFilter'] ?? '');
            if ($procInstrFilter !== '') {
                $subCfg['procInstrFilter'] = implode(',', GeneralUtility::trimExplode(',', $procInstrFilter));
            }

            // "pidsOnly" is optional, a missing key means "valid for every page".
            $pidsOnly = (string) ($subCfg['pidsOnly'] ?? '');
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // Skip configurations that are page-specific and do not list the current page:
            if ($pidsOnly !== '' && ! GeneralUtility::inList($pidOnlyList, (string) $pageId)) {
                continue;
            }

            $paramParsed = GeneralUtility::explodeUrl2Array((string) ($crawlerCfg[$key] ?? ''));
            $paramExpanded = $this->expandParameters($paramParsed, $pageId);

            // recognize MP value
            $baseUrl = '?id=' . $pageId;
            if ($this->MP) {
                $baseUrl .= '&MP=' . $this->MP;
            }

            $res[$key] = [
                'subCfg' => $subCfg,
                'paramParsed' => $paramParsed,
                'paramExpanded' => $paramExpanded,
                'origin' => 'pagets',
                'URLs' => $this->urlService->compileUrls($paramExpanded, [$baseUrl]),
            ];
        }

        return $res;
    }

    /**
     * Will expand the parameters configuration to individual values. This follows a certain syntax of the value of each parameter.
     * Syntax of values:
     * - Basically: If the value is wrapped in [...] it will be expanded according to the following syntax, otherwise the value is taken literally
     * - Configuration is split by "|" and the parts are processed individually and finally added together
     * - For each configuration part:
     *   - "[int]-[int]" = Integer range, will be expanded to all values in between, values included, starting from low to high (max. 1000). Example "1-34" or "-40--30"
     *   - "_TABLE:[TCA table name];[_PID:[optional page id, default is current page]];[_ENABLELANG:1]" = Look up of table records from PID, filtering out deleted records. Example "_TABLE:tt_content; _PID:123"
     *     _ENABLELANG:1 picks only original records without their language overlays
     *     Further optional sub parts read by the code: _RECURSIVE, _PIDFIELD, _WHERE, _ADDTABLE and _FIELD
     *   - Default: Literal value
     *
     * After every part the hooks registered in
     * $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] are called.
     *
     * TODO: Write Functional Tests
     *
     * @param array $paramArray parameter name => configured value
     * @param int $pid page id used as default for "_TABLE" look ups and handed to the hooks
     * @return array parameter name => list of expanded values
     */
    public function expandParameters(array $paramArray, int $pid): array
    {
        // Traverse parameter names:
        foreach ($paramArray as $p => $v) {
            $v = trim((string) $v);

            // Without enclosing square brackets the value is literal and becomes the only value:
            if (strpos($v, '[') !== 0 || substr($v, -1) !== ']') {
                $paramArray[$p] = [$v];
                continue;
            }

            // Reset the paramArray value as an array and fill it from the "|" separated parts inside the brackets:
            $paramArray[$p] = [];
            foreach (explode('|', substr($v, 1, -1)) as $pV) {
                $trimmedPart = trim($pV);

                if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', $trimmedPart, $reg)) {
                    // Integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30)
                    foreach ($this->expandIntegerRange($reg) as $rangeValue) {
                        $paramArray[$p][] = $rangeValue;
                    }
                } elseif (strpos($trimmedPart, '_TABLE:') === 0) {
                    $paramArray[$p] = array_merge($paramArray[$p], $this->lookUpTableValues($pV, $pid));
                } else {
                    // Just add value (untrimmed, exactly as configured):
                    $paramArray[$p][] = $pV;
                }

                // Hook for processing own expandParameters place holder
                $this->callExpandParametersHooks($paramArray, $p, $pV, $pid);
            }

            // Make unique set of values; ksort() orders the outer array by parameter name:
            $paramArray[$p] = array_unique($paramArray[$p]);
            ksort($paramArray);
        }

        return $paramArray;
    }

    /**
     * Expands a matched "[int]-[int]" expression into its values, low to high, both ends included.
     *
     * @param array $reg preg_match() result, index 1 and 2 hold the two numbers
     * @return array at most self::MAX_RANGE_VALUES values
     */
    private function expandIntegerRange(array $reg): array
    {
        $reg = $this->swapIfFirstIsLargerThanSecond($reg);

        $values = [];
        // The size limit acts as runaway brake for huge ranges.
        for ($a = $reg[1]; $a <= $reg[2] && count($values) < self::MAX_RANGE_VALUES; $a++) {
            $values[] = $a;
        }

        return $values;
    }

    /**
     * Resolves a "_TABLE:..." part to the distinct values of one field of the matching records.
     *
     * @param string $part the raw part, sub parts separated by ";" and written as "_KEY:value"
     * @param int $pid page id used when the part has no "_PID"
     * @return array distinct field values; empty if the table is not in TCA or the field is not configured
     */
    private function lookUpTableValues(string $part, int $pid): array
    {
        // Parse parameters; a sub part without ":" yields a null value instead of an undefined offset.
        $subpartParams = [];
        foreach (GeneralUtility::trimExplode(';', $part) as $spV) {
            $pieces = GeneralUtility::trimExplode(':', $spV);
            $subpartParams[$pieces[0]] = $pieces[1] ?? null;
        }

        // Table exists:
        $table = (string) ($subpartParams['_TABLE'] ?? '');
        if (! isset($GLOBALS['TCA'][$table])) {
            return [];
        }

        $lookUpPid = isset($subpartParams['_PID']) ? (int) $subpartParams['_PID'] : $pid;
        $recursiveDepth = isset($subpartParams['_RECURSIVE']) ? (int) $subpartParams['_RECURSIVE'] : 0;
        $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid';
        // NOTE(review): _WHERE and _ADDTABLE are put into the query as raw SQL fragments.
        $where = $subpartParams['_WHERE'] ?? '';
        $addTable = $subpartParams['_ADDTABLE'] ?? '';

        // Only "uid" or a field configured in TCA columns may be selected.
        $fieldName = ($subpartParams['_FIELD'] ?? '') ?: 'uid';
        if ($fieldName !== 'uid' && empty($GLOBALS['TCA'][$table]['columns'][$fieldName])) {
            return [];
        }

        if ($recursiveDepth > 0) {
            /** @var QueryGenerator $queryGenerator */
            $queryGenerator = GeneralUtility::makeInstance(QueryGenerator::class);
            $pidList = $queryGenerator->getTreeList($lookUpPid, $recursiveDepth, 0, 1);
            $pidArray = GeneralUtility::intExplode(',', (string) $pidList);
        } else {
            $pidArray = [(string) $lookUpPid];
        }

        $queryBuilder = $this->getQueryBuilder($table);
        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

        $queryBuilder
            ->select($fieldName)
            ->from($table)
            ->where(
                $queryBuilder->expr()->in(
                    $pidField,
                    $queryBuilder->createNamedParameter($pidArray, Connection::PARAM_INT_ARRAY)
                ),
                $where
            );

        if (! empty($addTable)) {
            // TODO: Check if this works as intended!
            $queryBuilder->add('from', $addTable);
        }

        $transOrigPointerField = $GLOBALS['TCA'][$table]['ctrl']['transOrigPointerField'] ?? '';
        if (! empty($subpartParams['_ENABLELANG']) && $transOrigPointerField) {
            $queryBuilder->andWhere(
                $queryBuilder->expr()->lte(
                    $transOrigPointerField,
                    0
                )
            );
        }

        $statement = $queryBuilder->execute();

        // Collect the values as array keys so that every value is returned only once.
        $values = [];
        while ($row = $statement->fetch()) {
            $values[$row[$fieldName]] = true;
        }

        return array_keys($values);
    }

    /**
     * Calls every registered "expandParameters" hook for the part that was just processed.
     *
     * @param array $paramArray handed to the hooks by reference, so they can change the result
     * @param int|string $currentKey name of the parameter being expanded
     * @param string $currentValue the raw part that was processed
     * @param int $pid page id passed to expandParameters()
     */
    private function callExpandParametersHooks(array &$paramArray, $currentKey, string $currentValue, int $pid): void
    {
        $hooks = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] ?? null;
        if (! is_array($hooks)) {
            return;
        }

        $_params = [
            'pObj' => $this,
            'paramArray' => &$paramArray,
            'currentKey' => $currentKey,
            'currentValue' => $currentValue,
            'pid' => $pid,
        ];
        foreach ($hooks as $_funcRef) {
            GeneralUtility::callUserFunction($_funcRef, $_params, $this);
        }
    }

    /**
     * Returns a query builder for the given table, obtained from the connection pool.
     */
    private function getQueryBuilder(string $table): QueryBuilder
    {
        return GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($table);
    }

    /**
     * Makes sure index 1 of a range match holds the lower and index 2 the higher number.
     *
     * @param array $reg preg_match() result of the integer range expression
     * @return array $reg with index 1 and 2 swapped if index 1 was larger
     */
    private function swapIfFirstIsLargerThanSecond(array $reg): array
    {
        if ($reg[1] > $reg[2]) {
            [$reg[1], $reg[2]] = [$reg[2], $reg[1]];
        }

        return $reg;
    }
}
255
|
|
|
|