Passed
Push — Cleanup/CrawlerController ( cb7b19...6de449 )
by Tomas Norre
16:03
created

ConfigurationService::expandExcludeString()   B

Complexity

Conditions 9
Paths 3

Size

Total Lines 44
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
eloc 22
nc 3
nop 1
dl 0
loc 44
rs 8.0555
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Service;
6
7
/*
8
 * (c) 2021 AOE GmbH <[email protected]>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21
22
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
23
use AOE\Crawler\Domain\Repository\ConfigurationRepository;
24
use Doctrine\DBAL\Connection;
25
use TYPO3\CMS\Backend\Tree\View\PageTreeView;
26
use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
27
use TYPO3\CMS\Core\Core\Bootstrap;
28
use TYPO3\CMS\Core\Database\ConnectionPool;
29
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
30
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
31
use TYPO3\CMS\Core\Database\QueryGenerator;
32
use TYPO3\CMS\Core\Type\Bitmask\Permission;
33
use TYPO3\CMS\Core\TypoScript\Parser\TypoScriptParser;
34
use TYPO3\CMS\Core\Utility\GeneralUtility;
35
use TYPO3\CMS\Core\Utility\MathUtility;
36
use TYPO3\CMS\Extbase\Object\ObjectManager;
37
38
/**
39
 * @internal since v9.2.5
40
 */
41
class ConfigurationService
42
{
43
    /**
44
     * @var BackendUserAuthentication|null
45
     */
46
    private $backendUser;
47
48
    /**
49
     * @var UrlService
50
     */
51
    private $urlService;
52
53
    /**
54
     * @var ConfigurationRepository
55
     */
56
    private $configurationRepository;
57
58
    /**
59
     * @var array
60
     */
61
    private $extensionSettings;
62
63
    /**
     * Resolves the collaborators of the service through TYPO3's instance factories
     * and loads the extension settings once.
     */
    public function __construct()
    {
        /** @var ObjectManager $extbaseObjectManager */
        $extbaseObjectManager = GeneralUtility::makeInstance(ObjectManager::class);

        $this->urlService = GeneralUtility::makeInstance(UrlService::class);
        $this->configurationRepository = $extbaseObjectManager->get(ConfigurationRepository::class);

        /** @var ExtensionConfigurationProvider $configurationProvider */
        $configurationProvider = GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);
        $this->extensionSettings = $configurationProvider->getExtensionConfiguration();
    }
70
71
    /**
     * Reduces the given configurations to those whose key is listed in $allowedConfigurations.
     *
     * An empty allow-list means "no restriction": the configurations are returned untouched.
     * Keys are compared strictly and the keys of the remaining entries are preserved.
     *
     * @param array $allowedConfigurations configuration keys that may stay
     * @param array $configurations configurations indexed by their key
     * @return array the configurations that are allowed
     */
    public static function removeDisallowedConfigurations(array $allowedConfigurations, array $configurations): array
    {
        if ($allowedConfigurations === []) {
            return $configurations;
        }

        return array_filter(
            $configurations,
            static function ($configurationKey) use ($allowedConfigurations): bool {
                return in_array($configurationKey, $allowedConfigurations, true);
            },
            ARRAY_FILTER_USE_KEY
        );
    }
83
84
    /**
     * Collects the crawler configurations ("paramSets") defined in Page TSconfig for one page.
     *
     * Every `tx_crawler.crawlerCfg.paramSets.<key>.` sub-configuration that applies to $pageId
     * (it has no `pidsOnly` restriction, or $pageId is listed in it) is stored in $res under
     * <key>, together with its parsed and expanded parameters and the compiled URLs.
     *
     * @param array $pageTSConfig Page TSconfig array
     * @param int $pageId page the configuration is evaluated for
     * @param array $res configurations collected so far; entries with the same key are overwritten
     * @param string $mountPoint value appended to the base URL as `MP` parameter
     * @return array $res extended by the configurations found in Page TSconfig
     */
    public function getConfigurationFromPageTS(array $pageTSConfig, int $pageId, array $res, string $mountPoint = ''): array
    {
        $maxUrlsToCompile = MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000);
        $crawlerCfg = $pageTSConfig['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];

        foreach ($crawlerCfg as $key => $values) {
            // Only "<key>." entries carry a sub-configuration; the plain "<key>" entry holds the parameter string.
            if (! is_array($values)) {
                continue;
            }
            $key = str_replace('.', '', $key);

            // Sub configuration for a single configuration string:
            $subCfg = (array) ($crawlerCfg[$key . '.'] ?? []);
            $subCfg['key'] = $key;

            if (($subCfg['procInstrFilter'] ?? '') !== '') {
                $subCfg['procInstrFilter'] = implode(',', GeneralUtility::trimExplode(',', $subCfg['procInstrFilter']));
            }

            // "pidsOnly" is optional; treat a missing value like an empty restriction
            // instead of reading an undefined index.
            $pidsOnly = (string) ($subCfg['pidsOnly'] ?? '');
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // Skip configurations that are restricted to other pages.
            if ($pidsOnly !== '' && ! GeneralUtility::inList($pidOnlyList, (string) $pageId)) {
                continue;
            }

            // A sub-configuration may exist without a parameter string; fall back to "no parameters".
            $paramParsed = GeneralUtility::explodeUrl2Array($crawlerCfg[$key] ?? '');
            $paramExpanded = $this->expandParameters($paramParsed, $pageId);

            // NOTE(review): "&MP=" is appended even for an empty mount point. The former
            // is_string() guard was always true for the typed parameter, so the output is
            // unchanged here - confirm whether an empty MP should be omitted.
            $url = '?id=' . $pageId . '&MP=' . $mountPoint;

            $res[$key] = [
                'subCfg' => $subCfg,
                'paramParsed' => $paramParsed,
                'paramExpanded' => $paramExpanded,
                'origin' => 'pagets',
                'URLs' => $this->getUrlService()->compileUrls($paramExpanded, [$url], $maxUrlsToCompile),
            ];
        }

        return $res;
    }
120
121
    /**
     * Collects the crawler configuration records found in the rootline of the given page.
     *
     * A record is taken over into $res when the backend user may access it, its
     * "pidsonly" restriction (if any) contains $pageId, no configuration with the same
     * name is present in $res yet, and $pageId is not part of the record's exclude list.
     *
     * @param int $pageId page the configuration is evaluated for
     * @param array $res configurations collected so far; existing keys are never overwritten
     * @return array $res extended by the matching configuration records
     */
    public function getConfigurationFromDatabase(int $pageId, array $res): array
    {
        $maxUrlsToCompile = MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000);

        $records = $this->configurationRepository->getCrawlerConfigurationRecordsFromRootLine($pageId);
        foreach ($records as $record) {
            // check access to the configuration record
            $accessGranted = empty($record['begroups'])
                || $this->getBackendUser()->isAdmin()
                || UserService::hasGroupAccess($this->getBackendUser()->user['usergroup_cached_list'], $record['begroups']);
            if (! $accessGranted) {
                continue;
            }

            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $record['pidsonly'], true));

            // skip configurations that are restricted to other pages
            if (strcmp($record['pidsonly'], '') !== 0 && ! GeneralUtility::inList($pidOnlyList, strval($pageId))) {
                continue;
            }

            $key = $record['name'];

            // don't overwrite previously defined paramSets
            if (isset($res[$key])) {
                continue;
            }

            /** @var TypoScriptParser $typoScriptParser */
            $typoScriptParser = GeneralUtility::makeInstance(TypoScriptParser::class);
            $typoScriptParser->parse($record['processing_instruction_parameters_ts']);

            $subCfg = [
                'procInstrFilter' => $record['processing_instruction_filter'],
                'procInstrParams.' => $typoScriptParser->setup,
                'baseUrl' => $record['base_url'],
                'force_ssl' => (int) $record['force_ssl'],
                'userGroups' => $record['fegroups'],
                'exclude' => $record['exclude'],
                'key' => $key,
            ];

            // the page itself may be excluded from this configuration
            if (in_array($pageId, $this->expandExcludeString($subCfg['exclude']), true)) {
                continue;
            }

            $paramParsed = GeneralUtility::explodeUrl2Array($record['configuration']);
            $paramExpanded = $this->expandParameters($paramParsed, $pageId);

            $res[$key] = [
                'subCfg' => $subCfg,
                'paramParsed' => $paramParsed,
                'paramExpanded' => $paramExpanded,
                'URLs' => $this->getUrlService()->compileUrls($paramExpanded, ['?id=' . $pageId], $maxUrlsToCompile),
                'origin' => 'tx_crawler_configuration_' . $record['uid'],
            ];
        }

        return $res;
    }
168
169
    /**
     * Expands an exclude string into the list of page ids it covers.
     *
     * The string is a comma separated list of "<pid>" or "<pid>+<depth>" parts:
     * - "<pid>" excludes only that page,
     * - "<pid>+<depth>" additionally excludes the sub pages down to <depth> levels,
     * - "<pid>+" (or a depth of 0 together with a "+") uses a depth of 99.
     *
     * Results are memoised in function-static caches, keyed by the exclude string and
     * by pid/depth for the page tree lookups.
     *
     * @param string $excludeString exclude definition, e.g. "5,12+3,20+"
     * @return int[] unique page ids; keys are not re-indexed
     */
    public function expandExcludeString(string $excludeString): array
    {
        // internal static caches;
        static $expandedExcludeStringCache = [];
        static $treeCache = [];

        // isset() instead of empty(): results that are empty lists are valid cache hits too.
        if (isset($expandedExcludeStringCache[$excludeString])) {
            return $expandedExcludeStringCache[$excludeString];
        }

        $pidList = [];

        if (! empty($excludeString)) {
            /** @var PageTreeView $tree */
            $tree = GeneralUtility::makeInstance(PageTreeView::class);
            $tree->init('AND ' . $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW));

            foreach (GeneralUtility::trimExplode(',', $excludeString) as $excludePart) {
                $pidAndDepth = GeneralUtility::trimExplode('+', $excludePart);

                // Cast right away: the tree API expects integers, and a part without "+"
                // has no second element at all.
                $pid = (int) $pidAndDepth[0];
                $depth = (int) ($pidAndDepth[1] ?? 0);

                // default is "page only" = "depth=0"; a "+" without usable depth means "all levels"
                if ($depth === 0) {
                    $depth = (strpos($excludePart, '+') !== false) ? 99 : 0;
                }

                $pidList[] = $pid;

                if ($depth <= 0) {
                    continue;
                }

                if (! isset($treeCache[$pid][$depth])) {
                    $tree->reset();
                    $tree->getTree($pid, $depth);
                    $treeCache[$pid][$depth] = $tree->tree;
                }

                foreach ($treeCache[$pid][$depth] as $data) {
                    $pidList[] = (int) $data['row']['uid'];
                }
            }
        }

        $expandedExcludeStringCache[$excludeString] = array_unique($pidList);

        return $expandedExcludeStringCache[$excludeString];
    }
214
215
    /**
     * Will expand the parameters configuration to individual values. This follows a certain syntax of the value of each parameter.
     * Syntax of values:
     * - Basically: If the value is wrapped in [...] it will be expanded according to the following syntax, otherwise the value is taken literally
     * - Configuration is split by "|" and the parts are processed individually and finally added together
     * - For each configuration part:
     *         - "[int]-[int]" = Integer range, will be expanded to all values in between, values included, starting from low to high (max. 1000). Example "1-34" or "-40--30"
     *         - "_TABLE:[TCA table name];[_PID:[optional page id, default is current page]];[_ENABLELANG:1]" = Look up of table records from PID, filtering out deleted records. Example "_TABLE:tt_content; _PID:123"
     *        _ENABLELANG:1 picks only original records without their language overlays
     *         - Default: Literal value
     *
     * @param array $paramArray parameter name => raw value string
     * @param int $pid current page id, used as default for table look-ups and handed to the hook
     * @return array parameter name => list of expanded values
     */
    private function expandParameters(array $paramArray, int $pid): array
    {
        // Traverse parameter names:
        foreach ($paramArray as $parameter => $parameterValue) {
            $parameterValue = trim($parameterValue);

            // Without enclosing square brackets the value is literal and becomes the only value.
            if (! $this->isWrappedInSquareBrackets($parameterValue)) {
                $paramArray[$parameter] = [$parameterValue];
                continue;
            }

            // Strip the brackets and collect the values of all "|"-separated parts.
            $paramArray[$parameter] = [];
            foreach (explode('|', substr($parameterValue, 1, -1)) as $part) {
                $trimmedPart = trim($part);

                // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30)
                if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', $trimmedPart, $reg)) {
                    $reg = $this->swapIfFirstIsLargerThanSecond($reg);
                    $paramArray = $this->addValuesInRange($reg, $paramArray, $parameter);
                } elseif (strpos($trimmedPart, '_TABLE:') === 0) {
                    $paramArray[$parameter] = array_merge(
                        $paramArray[$parameter],
                        $this->fetchTableFieldValues($part, $pid)
                    );
                } else {
                    // Just add value:
                    $paramArray[$parameter][] = $part;
                }

                // Hook for processing own expandParameters place holder
                $paramArray = $this->runExpandParametersHook($paramArray, $parameter, $part, $pid);
            }

            // Make unique set of values and sort array by key:
            $paramArray[$parameter] = array_unique($paramArray[$parameter]);
            ksort($paramArray);
        }

        return $paramArray;
    }

    /**
     * Resolves one "_TABLE:..." configuration part to the distinct field values of the matching records.
     *
     * Returns an empty list when the table is not configured in TCA or the requested
     * field is neither "uid" nor a configured TCA column.
     *
     * @param string $part configuration part, e.g. "_TABLE:tt_content; _PID:123"
     * @param int $pid page id used when the part carries no "_PID"
     * @return array distinct values of the requested field
     */
    private function fetchTableFieldValues(string $part, int $pid): array
    {
        // Parse parameters; a sub-part without ":" yields a null value instead of an undefined offset.
        $subpartParams = [];
        foreach (GeneralUtility::trimExplode(';', $part) as $subpart) {
            $keyAndValue = GeneralUtility::trimExplode(':', $subpart);
            $subpartParams[$keyAndValue[0]] = $keyAndValue[1] ?? null;
        }

        // Table exists:
        $table = $subpartParams['_TABLE'] ?? '';
        if (! isset($GLOBALS['TCA'][$table])) {
            return [];
        }

        $lookUpPid = isset($subpartParams['_PID']) ? (int) $subpartParams['_PID'] : $pid;
        $recursiveDepth = isset($subpartParams['_RECURSIVE']) ? (int) $subpartParams['_RECURSIVE'] : 0;
        $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid';
        $where = $subpartParams['_WHERE'] ?? '';
        $addTable = $subpartParams['_ADDTABLE'] ?? '';

        // "_FIELD" is optional and defaults to the uid.
        $fieldName = ($subpartParams['_FIELD'] ?? '') ?: 'uid';
        if ($fieldName !== 'uid' && empty($GLOBALS['TCA'][$table]['columns'][$fieldName])) {
            return [];
        }

        $queryBuilder = $this->getQueryBuilder($table);
        $pidArray = $this->getPidArray($recursiveDepth, $lookUpPid);

        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

        $queryBuilder
            ->select($fieldName)
            ->from($table)
            ->where(
                $queryBuilder->expr()->in($pidField, $queryBuilder->createNamedParameter($pidArray, Connection::PARAM_INT_ARRAY)),
                $where
            );

        if (! empty($addTable)) {
            // TODO: Check if this works as intended!
            $queryBuilder->add('from', $addTable);
        }

        // Only tables with language support define a transOrigPointerField.
        $transOrigPointerField = $GLOBALS['TCA'][$table]['ctrl']['transOrigPointerField'] ?? '';
        if (! empty($subpartParams['_ENABLELANG']) && $transOrigPointerField) {
            $queryBuilder->andWhere(
                $queryBuilder->expr()->lte(
                    $transOrigPointerField,
                    0
                )
            );
        }

        $statement = $queryBuilder->execute();

        // Using the value as array key de-duplicates while keeping first-seen order.
        $values = [];
        while ($row = $statement->fetch()) {
            $values[$row[$fieldName]] = true;
        }

        return array_keys($values);
    }
327
328
    /**
     * Tells whether the string starts with "[" and ends with "]".
     */
    private function isWrappedInSquareBrackets(string $string): bool
    {
        $firstCharacter = substr($string, 0, 1);
        $lastCharacter = substr($string, -1);

        return $firstCharacter === '[' && $lastCharacter === ']';
    }
332
333
    /**
     * Puts the two range boundaries held at index 1 and 2 into ascending order.
     *
     * @param array $reg match array whose elements 1 and 2 are the range boundaries
     * @return array the same array with element 1 not larger than element 2
     */
    private function swapIfFirstIsLargerThanSecond(array $reg): array
    {
        if ($reg[1] > $reg[2]) {
            [$reg[1], $reg[2]] = [$reg[2], $reg[1]];
        }

        return $reg;
    }
344
345
    /**
     * Returns the backend user, taken from $GLOBALS['BE_USER'] on first use.
     *
     * Backend authentication is initialized on every call before the user is read.
     *
     * @return BackendUserAuthentication
     */
    private function getBackendUser()
    {
        // Make sure the _cli_ user is loaded
        Bootstrap::initializeBackendAuthentication();

        $this->backendUser = $this->backendUser ?? $GLOBALS['BE_USER'];

        return $this->backendUser;
    }
357
358
    /**
     * Creates a query builder for the given table via the connection pool.
     *
     * @param string $table name of the table to query
     * @return QueryBuilder
     */
    private function getQueryBuilder(string $table)
    {
        /** @var ConnectionPool $connectionPool */
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

        return $connectionPool->getQueryBuilderForTable($table);
    }
367
368
    /**
     * Runs the registered "expandParameters" hooks for one configuration part.
     *
     * Each hook receives the parameter array by reference and may modify it; the
     * possibly modified array is returned. Without registered hooks the array is
     * returned unchanged.
     *
     * @param array $paramArray expanded parameters collected so far
     * @param mixed $parameter name of the parameter currently being expanded
     * @param mixed $path the configuration part currently being processed
     * @param int $pid current page id
     * @return array the parameter array after all hooks have run
     */
    private function runExpandParametersHook(array $paramArray, $parameter, $path, int $pid): array
    {
        // No hook registered is the normal case; do not touch undefined indexes then.
        $hooks = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] ?? null;
        if (! is_array($hooks)) {
            return $paramArray;
        }

        $_params = [
            'pObj' => &$this,
            'paramArray' => &$paramArray,
            'currentKey' => $parameter,
            'currentValue' => $path,
            'pid' => $pid,
        ];
        foreach ($hooks as $_funcRef) {
            GeneralUtility::callUserFunction($_funcRef, $_params, $this);
        }

        return $paramArray;
    }
388
389
    /**
     * Returns the URL service held by this object, or a fresh instance if none is set.
     */
    protected function getUrlService(): UrlService
    {
        if ($this->urlService !== null) {
            return $this->urlService;
        }

        return GeneralUtility::makeInstance(UrlService::class);
    }
393
394
    /**
     * Builds the list of page ids to look records up in.
     *
     * @param int $recursiveDepth number of page tree levels to include; 0 or less means only $lookUpPid
     * @param int $lookUpPid page id to start from
     * @return int[] page ids
     */
    private function getPidArray(int $recursiveDepth, int $lookUpPid): array
    {
        if ($recursiveDepth <= 0) {
            return [$lookUpPid];
        }

        /** @var QueryGenerator $queryGenerator */
        $queryGenerator = GeneralUtility::makeInstance(QueryGenerator::class);
        $commaSeparatedPids = $queryGenerator->getTreeList($lookUpPid, $recursiveDepth, 0, 1);

        return GeneralUtility::intExplode(',', $commaSeparatedPids);
    }
406
407
    /**
     * Appends every value of the range $reg[1]..$reg[2] (both included) to $paramArray[$parameter].
     *
     * At most 1000 values are added per call, as a safeguard against huge ranges.
     *
     * @param array $reg match array whose elements 1 and 2 are the lower and upper boundary
     * @param array $paramArray expanded parameters collected so far
     * @param mixed $parameter name of the parameter the values are added to
     * @return array $paramArray including the values of the range
     */
    private function addValuesInRange(array $reg, array $paramArray, $parameter): array
    {
        $remainingValues = 1000;
        $current = $reg[1];

        while ($current <= $reg[2]) {
            $paramArray[$parameter][] = $current;

            // Limit to size of range!
            if (--$remainingValues <= 0) {
                break;
            }
            $current++;
        }

        return $paramArray;
    }
425
}
426