Passed
Push — Cleanup/CrawlerController ( ef5bda...231e2f )
by Tomas Norre
19:25 queued 04:30
created

swapIfFirstIsLargerThanSecond()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 5
c 0
b 0
f 0
nc 2
nop 1
dl 0
loc 10
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Service;
6
7
/*
8
 * (c) 2021 AOE GmbH <[email protected]>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21
22
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
23
use AOE\Crawler\Domain\Repository\ConfigurationRepository;
24
use Doctrine\DBAL\Connection;
25
use TYPO3\CMS\Backend\Tree\View\PageTreeView;
26
use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
27
use TYPO3\CMS\Core\Core\Bootstrap;
28
use TYPO3\CMS\Core\Database\ConnectionPool;
29
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
30
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
31
use TYPO3\CMS\Core\Database\QueryGenerator;
32
use TYPO3\CMS\Core\Type\Bitmask\Permission;
33
use TYPO3\CMS\Core\TypoScript\Parser\TypoScriptParser;
34
use TYPO3\CMS\Core\Utility\GeneralUtility;
35
use TYPO3\CMS\Core\Utility\MathUtility;
36
use TYPO3\CMS\Extbase\Object\ObjectManager;
37
38
/**
39
 * @internal since v9.2.5
40
 */
41
class ConfigurationService
42
{
43
    /**
44
     * @var BackendUserAuthentication|null
45
     */
46
    private $backendUser;
47
48
    /**
49
     * @var UrlService
50
     */
51
    private $urlService;
52
53
    /**
54
     * @var ConfigurationRepository
55
     */
56
    private $configurationRepository;
57
58
    /**
59
     * @var array
60
     */
61
    private $extensionSettings;
62
63
    /**
     * Resolves the collaborators of this service: the URL service, the
     * configuration repository (fetched through the Extbase object manager)
     * and the extension settings returned by the extension configuration provider.
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $this->urlService = GeneralUtility::makeInstance(UrlService::class);
        $this->configurationRepository = $objectManager->get(ConfigurationRepository::class);

        $settingsProvider = GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);
        $this->extensionSettings = $settingsProvider->getExtensionConfiguration();
    }
70
71
    /**
     * Reduces $configurations to the entries whose key is listed in $allowedConfigurations.
     *
     * An empty allow-list means "no restriction": the configurations are returned untouched.
     * Keys are compared strictly (type and value) against the allow-list entries and the
     * keys of the remaining entries are preserved.
     */
    public static function removeDisallowedConfigurations(array $allowedConfigurations, array $configurations): array
    {
        if ($allowedConfigurations === []) {
            return $configurations;
        }

        return array_filter(
            $configurations,
            static function ($configurationKey) use ($allowedConfigurations): bool {
                return in_array($configurationKey, $allowedConfigurations, true);
            },
            ARRAY_FILTER_USE_KEY
        );
    }
83
84
    /**
     * Adds the crawler configurations declared in page TSconfig
     * (tx_crawler.crawlerCfg.paramSets.*) that apply to the given page.
     *
     * For every applicable parameter set an entry with the keys "subCfg",
     * "paramParsed", "paramExpanded", "origin" and "URLs" is written to
     * $res[<set name>], replacing an entry of the same name.
     *
     * @param array $pageTSConfig page TSconfig array of the page
     * @param int $pageId page the URLs are compiled for
     * @param array $res configurations collected so far
     * @param string $mountPoint value appended as "MP" URL parameter
     * @return array $res extended by the page TSconfig configurations
     */
    public function getConfigurationFromPageTS(array $pageTSConfig, int $pageId, array $res, string $mountPoint = ''): array
    {
        $maxUrlsToCompile = MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000);
        $crawlerCfg = $pageTSConfig['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];

        foreach ($crawlerCfg as $key => $values) {
            // Only array values (the "<name>." entries) describe a parameter set.
            if (! is_array($values)) {
                continue;
            }
            $key = str_replace('.', '', $key);

            // Sub configuration for a single configuration string:
            $subCfg = (array) ($crawlerCfg[$key . '.'] ?? []);
            $subCfg['key'] = $key;

            // Normalise the filter list (trim every item) when one is given.
            $procInstrFilter = (string) ($subCfg['procInstrFilter'] ?? '');
            if ($procInstrFilter !== '') {
                $subCfg['procInstrFilter'] = implode(',', GeneralUtility::trimExplode(',', $procInstrFilter));
            }

            // "pidsOnly" is optional; treat a missing value like an empty one
            // instead of reading an undefined array key.
            $pidsOnly = (string) ($subCfg['pidsOnly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // Skip the configuration if it is page-specific and the current page is not listed.
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && ! GeneralUtility::inList($pidOnlyList, (string) $pageId)) {
                continue;
            }

            // The plain "<name>" entry holds the parameter string; it may be absent
            // when only the "<name>." sub configuration was defined.
            $paramParsed = GeneralUtility::explodeUrl2Array((string) ($crawlerCfg[$key] ?? ''));
            $paramExpanded = $this->expandParameters($paramParsed, $pageId);

            // $mountPoint is declared as string, so the former is_string() check was
            // always true: the MP parameter is always appended, even when it is empty.
            // Kept that way so the generated URLs do not change.
            $url = '?id=' . $pageId . '&MP=' . $mountPoint;

            $res[$key] = [
                'subCfg' => $subCfg,
                'paramParsed' => $paramParsed,
                'paramExpanded' => $paramExpanded,
                'origin' => 'pagets',
                'URLs' => $this->getUrlService()->compileUrls($paramExpanded, [$url], $maxUrlsToCompile),
            ];
        }

        return $res;
    }
120
121
    /**
     * Adds the crawler configuration records found for the given page
     * (looked up by the configuration repository along the root line).
     *
     * A record is used when
     *  - it has no backend group restriction, or the backend user is admin or
     *    passes UserService::hasGroupAccess() for the record's groups,
     *  - it is not limited to other pages through "pidsonly",
     *  - no entry with the same name exists in $res yet, and
     *  - the page is not part of the record's expanded "exclude" list.
     *
     * @param int $pageId page the URLs are compiled for
     * @param array $res configurations collected so far; existing names are never overwritten
     * @return array $res extended by the database configurations
     */
    public function getConfigurationFromDatabase(int $pageId, array $res): array
    {
        $maxUrlsToCompile = MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000);

        $crawlerConfigurations = $this->configurationRepository->getCrawlerConfigurationRecordsFromRootLine($pageId);
        foreach ($crawlerConfigurations as $configurationRecord) {
            // check access to the configuration record
            $hasAccess = empty($configurationRecord['begroups'])
                || $this->getBackendUser()->isAdmin()
                || UserService::hasGroupAccess($this->getBackendUser()->user['usergroup_cached_list'], $configurationRecord['begroups']);
            if (! $hasAccess) {
                continue;
            }

            // Record values may be NULL; cast before handing them to string-only
            // functions, which would raise a TypeError under strict_types.
            $pidsOnly = (string) ($configurationRecord['pidsonly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // Skip the configuration if it is page-specific and the current page is not listed.
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && ! GeneralUtility::inList($pidOnlyList, (string) $pageId)) {
                continue;
            }

            $key = $configurationRecord['name'];

            // don't overwrite previously defined paramSets
            if (isset($res[$key])) {
                continue;
            }

            /* @var $TSparserObject TypoScriptParser */
            $TSparserObject = GeneralUtility::makeInstance(TypoScriptParser::class);
            $TSparserObject->parse((string) ($configurationRecord['processing_instruction_parameters_ts'] ?? ''));

            $subCfg = [
                'procInstrFilter' => $configurationRecord['processing_instruction_filter'],
                'procInstrParams.' => $TSparserObject->setup,
                'baseUrl' => $configurationRecord['base_url'],
                'force_ssl' => (int) $configurationRecord['force_ssl'],
                'userGroups' => $configurationRecord['fegroups'],
                'exclude' => $configurationRecord['exclude'],
                'key' => $key,
            ];

            // Pages listed in the exclude string do not get this configuration.
            if (in_array($pageId, $this->expandExcludeString((string) $subCfg['exclude']), true)) {
                continue;
            }

            $paramParsed = GeneralUtility::explodeUrl2Array((string) ($configurationRecord['configuration'] ?? ''));
            $paramExpanded = $this->expandParameters($paramParsed, $pageId);

            $res[$key] = [
                'subCfg' => $subCfg,
                'paramParsed' => $paramParsed,
                'paramExpanded' => $paramExpanded,
                'URLs' => $this->getUrlService()->compileUrls($paramExpanded, ['?id=' . $pageId], $maxUrlsToCompile),
                'origin' => 'tx_crawler_configuration_' . $configurationRecord['uid'],
            ];
        }

        return $res;
    }
168
169
    /**
     * Expands an exclude string into a unique list of page ids.
     *
     * The string is a comma separated list of "<pid>" or "<pid>+<depth>" items.
     * Without a depth only the page itself is excluded, "<pid>+" (no number after
     * the plus sign) uses a depth of 99, and a positive depth additionally adds
     * the pages returned by expandPidList() for that page.
     *
     * Results are memoised per exclude string in a static cache.
     *
     * @param string $excludeString e.g. "12,34+2,56+"
     * @return int[] page ids (duplicates removed, original keys kept by array_unique)
     */
    public function expandExcludeString(string $excludeString): array
    {
        // Internal static caches. Both must start as arrays: $treeCache is handed
        // to expandPidList(), which does not accept null.
        static $expandedExcludeStringCache = [];
        static $treeCache = [];

        // isset() instead of empty() so that empty results are served from the cache too.
        if (isset($expandedExcludeStringCache[$excludeString])) {
            return $expandedExcludeStringCache[$excludeString];
        }

        $pidList = [];

        if (! empty($excludeString)) {
            /** @var PageTreeView $tree */
            $tree = GeneralUtility::makeInstance(PageTreeView::class);
            $tree->init('AND ' . $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW));

            foreach (GeneralUtility::trimExplode(',', $excludeString) as $excludePart) {
                // Pad to two elements: items without "+" have no depth part.
                [$pid, $depth] = array_pad(GeneralUtility::trimExplode('+', $excludePart), 2, '');

                // default is "page only" = "depth=0"; a "+" without a number means depth 99
                if (empty($depth)) {
                    $depth = (strpos($excludePart, '+') !== false) ? 99 : 0;
                }
                $depth = (int) $depth;

                $pidList[] = (int) $pid;
                if ($depth > 0) {
                    $pidList = $this->expandPidList($treeCache, $pid, $depth, $tree, $pidList);
                }
            }
        }

        $expandedExcludeStringCache[$excludeString] = array_unique($pidList);

        return $expandedExcludeStringCache[$excludeString];
    }
205
206
    /**
     * Will expand the parameters configuration to individual values. This follows a certain syntax of the value of each parameter.
     * Syntax of values:
     * - Basically: If the value is wrapped in [...] it will be expanded according to the following syntax, otherwise the value is taken literally
     * - Configuration is split by "|" and the parts are processed individually and finally added together
     * - For each configuration part:
     *         - "[int]-[int]" = Integer range, will be expanded to all values in between, values included, starting from low to high (max. 1000). Example "1-34" or "-40--30"
     *         - "_TABLE:[TCA table name];[_PID:[optional page id, default is current page]];[_ENABLELANG:1]" = Look up of table records from PID, filtering out deleted records. Example "_TABLE:tt_content; _PID:123"
     *        _ENABLELANG:1 picks only original records without their language overlays
     *         - Default: Literal value
     *
     * After every part the "expandParameters" hook is run and may alter the result.
     *
     * @param array $paramArray parameter name => raw value string
     * @param int $pid current page id, used as default for "_TABLE" look ups
     * @return array parameter name => list of expanded values
     */
    private function expandParameters(array $paramArray, int $pid): array
    {
        // Traverse parameter names:
        foreach ($paramArray as $parameter => $parameterValue) {
            $parameterValue = trim($parameterValue);

            // Values that are not wrapped in square brackets are literal:
            // set the literal value as only value in array.
            if (! $this->isWrappedInSquareBrackets($parameterValue)) {
                $paramArray[$parameter] = [$parameterValue];
                continue;
            }

            // Strip the brackets and reset the paramArray value as an array.
            $paramArray[$parameter] = [];

            // Explode parts and traverse them:
            foreach (explode('|', substr($parameterValue, 1, -1)) as $part) {
                // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30)
                if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', trim($part), $reg)) {
                    $reg = $this->swapIfFirstIsLargerThanSecond($reg);
                    $paramArray = $this->addValuesInRange($reg, $paramArray, $parameter);
                } elseif (strpos(trim($part), '_TABLE:') === 0) {
                    $tableValues = $this->lookUpTableValues($part, $pid);
                    // null = no query was run (unknown table or field): leave the values untouched.
                    if ($tableValues !== null) {
                        $paramArray[$parameter] = array_merge($paramArray[$parameter], $tableValues);
                    }
                } else {
                    // Just add value:
                    $paramArray[$parameter][] = $part;
                }

                // Hook for processing own expandParameters place holder
                $paramArray = $this->runExpandParametersHook($paramArray, $parameter, $part, $pid);
            }

            // Make unique set of values and sort array by key:
            $paramArray[$parameter] = array_unique($paramArray[$parameter]);
            ksort($paramArray);
        }

        return $paramArray;
    }

    /**
     * Resolves one "_TABLE:..." configuration part to the distinct values of the
     * requested field ("_FIELD", default "uid") of the matching records.
     *
     * Supported sub parameters (separated by ";", each written as "NAME:value"):
     * _TABLE, _PID, _RECURSIVE, _PIDFIELD, _WHERE, _ADDTABLE, _FIELD, _ENABLELANG.
     *
     * NOTE(review): _PIDFIELD, _WHERE and _ADDTABLE are passed to the query as
     * written in the configuration - they must only come from trusted configuration.
     *
     * @param string $part the configuration part starting with "_TABLE:"
     * @param int $pid page id used when no "_PID" is given
     * @return array|null list of values, or null when the table is not in TCA or the
     *                    field is neither "uid" nor a configured TCA column
     */
    private function lookUpTableValues(string $part, int $pid): ?array
    {
        // Parse parameters:
        $subpartParams = [];
        foreach (GeneralUtility::trimExplode(';', $part) as $subpart) {
            // Split at the first ":" only, so values may contain colons themselves;
            // pad with null so a name without value behaves like an unset parameter.
            [$pKey, $pVal] = array_pad(GeneralUtility::trimExplode(':', $subpart, false, 2), 2, null);
            $subpartParams[$pKey] = $pVal;
        }

        // Table exists:
        $table = (string) ($subpartParams['_TABLE'] ?? '');
        if (! isset($GLOBALS['TCA'][$table])) {
            return null;
        }

        $fieldName = ($subpartParams['_FIELD'] ?? '') ?: 'uid';
        if ($fieldName !== 'uid' && empty($GLOBALS['TCA'][$table]['columns'][$fieldName])) {
            return null;
        }

        $lookUpPid = isset($subpartParams['_PID']) ? (int) $subpartParams['_PID'] : $pid;
        $recursiveDepth = isset($subpartParams['_RECURSIVE']) ? (int) $subpartParams['_RECURSIVE'] : 0;
        $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid';
        $where = $subpartParams['_WHERE'] ?? '';
        $addTable = $subpartParams['_ADDTABLE'] ?? '';

        $queryBuilder = $this->getQueryBuilder($table);
        $pidArray = $this->getPidArray($recursiveDepth, $lookUpPid);

        // Only the "deleted" restriction is applied.
        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

        $queryBuilder
            ->select($fieldName)
            ->from($table)
            ->where(
                $queryBuilder->expr()->in($pidField, $queryBuilder->createNamedParameter($pidArray, Connection::PARAM_INT_ARRAY)),
                $where
            );

        if (! empty($addTable)) {
            // TODO: Check if this works as intended!
            $queryBuilder->add('from', $addTable);
        }

        $transOrigPointerField = $GLOBALS['TCA'][$table]['ctrl']['transOrigPointerField'] ?? '';
        if (! empty($subpartParams['_ENABLELANG']) && $transOrigPointerField) {
            $queryBuilder->andWhere(
                $queryBuilder->expr()->lte(
                    $transOrigPointerField,
                    0
                )
            );
        }

        // Collect the field values as array keys: this de-duplicates them and
        // applies PHP's key casting (integer-like strings become integers).
        $values = [];
        $statement = $queryBuilder->execute();
        while ($row = $statement->fetch()) {
            $values[$row[$fieldName]] = true;
        }

        return array_keys($values);
    }
318
319
    /**
     * Tells whether the string begins with "[" and ends with "]".
     */
    private function isWrappedInSquareBrackets(string $string): bool
    {
        if ($string === '') {
            return false;
        }

        $firstCharacter = $string[0];
        $lastCharacter = $string[strlen($string) - 1];

        return $firstCharacter === '[' && $lastCharacter === ']';
    }
323
324
    /**
     * Orders the two range bounds of a match array ascending.
     *
     * Elements 1 and 2 are exchanged when the first is larger than the second;
     * all other elements are left as they are.
     */
    private function swapIfFirstIsLargerThanSecond(array $reg): array
    {
        if ($reg[1] > $reg[2]) {
            [$reg[1], $reg[2]] = [$reg[2], $reg[1]];
        }

        return $reg;
    }
335
336
    /**
     * Returns the backend user, taken from $GLOBALS['BE_USER'] on first use
     * and kept on the instance afterwards.
     *
     * @return BackendUserAuthentication
     */
    private function getBackendUser()
    {
        // Backend authentication is initialised on every call
        // (original note: "Make sure the _cli_ user is loaded").
        Bootstrap::initializeBackendAuthentication();

        $this->backendUser = $this->backendUser ?? $GLOBALS['BE_USER'];

        return $this->backendUser;
    }
348
349
    /**
     * Asks the connection pool for a query builder for the given table.
     *
     * @return QueryBuilder
     */
    private function getQueryBuilder(string $table)
    {
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

        return $connectionPool->getQueryBuilderForTable($table);
    }
358
359
    /**
     * Runs the functions registered in
     * $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'].
     *
     * Every function receives this object, the parameter array (by reference, so it
     * can be changed), the current parameter name, the current part and the page id.
     * Without registered functions the parameter array is returned unchanged.
     *
     * @param array $paramArray parameter name => expanded values so far
     * @param int|string $parameter name of the parameter currently being expanded
     * @param string $path the configuration part currently being processed
     * @param int $pid current page id
     * @return array the (possibly modified) parameter array
     */
    private function runExpandParametersHook(array $paramArray, $parameter, $path, int $pid): array
    {
        // The registry key usually does not exist; read it null-safe instead of
        // triggering an "undefined array key" warning for every processed part.
        $hooks = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] ?? null;
        if (! is_array($hooks)) {
            return $paramArray;
        }

        $_params = [
            'pObj' => &$this,
            'paramArray' => &$paramArray,
            'currentKey' => $parameter,
            'currentValue' => $path,
            'pid' => $pid,
        ];
        foreach ($hooks as $_funcRef) {
            GeneralUtility::callUserFunction($_funcRef, $_params, $this);
        }

        return $paramArray;
    }
379
380
    /**
     * Returns the URL service, creating it first when none is set on the instance.
     */
    protected function getUrlService(): UrlService
    {
        if ($this->urlService === null) {
            $this->urlService = GeneralUtility::makeInstance(UrlService::class);
        }

        return $this->urlService;
    }
385
386
    /**
     * Builds the list of page ids a table look up is restricted to.
     *
     * Without a positive recursion depth this is just the given page id; otherwise
     * the comma separated tree list delivered by the QueryGenerator is split into integers.
     */
    private function getPidArray(int $recursiveDepth, int $lookUpPid): array
    {
        if ($recursiveDepth <= 0) {
            return [$lookUpPid];
        }

        /** @var QueryGenerator $queryGenerator */
        $queryGenerator = GeneralUtility::makeInstance(QueryGenerator::class);
        $pidList = $queryGenerator->getTreeList($lookUpPid, $recursiveDepth, 0, 1);

        return GeneralUtility::intExplode(',', $pidList);
    }
398
399
    /**
     * Appends every integer of the range $reg[1]..$reg[2] (both included) to
     * $paramArray[$parameter].
     *
     * The bounds come from a regular expression match and therefore arrive as
     * strings; they are cast to int first so that all appended values are integers
     * (otherwise the first value would stay a string while the rest become ints).
     * At most 1000 values are added per call to limit the size of the range.
     *
     * @param array $reg match array; index 1 is the lower, index 2 the upper bound
     * @param array $paramArray parameter name => values collected so far
     * @param int|string $parameter name of the parameter the values belong to
     * @return array $paramArray with the range values appended
     */
    private function addValuesInRange(array $reg, array $paramArray, $parameter): array
    {
        $lowerBound = (int) $reg[1];
        $upperBound = (int) $reg[2];

        $runAwayBrake = 1000;
        for ($value = $lowerBound; $value <= $upperBound && $runAwayBrake > 0; $value++) {
            $paramArray[$parameter][] = $value;
            $runAwayBrake--;
        }

        return $paramArray;
    }
417
418
    /**
     * Appends the uids of the pages below $pid (down to $depth levels, as loaded
     * by the given page tree) to $pidList.
     *
     * Loaded trees are stored in $treeCache under [$pid][$depth]. The cache is taken
     * by reference so the entries survive the call, and null is accepted so that a
     * not yet initialised cache variable can be passed in.
     *
     * @param array|null $treeCache cache of loaded trees, filled by this method
     * @param string $pid page id whose sub pages are added
     * @param int|string $depth number of levels to descend
     * @param PageTreeView $tree initialised tree used to load the pages
     * @param array $pidList page ids collected so far
     * @return array $pidList extended by the uids found in the tree
     */
    private function expandPidList(?array &$treeCache, string $pid, $depth, PageTreeView $tree, array $pidList): array
    {
        $depth = (int) $depth;

        if (! isset($treeCache[$pid][$depth])) {
            $tree->reset();
            // getTree() expects an integer uid, $pid arrives as string.
            $tree->getTree((int) $pid, $depth);
            $treeCache[$pid][$depth] = $tree->tree;
        }

        foreach ($treeCache[$pid][$depth] as $data) {
            $pidList[] = (int) $data['row']['uid'];
        }

        return $pidList;
    }
434
}
435