Passed
Push — Cleanup/CrawlerController ( e10c7c...cb7b19 )
by Tomas Norre
16:53
created

getConfigurationFromDatabase()   B

Complexity

Conditions 9
Paths 6

Size

Total Lines 46
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
eloc 26
nc 6
nop 2
dl 0
loc 46
rs 8.0555
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Service;
6
7
/*
8
 * (c) 2021 AOE GmbH <[email protected]>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21
22
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
23
use AOE\Crawler\Domain\Repository\ConfigurationRepository;
24
use Doctrine\DBAL\Connection;
25
use TYPO3\CMS\Backend\Tree\View\PageTreeView;
26
use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
27
use TYPO3\CMS\Core\Core\Bootstrap;
28
use TYPO3\CMS\Core\Database\ConnectionPool;
29
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
30
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
31
use TYPO3\CMS\Core\Database\QueryGenerator;
32
use TYPO3\CMS\Core\Type\Bitmask\Permission;
33
use TYPO3\CMS\Core\TypoScript\Parser\TypoScriptParser;
34
use TYPO3\CMS\Core\Utility\GeneralUtility;
35
use TYPO3\CMS\Core\Utility\MathUtility;
36
use TYPO3\CMS\Extbase\Object\ObjectManager;
37
38
/**
39
 * @internal since v9.2.5
40
 */
41
class ConfigurationService
42
{
43
    /**
44
     * @var BackendUserAuthentication|null
45
     */
46
    private $backendUser;
47
48
    /**
49
     * @var UrlService
50
     */
51
    private $urlService;
52
53
    /**
54
     * @var ConfigurationRepository
55
     */
56
    private $configurationRepository;
57
58
    /**
59
     * @var array
60
     */
61
    private $extensionSettings;
62
63
    /**
     * Wires up the collaborators of this service.
     *
     * The configuration repository is resolved through the Extbase ObjectManager; the URL service and
     * the extension settings provider are created via GeneralUtility::makeInstance().
     */
    public function __construct()
    {
        /** @var ObjectManager $extbaseObjectManager */
        $extbaseObjectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $this->urlService = GeneralUtility::makeInstance(UrlService::class);
        $this->configurationRepository = $extbaseObjectManager->get(ConfigurationRepository::class);

        /** @var ExtensionConfigurationProvider $settingsProvider */
        $settingsProvider = GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);
        $this->extensionSettings = $settingsProvider->getExtensionConfiguration();
    }
70
71
    /**
     * Reduces $configurations to the entries whose key is listed in $allowedConfigurations.
     *
     * An empty allow-list means "no restriction": the configurations are returned untouched.
     * Keys are compared strictly (type and value), and the surviving entries keep their keys.
     *
     * @param array $allowedConfigurations List of configuration keys that may stay
     * @param array $configurations Configurations indexed by configuration key
     * @return array The filtered configurations
     */
    public static function removeDisallowedConfigurations(array $allowedConfigurations, array $configurations): array
    {
        if ($allowedConfigurations === []) {
            return $configurations;
        }

        // Drop every configuration that does not match the current selection
        return array_filter(
            $configurations,
            static function ($configurationKey) use ($allowedConfigurations): bool {
                return in_array($configurationKey, $allowedConfigurations, true);
            },
            ARRAY_FILTER_USE_KEY
        );
    }
83
84
    /**
     * Collects the crawler configurations defined in Page TSconfig
     * (tx_crawler.crawlerCfg.paramSets.) that apply to the given page.
     *
     * For every parameter set "<key>" with a sub configuration "<key>." an entry $res[<key>] is
     * written containing 'subCfg', 'paramParsed', 'paramExpanded', 'origin' ("pagets") and 'URLs'.
     * A set restricted through "pidsOnly" is only used when $pageId is part of that list.
     *
     * @param array $pageTSConfig Page TSconfig array
     * @param int $pageId Page the configuration is compiled for
     * @param array $res Already collected configurations; entries with the same key are overwritten
     * @param string $mountPoint Mount point value that is appended to the base URL as "&MP="
     * @return array $res extended by the applicable Page TSconfig configurations
     */
    public function getConfigurationFromPageTS(array $pageTSConfig, int $pageId, array $res, string $mountPoint = ''): array
    {
        $maxUrlsToCompile = MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000);
        $crawlerCfg = $pageTSConfig['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];

        foreach ($crawlerCfg as $key => $values) {
            if (! is_array($values)) {
                continue;
            }

            // Array keys may be integers; str_replace() rejects those under strict_types
            $key = str_replace('.', '', (string) $key);

            // Sub configuration for a single configuration string:
            $subCfg = (array) ($crawlerCfg[$key . '.'] ?? []);
            $subCfg['key'] = $key;

            $procInstrFilter = $subCfg['procInstrFilter'] ?? '';
            if ($procInstrFilter !== '') {
                $subCfg['procInstrFilter'] = implode(',', GeneralUtility::trimExplode(',', $procInstrFilter));
            }

            // "pidsOnly" is optional, treat a missing value like an empty one
            $pidsOnly = (string) ($subCfg['pidsOnly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // process configuration if it is not page-specific or if the specific page is the current page:
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && ! GeneralUtility::inList($pidOnlyList, (string) $pageId)) {
                continue;
            }

            // Explode, process etc.:
            $res[$key] = [];
            $res[$key]['subCfg'] = $subCfg;
            // A parameter set may consist of the sub configuration only, without a parameter string
            $res[$key]['paramParsed'] = GeneralUtility::explodeUrl2Array((string) ($crawlerCfg[$key] ?? ''));
            $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $pageId);
            $res[$key]['origin'] = 'pagets';

            // $mountPoint is always a string, so "&MP=" is appended unconditionally (also when it is empty)
            $url = '?id=' . $pageId . '&MP=' . $mountPoint;
            $res[$key]['URLs'] = $this->getUrlService()->compileUrls($res[$key]['paramExpanded'], [$url], $maxUrlsToCompile);
        }

        return $res;
    }
120
121
    /**
     * Collects the crawler configuration records found in the root line of the given page.
     *
     * A record is skipped when
     * - it is restricted to backend groups ("begroups") the current backend user has no access to,
     * - it is restricted to pages ("pidsonly") and $pageId is not one of them,
     * - a configuration with the same name is already present in $res,
     * - $pageId is part of the record's expanded "exclude" list.
     *
     * @param int $pageId Page the configuration is compiled for
     * @param array $res Already collected configurations; existing keys are never overwritten
     * @return array $res extended by the applicable database configurations
     */
    public function getConfigurationFromDatabase(int $pageId, array $res): array
    {
        $maxUrlsToCompile = MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000);

        $crawlerConfigurations = $this->configurationRepository->getCrawlerConfigurationRecordsFromRootLine($pageId);
        foreach ($crawlerConfigurations as $configurationRecord) {
            // check access to the configuration record
            $hasAccess = empty($configurationRecord['begroups'])
                || $this->getBackendUser()->isAdmin()
                || UserService::hasGroupAccess($this->getBackendUser()->user['usergroup_cached_list'], $configurationRecord['begroups']);
            if (! $hasAccess) {
                continue;
            }

            // The column may be NULL; a string is required under strict_types
            $pidsOnly = (string) ($configurationRecord['pidsonly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // process configuration if it is not page-specific or if the specific page is the current page:
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && ! GeneralUtility::inList($pidOnlyList, (string) $pageId)) {
                continue;
            }

            $key = $configurationRecord['name'];

            // don't overwrite previously defined paramSets
            if (isset($res[$key])) {
                continue;
            }

            /** @var TypoScriptParser $typoScriptParser */
            $typoScriptParser = GeneralUtility::makeInstance(TypoScriptParser::class);
            $typoScriptParser->parse($configurationRecord['processing_instruction_parameters_ts']);

            $subCfg = [
                'procInstrFilter' => $configurationRecord['processing_instruction_filter'],
                'procInstrParams.' => $typoScriptParser->setup,
                'baseUrl' => $configurationRecord['base_url'],
                'force_ssl' => (int) $configurationRecord['force_ssl'],
                'userGroups' => $configurationRecord['fegroups'],
                'exclude' => $configurationRecord['exclude'],
                'key' => $key,
            ];

            // expandExcludeString() is typed "string"; the column may be NULL
            if (in_array($pageId, $this->expandExcludeString((string) $subCfg['exclude']), true)) {
                continue;
            }

            $res[$key] = [];
            $res[$key]['subCfg'] = $subCfg;
            $res[$key]['paramParsed'] = GeneralUtility::explodeUrl2Array($configurationRecord['configuration']);
            $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $pageId);
            $res[$key]['URLs'] = $this->getUrlService()->compileUrls($res[$key]['paramExpanded'], ['?id=' . $pageId], $maxUrlsToCompile);
            $res[$key]['origin'] = 'tx_crawler_configuration_' . $configurationRecord['uid'];
        }

        return $res;
    }
168
169
    /**
     * Expands an exclude string into a unique list of page uids.
     *
     * The string is a comma separated list of parts of the form "<pid>" or "<pid>+<depth>":
     * - "<pid>" excludes only that page (depth 0),
     * - "<pid>+<depth>" additionally excludes the sub pages down to <depth> levels,
     * - "<pid>+" (or "<pid>+0") uses a depth of 99.
     *
     * Results, and the page trees fetched for them, are cached in static variables for the
     * lifetime of the PHP process.
     *
     * @param string $excludeString Comma separated exclude definition
     * @return int[] Unique page uids
     */
    public function expandExcludeString(string $excludeString): array
    {
        // internal static caches
        static $expandedExcludeStringCache = [];
        static $treeCache = [];

        if (isset($expandedExcludeStringCache[$excludeString])) {
            return $expandedExcludeStringCache[$excludeString];
        }

        $pidList = [];

        if ($excludeString !== '') {
            /** @var PageTreeView $tree */
            $tree = GeneralUtility::makeInstance(PageTreeView::class);
            $tree->init('AND ' . $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW));

            foreach (GeneralUtility::trimExplode(',', $excludeString) as $excludePart) {
                // A part without "+" has no depth element; getTree() expects integers
                $pidAndDepth = GeneralUtility::trimExplode('+', $excludePart);
                $pid = (int) $pidAndDepth[0];
                $depth = (int) ($pidAndDepth[1] ?? 0);

                // default is "page only" = "depth=0"
                if ($depth === 0 && strpos($excludePart, '+') !== false) {
                    $depth = 99;
                }

                $pidList[] = $pid;

                if ($depth <= 0) {
                    continue;
                }

                if (! isset($treeCache[$pid][$depth])) {
                    $tree->reset();
                    $tree->getTree($pid, $depth);
                    $treeCache[$pid][$depth] = $tree->tree;
                }

                foreach ($treeCache[$pid][$depth] as $data) {
                    $pidList[] = (int) $data['row']['uid'];
                }
            }
        }

        $expandedExcludeStringCache[$excludeString] = array_unique($pidList);

        return $expandedExcludeStringCache[$excludeString];
    }
214
215
    /**
     * Will expand the parameters configuration to individual values. This follows a certain syntax of the value of each parameter.
     * Syntax of values:
     * - Basically: If the value is wrapped in [...] it will be expanded according to the following syntax, otherwise the value is taken literally
     * - Configuration is split by "|" and the parts are processed individually and finally added together
     * - For each configuration part:
     *         - "[int]-[int]" = Integer range, will be expanded to all values in between, values included, starting from low to high (max. 1000). Example "1-34" or "-40--30"
     *         - "_TABLE:[TCA table name];[_PID:[optional page id, default is current page]];[_ENABLELANG:1]" = Look up of table records from PID, filtering out deleted records. Example "_TABLE:tt_content; _PID:123"
     *        _ENABLELANG:1 picks only original records without their language overlays
     *        Further optional sub parameters read by the lookup: _RECURSIVE, _PIDFIELD, _WHERE, _ADDTABLE, _FIELD
     *         - Default: Literal value
     *
     * After each part the "expandParameters" hook is run and may alter the parameter array.
     *
     * @param array $paramArray Parameter name => raw parameter value
     * @param int $pid Current page id, used as default lookup pid for "_TABLE" parts
     * @return array Parameter name => list of expanded values
     */
    private function expandParameters(array $paramArray, int $pid): array
    {
        // Traverse parameter names:
        foreach ($paramArray as $parameter => $parameterValue) {
            $parameterValue = trim($parameterValue);

            // If value is encapsulated in square brackets it means there are some ranges of values to find, otherwise the value is literal
            $isExpandable = strpos($parameterValue, '[') === 0 && substr($parameterValue, -1) === ']';
            if (! $isExpandable) {
                // Set the literal value as only value in array:
                $paramArray[$parameter] = [$parameterValue];
                continue;
            }

            // So, find the value inside brackets and reset the paramArray value as an array.
            $parameterValue = substr($parameterValue, 1, -1);
            $paramArray[$parameter] = [];

            // Explode parts and traverse them:
            foreach (explode('|', $parameterValue) as $part) {
                // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30)
                if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', trim($part), $reg)) {
                    $reg = $this->swapIfFirstIsLargerThanSecond($reg);

                    // Traverse range, add values:
                    // Limit to size of range!
                    $runAwayBrake = 1000;
                    for ($a = $reg[1]; $a <= $reg[2]; $a++) {
                        $paramArray[$parameter][] = $a;
                        $runAwayBrake--;
                        if ($runAwayBrake <= 0) {
                            break;
                        }
                    }
                } elseif (strpos(trim($part), '_TABLE:') === 0) {
                    $lookedUpValues = $this->expandTableLookup($part, $pid);
                    // null means no query was run (unknown table or field)
                    if ($lookedUpValues !== null) {
                        $paramArray[$parameter] = array_merge($paramArray[$parameter], $lookedUpValues);
                    }
                } else {
                    // Just add value:
                    $paramArray[$parameter][] = $part;
                }

                // Hook for processing own expandParameters place holder
                $paramArray = $this->runExpandParametersHook($paramArray, $parameter, $part, $pid);
            }

            // Make unique set of values and sort array by key:
            $paramArray[$parameter] = array_unique($paramArray[$parameter]);
            ksort($paramArray);
        }

        return $paramArray;
    }

    /**
     * Resolves one "_TABLE:..." configuration part to the values of the requested field.
     *
     * @param string $part Configuration part, e.g. "_TABLE:tt_content; _PID:123"
     * @param int $pid Page id used when the part does not define "_PID"
     * @return array|null Distinct field values, or null when the table or field is not configured in TCA
     */
    private function expandTableLookup(string $part, int $pid): ?array
    {
        // Parse parameters; a sub part without ":" yields a null value
        $subpartParams = [];
        foreach (GeneralUtility::trimExplode(';', $part) as $subpart) {
            $keyAndValue = GeneralUtility::trimExplode(':', $subpart);
            $subpartParams[$keyAndValue[0]] = $keyAndValue[1] ?? null;
        }

        // Table exists:
        $table = $subpartParams['_TABLE'] ?? '';
        if (! isset($GLOBALS['TCA'][$table])) {
            return null;
        }

        $lookUpPid = isset($subpartParams['_PID']) ? intval($subpartParams['_PID']) : intval($pid);
        $recursiveDepth = isset($subpartParams['_RECURSIVE']) ? intval($subpartParams['_RECURSIVE']) : 0;
        $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid';
        $where = $subpartParams['_WHERE'] ?? '';
        $addTable = $subpartParams['_ADDTABLE'] ?? '';

        $fieldName = ($subpartParams['_FIELD'] ?? '') ?: 'uid';
        if ($fieldName !== 'uid' && empty($GLOBALS['TCA'][$table]['columns'][$fieldName])) {
            return null;
        }

        $queryBuilder = $this->getQueryBuilder($table);

        if ($recursiveDepth > 0) {
            /** @var QueryGenerator $queryGenerator */
            $queryGenerator = GeneralUtility::makeInstance(QueryGenerator::class);
            $pidList = $queryGenerator->getTreeList($lookUpPid, $recursiveDepth, 0, 1);
            $pidArray = GeneralUtility::intExplode(',', $pidList);
        } else {
            $pidArray = [(string) $lookUpPid];
        }

        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

        $queryBuilder
            ->select($fieldName)
            ->from($table)
            ->where(
                $queryBuilder->expr()->in($pidField, $queryBuilder->createNamedParameter($pidArray, Connection::PARAM_INT_ARRAY)),
                $where
            );

        if (! empty($addTable)) {
            // TODO: Check if this works as intended!
            $queryBuilder->add('from', $addTable);
        }

        // Tables without translation support do not define this ctrl key
        $transOrigPointerField = $GLOBALS['TCA'][$table]['ctrl']['transOrigPointerField'] ?? '';

        if (! empty($subpartParams['_ENABLELANG']) && $transOrigPointerField) {
            $queryBuilder->andWhere(
                $queryBuilder->expr()->lte(
                    $transOrigPointerField,
                    0
                )
            );
        }

        $statement = $queryBuilder->execute();

        // Index by field value so that every value is returned only once
        $rows = [];
        while ($row = $statement->fetch()) {
            $rows[$row[$fieldName]] = $row;
        }

        return array_keys($rows);
    }
345
346
    /**
     * Orders the two captured range boundaries so that index 1 holds the lower one.
     *
     * @param array $reg Regular expression match with the boundaries at index 1 and 2
     * @return array The match with index 1 and 2 exchanged when index 1 was the larger value
     */
    private function swapIfFirstIsLargerThanSecond(array $reg): array
    {
        // Exchange both boundaries when the first one is larger than the last one
        if ($reg[1] > $reg[2]) {
            [$reg[1], $reg[2]] = [$reg[2], $reg[1]];
        }

        return $reg;
    }
357
358
    /**
     * Returns the backend user, taking it from $GLOBALS['BE_USER'] on first use.
     *
     * Backend authentication is initialized on every call before the user is read.
     *
     * @return BackendUserAuthentication
     */
    private function getBackendUser()
    {
        // Make sure the _cli_ user is loaded
        Bootstrap::initializeBackendAuthentication();

        $this->backendUser = $this->backendUser ?? $GLOBALS['BE_USER'];

        return $this->backendUser;
    }
370
371
    /**
     * Creates a query builder for the given table through the connection pool.
     *
     * @param string $table Name of the database table
     * @return QueryBuilder
     */
    private function getQueryBuilder(string $table)
    {
        /** @var ConnectionPool $connectionPool */
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

        return $connectionPool->getQueryBuilderForTable($table);
    }
380
381
    /**
     * Runs the registered "expandParameters" hook functions for one configuration part.
     *
     * Each hook receives the parameter array by reference (key 'paramArray') and may change it;
     * the possibly modified array is returned. Without registered hooks the array is returned as is.
     *
     * @param array $paramArray Parameter array expanded so far
     * @param mixed $parameter Name of the parameter currently processed (passed as 'currentKey')
     * @param mixed $path Configuration part currently processed (passed as 'currentValue')
     * @param int $pid Current page id
     * @return array The parameter array after all hooks have run
     */
    private function runExpandParametersHook(array $paramArray, $parameter, $path, int $pid): array
    {
        // The hook is optional, so the configuration key may not exist at all
        $hookFunctions = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] ?? null;
        if (! is_array($hookFunctions)) {
            return $paramArray;
        }

        $_params = [
            // Objects are passed by handle, a reference to $this is not needed
            'pObj' => $this,
            'paramArray' => &$paramArray,
            'currentKey' => $parameter,
            'currentValue' => $path,
            'pid' => $pid,
        ];
        foreach ($hookFunctions as $_funcRef) {
            GeneralUtility::callUserFunction($_funcRef, $_params, $this);
        }

        return $paramArray;
    }
401
402
    /**
     * Returns the URL service held by this instance, or a newly requested one when none is set.
     */
    protected function getUrlService(): UrlService
    {
        if ($this->urlService !== null) {
            return $this->urlService;
        }

        return GeneralUtility::makeInstance(UrlService::class);
    }
406
}
407