Passed
Push — Cleanup/CrawlerController ( ef5bda...231e2f )
by Tomas Norre
19:25 queued 04:30
created

UrlService::compileUrls()   A

Complexity

Conditions 6
Paths 6

Size

Total Lines 18
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 10
c 0
b 0
f 0
nc 6
nop 3
dl 0
loc 18
rs 9.2222
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Service;
6
7
/*
8
 * (c) 2020 AOE GmbH <dev@aoe.com>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21
22
use Psr\Http\Message\UriInterface;
23
use TYPO3\CMS\Core\Http\Uri;
24
use TYPO3\CMS\Core\Routing\SiteMatcher;
25
use TYPO3\CMS\Core\Site\Entity\Site;
26
use TYPO3\CMS\Core\Utility\GeneralUtility;
27
28
/**
 * Builds the URLs the crawler requests: a single URL for a page plus query string, and the list of URLs
 * that results from combining parameter value sets.
 *
 * @internal since v9.2.5
 */
class UrlService
{
    /**
     * Build a URL from a Page and the Query String. If the page has a Site configuration, it is built by using
     * the router of the Site instance; otherwise a legacy "index.php" URL is assembled.
     *
     * @param int $pageId Page the URL is generated for
     * @param string $queryString Query string; for pages with a Site, leading "?" / "&" are stripped and the
     *                            "id" argument is dropped before routing. In the legacy branch it is appended
     *                            verbatim after "/index.php".
     * @param string|null $alternativeBaseUrl When not empty, its host, scheme, port and (if present) user info
     *                                        replace those of the generated URL
     * @param int $httpsOrHttp see tx_crawler_configuration.force_ssl: -1 forces "http", 1 forces "https",
     *                         any other value leaves the scheme untouched
     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException
     * @throws \TYPO3\CMS\Core\Routing\InvalidRouteArgumentsException
     */
    public function getUrlFromPageAndQueryParameters(int $pageId, string $queryString, ?string $alternativeBaseUrl, int $httpsOrHttp): UriInterface
    {
        $site = GeneralUtility::makeInstance(SiteMatcher::class)->matchByPageId($pageId);
        if ($site instanceof Site) {
            $queryParts = [];
            parse_str(ltrim($queryString, '?&'), $queryParts);
            // The page id is handed to the router as a separate argument.
            unset($queryParts['id']);
            // workaround as long as we don't have native language support in crawler configurations:
            // the language is passed on to the router through the "_language" argument.
            if (isset($queryParts['L'])) {
                $queryParts['_language'] = $queryParts['L'];
                unset($queryParts['L']);
            }
            $url = $site->getRouter()->generateUri($pageId, $queryParts);
            if (! empty($alternativeBaseUrl)) {
                $alternativeUri = new Uri($alternativeBaseUrl);
                $url = $url
                    ->withHost($alternativeUri->getHost())
                    ->withScheme($alternativeUri->getScheme())
                    ->withPort($alternativeUri->getPort());
                // User info is only overridden when the alternative base URL actually carries one.
                $userInfo = $alternativeUri->getUserInfo();
                if (! empty($userInfo)) {
                    $url = $url->withUserInfo($userInfo);
                }
            }
        } else {
            // Technically this is not possible with site handling, but kept for backwards-compatibility reasons
            // Once EXT:crawler is v10-only compatible, this should be removed completely
            $baseUrl = ($alternativeBaseUrl ?: GeneralUtility::getIndpEnv('TYPO3_SITE_URL'));
            $url = new Uri(rtrim($baseUrl, '/') . '/index.php' . $queryString);
        }

        if ($httpsOrHttp === -1) {
            $url = $url->withScheme('http');
        } elseif ($httpsOrHttp === 1) {
            $url = $url->withScheme('https');
        }

        return $url;
    }

    /**
     * Compiling URLs from parameter array (output of expandParameters())
     * The number of URLs will be the multiplication of the number of parameter values for each key,
     * capped at $maxUrlToCompile after every parameter has been applied.
     *
     * Every value is appended as "&name=value" (both raw-url-encoded); a value that casts to an empty string
     * leaves the URL unchanged. Parameter names are taken from the array keys as-is, so numeric names are
     * preserved and safely cast to string before encoding.
     *
     * @param array $paramArray Output of expandParameters(): Array with keys (GET var names) and for each an array of values
     * @param array $urls URLs accumulated so far; each one is combined with every value of every parameter
     * @param int $maxUrlToCompile Upper limit for the number of URLs kept per parameter
     * @return array The compiled URLs; $urls unchanged when $paramArray is empty
     */
    public function compileUrls(array $paramArray, array $urls, int $maxUrlToCompile = 10): array
    {
        // Iterating with foreach keeps the original keys; array_shift() would renumber integer keys.
        foreach ($paramArray as $varName => $valueSet) {
            // Array keys may be integers, rawurlencode() requires a string under strict_types.
            $encodedName = rawurlencode((string) $varName);
            $expandedUrls = [];
            foreach ($urls as $url) {
                foreach ($valueSet as $val) {
                    if (count($expandedUrls) >= $maxUrlToCompile) {
                        // Limit reached, nothing more can be added for this parameter.
                        break 2;
                    }
                    $value = (string) $val;
                    $expandedUrls[] = $url . ($value === '' ? '' : '&' . $encodedName . '=' . rawurlencode($value));
                }
            }
            $urls = $expandedUrls;
        }

        return $urls;
    }
}
111