1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace AOE\Crawler\Service; |
||
6 | |||
7 | /* |
||
8 | * (c) 2020 AOE GmbH <[email protected]> |
||
9 | * |
||
10 | * This file is part of the TYPO3 Crawler Extension. |
||
11 | * |
||
12 | * It is free software; you can redistribute it and/or modify it under |
||
13 | * the terms of the GNU General Public License, either version 2 |
||
14 | * of the License, or any later version. |
||
15 | * |
||
16 | * For the full copyright and license information, please read the |
||
17 | * LICENSE.txt file that was distributed with this source code. |
||
18 | * |
||
19 | * The TYPO3 project - inspiring people to share! |
||
20 | */ |
||
21 | |||
22 | use Psr\Http\Message\UriInterface; |
||
23 | use TYPO3\CMS\Core\Http\Uri; |
||
24 | use TYPO3\CMS\Core\Routing\SiteMatcher; |
||
25 | use TYPO3\CMS\Core\Site\Entity\Site; |
||
26 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||
27 | |||
28 | /** |
||
29 | * @internal since v9.2.5 |
||
30 | */ |
||
class UrlService
{
    /**
     * Build a URL from a page id and a query string.
     *
     * If the page belongs to a Site configuration, the URL is generated by the Site's router: the "id"
     * query parameter is dropped (the page id is passed to the router directly) and a legacy "L"
     * parameter is handed over as "_language". Otherwise a plain "<base>/index.php<queryString>" URL
     * is assembled, using $alternativeBaseUrl or, if that is empty, TYPO3_SITE_URL as base.
     *
     * @param int $pageId Page to build the URL for
     * @param string $queryString Query string; leading "?" and "&" are stripped in the Site branch only
     * @param string|null $alternativeBaseUrl If not empty, its scheme, host, port and user info replace those of the generated URL
     * @param int $httpsOrHttp see tx_crawler_configuration.force_ssl; -1 forces "http", 1 forces "https",
     *                         any other value leaves the scheme untouched
     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException
     * @throws \TYPO3\CMS\Core\Routing\InvalidRouteArgumentsException
     */
    public function getUrlFromPageAndQueryParameters(
        int $pageId,
        string $queryString,
        ?string $alternativeBaseUrl,
        int $httpsOrHttp
    ): UriInterface {
        $site = GeneralUtility::makeInstance(SiteMatcher::class)->matchByPageId($pageId);
        if ($site instanceof Site) {
            $queryString = ltrim($queryString, '?&');
            $queryParts = [];
            parse_str($queryString, $queryParts);
            unset($queryParts['id']);
            // workaround as long as we don't have native language support in crawler configurations
            // NOTE(review): the router is expected to resolve "_language" itself, so no language
            // object is looked up here - confirm against the supported TYPO3 versions.
            if (isset($queryParts['L'])) {
                $queryParts['_language'] = $queryParts['L'];
                unset($queryParts['L']);
            }
            $url = $site->getRouter()->generateUri($pageId, $queryParts);
            if (! empty($alternativeBaseUrl)) {
                // Only the authority part (scheme, host, port, user info) is taken over;
                // path and query stay as generated by the router.
                $baseUri = new Uri($alternativeBaseUrl);
                $url = $url->withHost($baseUri->getHost());
                $url = $url->withScheme($baseUri->getScheme());
                $url = $url->withPort($baseUri->getPort());
                $userInfo = $baseUri->getUserInfo();
                // Compare against '' explicitly so that a user info of "0" is not dropped
                if ($userInfo !== '') {
                    $url = $url->withUserInfo($userInfo);
                }
            }
        } else {
            // Technically this is not possible with site handling, but kept for backwards-compatibility reasons
            // Once EXT:crawler is v10-only compatible, this should be removed completely
            $baseUrl = ($alternativeBaseUrl ?: GeneralUtility::getIndpEnv('TYPO3_SITE_URL'));
            // The query string is appended verbatim, so it has to carry its own leading "?"
            $url = new Uri(rtrim($baseUrl, '/') . '/index.php' . $queryString);
        }

        if ($httpsOrHttp === -1) {
            $url = $url->withScheme('http');
        } elseif ($httpsOrHttp === 1) {
            $url = $url->withScheme('https');
        }

        return $url;
    }

    /**
     * Compiling URLs from parameter array (output of expandParameters())
     * The number of URLs will be the multiplication of the number of parameter values for each key,
     * capped at $maxUrlToCompile per parameter level.
     *
     * A value that casts to the empty string adds nothing to the URL, i.e. the URL is carried over unchanged.
     *
     * @param array $paramArray Output of expandParameters(): Array with keys (GET var names) and for each an array of values
     * @param array $urls URLs accumulated in this array (for recursion)
     * @param int $maxUrlToCompile Maximum number of URLs produced while processing one parameter
     * @return array The given URLs, each extended by "&name=value" for every parameter combination
     */
    public function compileUrls(array $paramArray, array $urls, int $maxUrlToCompile = 10): array
    {
        if (empty($paramArray)) {
            return $urls;
        }

        // array_shift() always removes the first element, so make sure key() refers to that element too.
        reset($paramArray);
        // Numeric array keys come back as int; rawurlencode() needs a string under strict_types.
        $encodedName = rawurlencode((string) key($paramArray));
        $valueSet = array_shift($paramArray);

        // Traverse value set:
        $newUrls = [];
        foreach ($urls as $url) {
            foreach ($valueSet as $val) {
                if (count($newUrls) >= $maxUrlToCompile) {
                    // Limit reached: nothing more can be added, so stop both loops.
                    break 2;
                }
                $value = (string) $val;
                $newUrls[] = $url . ($value !== '' ? '&' . $encodedName . '=' . rawurlencode($value) : '');
            }
        }

        return $this->compileUrls($paramArray, $newUrls, $maxUrlToCompile);
    }
}
||
111 |