AOEpeople /
crawler
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace AOE\Crawler\Service; |
||
| 6 | |||
| 7 | /* |
||
| 8 | * (c) 2020 AOE GmbH <[email protected]> |
||
| 9 | * |
||
| 10 | * This file is part of the TYPO3 Crawler Extension. |
||
| 11 | * |
||
| 12 | * It is free software; you can redistribute it and/or modify it under |
||
| 13 | * the terms of the GNU General Public License, either version 2 |
||
| 14 | * of the License, or any later version. |
||
| 15 | * |
||
| 16 | * For the full copyright and license information, please read the |
||
| 17 | * LICENSE.txt file that was distributed with this source code. |
||
| 18 | * |
||
| 19 | * The TYPO3 project - inspiring people to share! |
||
| 20 | */ |
||
| 21 | |||
| 22 | use Psr\Http\Message\UriInterface; |
||
| 23 | use TYPO3\CMS\Core\Http\Uri; |
||
| 24 | use TYPO3\CMS\Core\Routing\SiteMatcher; |
||
| 25 | use TYPO3\CMS\Core\Site\Entity\Site; |
||
| 26 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||
| 27 | |||
| 28 | /** |
||
| 29 | * @internal since v9.2.5 |
||
| 30 | */ |
||
class UrlService
{
    /**
     * Build a URL from a page id and a query string.
     *
     * If the page resolves to a Site, the URL is generated by that site's router and,
     * when an alternative base URL is given, its host, scheme, port and (if present)
     * user info replace those of the generated URL. Otherwise the URL is assembled
     * as "<base>/index.php<queryString>".
     *
     * @param int $pageId Page the URL should point to
     * @param string $queryString Query string; in the site branch leading "?" and "&" are stripped
     *                            and the "id" parameter is discarded, in the fallback branch it is
     *                            appended verbatim after "/index.php"
     * @param string|null $alternativeBaseUrl Base URL overriding the generated one; ignored when empty
     * @param int $httpsOrHttp see tx_crawler_configuration.force_ssl: -1 forces "http", 1 forces "https",
     *                         any other value keeps the scheme as built
     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException
     * @throws \TYPO3\CMS\Core\Routing\InvalidRouteArgumentsException
     */
    public function getUrlFromPageAndQueryParameters(int $pageId, string $queryString, ?string $alternativeBaseUrl, int $httpsOrHttp): UriInterface
    {
        $site = GeneralUtility::makeInstance(SiteMatcher::class)->matchByPageId($pageId);
        if ($site instanceof Site) {
            $queryParts = [];
            parse_str(ltrim($queryString, '?&'), $queryParts);
            // The page id is handed to the router separately
            unset($queryParts['id']);

            // workaround as long as we don't have native language support in crawler configurations
            if (isset($queryParts['L'])) {
                $queryParts['_language'] = $queryParts['L'];
                unset($queryParts['L']);
                // NOTE(review): the returned language was never used. The call is kept because it
                // presumably rejects language ids unknown to the site - confirm, and drop it if the
                // router already performs that validation.
                $site->getLanguageById((int) $queryParts['_language']);
            }

            $url = $site->getRouter()->generateUri($pageId, $queryParts);

            if (! empty($alternativeBaseUrl)) {
                // Separate local so the ?string parameter is not overwritten with an object
                $baseUri = new Uri($alternativeBaseUrl);
                $url = $url
                    ->withHost($baseUri->getHost())
                    ->withScheme($baseUri->getScheme())
                    ->withPort($baseUri->getPort());

                $userInfo = $baseUri->getUserInfo();
                if ($userInfo) {
                    $url = $url->withUserInfo($userInfo);
                }
            }
        } else {
            // Technically this is not possible with site handling, but kept for backwards-compatibility reasons
            // Once EXT:crawler is v10-only compatible, this should be removed completely
            $baseUrl = ($alternativeBaseUrl ?: GeneralUtility::getIndpEnv('TYPO3_SITE_URL'));
            $url = new Uri(rtrim($baseUrl, '/') . '/index.php' . $queryString);
        }

        if ($httpsOrHttp === -1) {
            $url = $url->withScheme('http');
        } elseif ($httpsOrHttp === 1) {
            $url = $url->withScheme('https');
        }

        return $url;
    }

    /**
     * Compiling URLs from parameter array (output of expandParameters())
     * The number of URLs will be the multiplication of the number of parameter values for each key,
     * limited to $maxUrlToCompile entries after each parameter has been applied.
     *
     * Every value of a parameter is appended to every URL collected so far as "&name=value"
     * (both raw-url-encoded). A value that is the empty string appends nothing. A parameter
     * with an empty value set yields no URLs at all. With an empty $paramArray the given
     * $urls are returned unchanged.
     *
     * @param array $paramArray Output of expandParameters(): Array with keys (GET var names) and for each an array of values
     * @param array $urls URLs accumulated so far; each entry is used as prefix for the compiled URLs
     * @param int $maxUrlToCompile Maximum number of URLs kept per processed parameter
     * @return array List of compiled URLs
     */
    public function compileUrls(array $paramArray, array $urls, int $maxUrlToCompile = 10): array
    {
        // foreach keeps the original keys; array_shift() would renumber integer keys and
        // thereby change numeric GET var names.
        foreach ($paramArray as $varName => $valueSet) {
            // Keys can be integers (PHP converts numeric string keys), rawurlencode() needs a string
            $encodedName = rawurlencode((string) $varName);

            $newUrls = [];
            foreach ($urls as $url) {
                foreach ($valueSet as $val) {
                    if (count($newUrls) >= $maxUrlToCompile) {
                        // Limit reached, nothing more can be added for this parameter
                        break 2;
                    }
                    $value = (string) $val;
                    $newUrls[] = $url . ($value !== '' ? '&' . $encodedName . '=' . rawurlencode($value) : '');
                }
            }
            $urls = $newUrls;
        }

        return $urls;
    }
}
||
| 111 |