| 
                    1
                 | 
                                    
                                                     | 
                
                 | 
                <?php  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    2
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    3
                 | 
                                    
                                                     | 
                
                 | 
                declare(strict_types=1);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    4
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    5
                 | 
                                    
                                                     | 
                
                 | 
                namespace AOE\Crawler\Service;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    6
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    7
                 | 
                                    
                                                     | 
                
                 | 
                /*  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    8
                 | 
                                    
                                                     | 
                
                 | 
                 * (c) 2020 AOE GmbH <[email protected]>  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    9
                 | 
                                    
                                                     | 
                
                 | 
                 *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    10
                 | 
                                    
                                                     | 
                
                 | 
                 * This file is part of the TYPO3 Crawler Extension.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    11
                 | 
                                    
                                                     | 
                
                 | 
                 *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    12
                 | 
                                    
                                                     | 
                
                 | 
                 * It is free software; you can redistribute it and/or modify it under  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    13
                 | 
                                    
                                                     | 
                
                 | 
                 * the terms of the GNU General Public License, either version 2  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    14
                 | 
                                    
                                                     | 
                
                 | 
                 * of the License, or any later version.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    15
                 | 
                                    
                                                     | 
                
                 | 
                 *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    16
                 | 
                                    
                                                     | 
                
                 | 
                 * For the full copyright and license information, please read the  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    17
                 | 
                                    
                                                     | 
                
                 | 
                 * LICENSE.txt file that was distributed with this source code.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    18
                 | 
                                    
                                                     | 
                
                 | 
                 *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    19
                 | 
                                    
                                                     | 
                
                 | 
                 * The TYPO3 project - inspiring people to share!  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    20
                 | 
                                    
                                                     | 
                
                 | 
                 */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    21
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    22
                 | 
                                    
                                                     | 
                
                 | 
                use Psr\Http\Message\UriInterface;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    23
                 | 
                                    
                                                     | 
                
                 | 
                use TYPO3\CMS\Core\Http\Uri;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    24
                 | 
                                    
                                                     | 
                
                 | 
                use TYPO3\CMS\Core\Routing\SiteMatcher;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    25
                 | 
                                    
                                                     | 
                
                 | 
                use TYPO3\CMS\Core\Site\Entity\Site;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    26
                 | 
                                    
                                                     | 
                
                 | 
                use TYPO3\CMS\Core\Utility\GeneralUtility;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    27
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    28
                 | 
                                    
                                                     | 
                
                 | 
                /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    29
                 | 
                                    
                                                     | 
                
                 | 
                 * @internal since v9.2.5  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    30
                 | 
                                    
                                                     | 
                
                 | 
                 */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    31
                 | 
                                    
                                                     | 
                
                 | 
                class UrlService  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    32
                 | 
                                    
                                                     | 
                
                 | 
                { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    33
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    34
                 | 
                                    
                                                     | 
                
                 | 
                     * Build a URL from a Page and the Query String. If the page has a Site configuration, it can be built by using  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    35
                 | 
                                    
                                                     | 
                
                 | 
                     * the Site instance.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    36
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    37
                 | 
                                    
                                                     | 
                
                 | 
                     * @param int $httpsOrHttp see tx_crawler_configuration.force_ssl  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    38
                 | 
                                    
                                                     | 
                
                 | 
                     * @throws \TYPO3\CMS\Core\Exception\SiteNotFoundException  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    39
                 | 
                                    
                                                     | 
                
                 | 
                     * @throws \TYPO3\CMS\Core\Routing\InvalidRouteArgumentsException  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    40
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    41
                 | 
                                    
                             15                          | 
                
                 | 
                    public function getUrlFromPageAndQueryParameters(int $pageId, string $queryString, ?string $alternativeBaseUrl, int $httpsOrHttp): UriInterface  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    42
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    43
                 | 
                                    
                             15                          | 
                
                 | 
                        $site = GeneralUtility::makeInstance(SiteMatcher::class)->matchByPageId($pageId);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    44
                 | 
                                    
                             15                          | 
                
                 | 
                        if ($site instanceof Site) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    45
                 | 
                                    
                             5                          | 
                
                 | 
                            $queryString = ltrim($queryString, '?&');  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    46
                 | 
                                    
                             5                          | 
                
                 | 
                            $queryParts = [];  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    47
                 | 
                                    
                             5                          | 
                
                 | 
                            parse_str($queryString, $queryParts);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    48
                 | 
                                    
                             5                          | 
                
                 | 
                            unset($queryParts['id']);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    49
                 | 
                                    
                                                     | 
                
                 | 
                            // workaround as long as we don't have native language support in crawler configurations  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    50
                 | 
                                    
                             5                          | 
                
                 | 
                            if (isset($queryParts['L'])) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    51
                 | 
                                    
                             1                          | 
                
                 | 
                                $queryParts['_language'] = $queryParts['L'];  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    52
                 | 
                                    
                             1                          | 
                
                 | 
                                unset($queryParts['L']);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    53
                 | 
                                    
                             1                          | 
                
                 | 
                                $siteLanguage = $site->getLanguageById((int) $queryParts['_language']);  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    54
                 | 
                                    
                                                     | 
                
                 | 
                            } else { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    55
                 | 
                                    
                             4                          | 
                
                 | 
                                $siteLanguage = $site->getDefaultLanguage();  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    56
                 | 
                                    
                                                     | 
                
                 | 
                            }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    57
                 | 
                                    
                             5                          | 
                
                 | 
                            $url = $site->getRouter()->generateUri($pageId, $queryParts);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    58
                 | 
                                    
                             5                          | 
                
                 | 
                            if (! empty($alternativeBaseUrl)) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    59
                 | 
                                    
                             2                          | 
                
                 | 
                                $alternativeBaseUrl = new Uri($alternativeBaseUrl);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    60
                 | 
                                    
                             2                          | 
                
                 | 
                                $url = $url->withHost($alternativeBaseUrl->getHost());  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    61
                 | 
                                    
                             2                          | 
                
                 | 
                                $url = $url->withScheme($alternativeBaseUrl->getScheme());  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    62
                 | 
                                    
                             2                          | 
                
                 | 
                                $url = $url->withPort($alternativeBaseUrl->getPort());  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    63
                 | 
                                    
                             2                          | 
                
                 | 
                                if ($userInfo = $alternativeBaseUrl->getUserInfo()) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    64
                 | 
                                    
                             5                          | 
                
                 | 
                                    $url = $url->withUserInfo($userInfo);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    65
                 | 
                                    
                                                     | 
                
                 | 
                                }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    66
                 | 
                                    
                                                     | 
                
                 | 
                            }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    67
                 | 
                                    
                                                     | 
                
                 | 
                        } else { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    68
                 | 
                                    
                                                     | 
                
                 | 
                            // Technically this is not possible with site handling, but kept for backwards-compatibility reasons  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    69
                 | 
                                    
                                                     | 
                
                 | 
                            // Once EXT:crawler is v10-only compatible, this should be removed completely  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    70
                 | 
                                    
                             10                          | 
                
                 | 
                            $baseUrl = ($alternativeBaseUrl ?: GeneralUtility::getIndpEnv('TYPO3_SITE_URL')); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    71
                 | 
                                    
                             10                          | 
                
                 | 
                            $url = rtrim($baseUrl, '/') . '/index.php' . $queryString;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    72
                 | 
                                    
                             10                          | 
                
                 | 
                            $url = new Uri($url);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    73
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    74
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    75
                 | 
                                    
                             15                          | 
                
                 | 
                        if ($httpsOrHttp === -1) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    76
                 | 
                                    
                             1                          | 
                
                 | 
                            $url = $url->withScheme('http'); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    77
                 | 
                                    
                             14                          | 
                
                 | 
                        } elseif ($httpsOrHttp === 1) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    78
                 | 
                                    
                             7                          | 
                
                 | 
                            $url = $url->withScheme('https'); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    79
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    80
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    81
                 | 
                                    
                             15                          | 
                
                 | 
                        return $url;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    82
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    83
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    84
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    85
                 | 
                                    
                                                     | 
                
                 | 
                     * Compiling URLs from parameter array (output of expandParameters())  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    86
                 | 
                                    
                                                     | 
                
                 | 
                     * The number of URLs will be the multiplication of the number of parameter values for each key  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    87
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    88
                 | 
                                    
                                                     | 
                
                 | 
                     * @param array $paramArray Output of expandParameters(): Array with keys (GET var names) and for each an array of values  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    89
                 | 
                                    
                                                     | 
                
                 | 
                     * @param array $urls URLs accumulated in this array (for recursion)  | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    90
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    91
                 | 
                                    
                             13                          | 
                
                 | 
                    public function compileUrls(array $paramArray, array $urls, int $maxUrlToCompile = 10): array  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    92
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    93
                 | 
                                    
                             13                          | 
                
                 | 
                        if (empty($paramArray)) { | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    94
                 | 
                                    
                             13                          | 
                
                 | 
                            return $urls;  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    95
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    96
                 | 
                                    
                             12                          | 
                
                 | 
                        $varName = key($paramArray);  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    97
                 | 
                                    
                             12                          | 
                
                 | 
                        $valueSet = array_shift($paramArray);  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    98
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    99
                 | 
                                    
                                                     | 
                
                 | 
                        // Traverse value set:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    100
                 | 
                                    
                             12                          | 
                
                 | 
                        $newUrls = [];  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    101
                 | 
                                    
                             12                          | 
                
                 | 
                        foreach ($urls as $url) { | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    102
                 | 
                                    
                             11                          | 
                
                 | 
                            foreach ($valueSet as $val) { | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    103
                 | 
                                    
                             10                          | 
                
                 | 
                                if (count($newUrls) < $maxUrlToCompile) { | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    104
                 | 
                                    
                             10                          | 
                
                 | 
                                    $newUrls[] = $url . (strcmp((string) $val, '') ? '&' . rawurlencode($varName) . '=' . rawurlencode((string) $val) : '');  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    105
                 | 
                                    
                                                     | 
                
                 | 
                                }  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    106
                 | 
                                    
                                                     | 
                
                 | 
                            }  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    107
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    108
                 | 
                                    
                             12                          | 
                
                 | 
                        return $this->compileUrls($paramArray, $newUrls, $maxUrlToCompile);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    109
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    110
                 | 
                                    
                                                     | 
                
                 | 
                }  | 
            
            
                                                        
            
                                    
            
            
                | 
                    111
                 | 
                                    
                                                     | 
                
                 | 
                 |