| 
                    1
                 | 
                                    
                                                     | 
                
                 | 
                <?php  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    2
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    3
                 | 
                                    
                                                     | 
                
                 | 
                namespace vipnytt;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    4
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    5
                 | 
                                    
                                                     | 
                
                 | 
                use GuzzleHttp;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    6
                 | 
                                    
                                                     | 
                
                 | 
                use SimpleXMLElement;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    7
                 | 
                                    
                                                     | 
                
                 | 
                use vipnytt\SitemapParser\Exceptions;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    8
                 | 
                                    
                                                     | 
                
                 | 
                use vipnytt\SitemapParser\UrlParser;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    9
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    10
                 | 
                                    
                                                     | 
                
                 | 
                /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    11
                 | 
                                    
                                                     | 
                
                 | 
                 * SitemapParser class  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    12
                 | 
                                    
                                                     | 
                
                 | 
                 *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    13
                 | 
                                    
                                                     | 
                
                 | 
                 * @license https://opensource.org/licenses/MIT MIT license  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    14
                 | 
                                    
                                                     | 
                
                 | 
                 * @link https://github.com/VIPnytt/SitemapParser  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    15
                 | 
                                    
                                                     | 
                
                 | 
                 *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    16
                 | 
                                    
                                                     | 
                
                 | 
                 * Specifications:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    17
                 | 
                                    
                                                     | 
                
                 | 
                 * @link http://www.sitemaps.org/protocol.html  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    18
                 | 
                                    
                                                     | 
                
                 | 
                 */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    19
                 | 
                                    
                                                     | 
                
                 | 
                class SitemapParser  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    20
                 | 
                                    
                                                     | 
                
                 | 
                { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    21
                 | 
                                    
                                                     | 
                
                 | 
                    use UrlParser;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    22
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    23
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    24
                 | 
                                    
                                                     | 
                
                 | 
                     * Default User-Agent  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    25
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    26
                 | 
                                    
                                                     | 
                
                 | 
                    const DEFAULT_USER_AGENT = 'SitemapParser-VIPnytt/1.1 (+https://github.com/VIPnytt/SitemapParser/blob/master/README.md)';  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    27
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    28
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    29
                 | 
                                    
                                                     | 
                
                 | 
                     * Default encoding  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    30
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    31
                 | 
                                    
                                                     | 
                
                 | 
                    const ENCODING = 'UTF-8';  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    32
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    33
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    34
                 | 
                                    
                                                     | 
                
                 | 
                     * XML file extension  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    35
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    36
                 | 
                                    
                                                     | 
                
                 | 
                    const XML_EXTENSION = 'xml';  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    37
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    38
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    39
                 | 
                                    
                                                     | 
                
                 | 
                     * Compressed XML file extension  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    40
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    41
                 | 
                                    
                                                     | 
                
                 | 
                    const XML_EXTENSION_COMPRESSED = 'xml.gz';  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    42
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    43
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    44
                 | 
                                    
                                                     | 
                
                 | 
                     * XML Sitemap tag  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    45
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    46
                 | 
                                    
                                                     | 
                
                 | 
                    const XML_TAG_SITEMAP = 'sitemap';  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    47
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    48
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    49
                 | 
                                    
                                                     | 
                
                 | 
                     * XML URL tag  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    50
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    51
                 | 
                                    
                                                     | 
                
                 | 
                    const XML_TAG_URL = 'url';  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    52
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    53
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    54
                 | 
                                    
                                                     | 
                
                 | 
                     * Robots.txt path  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    55
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    56
                 | 
                                    
                                                     | 
                
                 | 
                    const ROBOTSTXT_PATH = '/robots.txt';  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    57
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    58
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    59
                 | 
                                    
                                                     | 
                
                 | 
                     * User-Agent to send with every HTTP(S) request  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    60
                 | 
                                    
                                                     | 
                
                 | 
                     * @var string  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    61
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    62
                 | 
                                    
                                                     | 
                
                 | 
                    protected $userAgent;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    63
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    64
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    65
                 | 
                                    
                                                     | 
                
                 | 
                     * Configuration options  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    66
                 | 
                                    
                                                     | 
                
                 | 
                     * @var array  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    67
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    68
                 | 
                                    
                                                     | 
                
                 | 
                    protected $config = [];  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    69
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    70
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    71
                 | 
                                    
                                                     | 
                
                 | 
                     * Sitemaps discovered  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    72
                 | 
                                    
                                                     | 
                
                 | 
                     * @var array  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    73
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    74
                 | 
                                    
                                                     | 
                
                 | 
                    protected $sitemaps = [];  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    75
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    76
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    77
                 | 
                                    
                                                     | 
                
                 | 
                     * URLs discovered  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    78
                 | 
                                    
                                                     | 
                
                 | 
                     * @var array  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    79
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    80
                 | 
                                    
                                                     | 
                
                 | 
                    protected $urls = [];  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    81
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    82
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    83
                 | 
                                    
                                                     | 
                
                 | 
                     * Sitemap URLs discovered but not yet parsed  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    84
                 | 
                                    
                                                     | 
                
                 | 
                     * @var array  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    85
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    86
                 | 
                                    
                                                     | 
                
                 | 
                    protected $queue = [];  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    87
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    88
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    89
                 | 
                                    
                                                     | 
                
                 | 
                     * Parsed URLs history  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    90
                 | 
                                    
                                                     | 
                
                 | 
                     * @var array  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    91
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    92
                 | 
                                    
                                                     | 
                
                 | 
                    protected $history = [];  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    93
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    94
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    95
                 | 
                                    
                                                     | 
                
                 | 
                     * Current URL being parsed  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    96
                 | 
                                    
                                                     | 
                
                 | 
                     * @var null|string  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    97
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    98
                 | 
                                    
                                                     | 
                
                 | 
                    protected $currentURL;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    99
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    100
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    101
                 | 
                                    
                                                     | 
                
                 | 
                     * Constructor  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    102
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    103
                 | 
                                    
                                                     | 
                
                 | 
                     * @param string $userAgent User-Agent to send with every HTTP(S) request  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    104
                 | 
                                    
                                                     | 
                
                 | 
                     * @param array $config Configuration options  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    105
                 | 
                                    
                                                     | 
                
                 | 
                     * @throws Exceptions\SitemapParserException  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    106
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    107
                 | 
                                    
                                                     | 
                
                 | 
                    public function __construct($userAgent = self::DEFAULT_USER_AGENT, array $config = [])  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    108
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    109
                 | 
                                    
                                                     | 
                
                 | 
                        mb_language("uni"); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    110
                 | 
                                    
                                                     | 
                
                 | 
                        if (!mb_internal_encoding(self::ENCODING)) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    111
                 | 
                                    
                                                     | 
                
                 | 
                            throw new Exceptions\SitemapParserException('Unable to set internal character encoding to `' . self::ENCODING . '`'); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    112
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    113
                 | 
                                    
                                                     | 
                
                 | 
                        $this->userAgent = $userAgent;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    114
                 | 
                                    
                                                     | 
                
                 | 
                        $this->config = $config;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    115
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    116
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    117
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    118
                 | 
                                    
                                                     | 
                
                 | 
                     * Parse Recursive  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    119
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    120
                 | 
                                    
                                                     | 
                
                 | 
                     * @param string $url  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    121
                 | 
                                    
                                                     | 
                
                 | 
                     * @return void  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    122
                 | 
                                    
                                                     | 
                
                 | 
                     * @throws Exceptions\SitemapParserException  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    123
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    124
                 | 
                                    
                                                     | 
                
                 | 
                    public function parseRecursive($url)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    125
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    126
                 | 
                                    
                                                     | 
                
                 | 
                        $this->addToQueue([$url]);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    127
                 | 
                                    
                                                     | 
                
                 | 
                        while (count($todo = $this->getQueue()) > 0) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    128
                 | 
                                    
                                                     | 
                
                 | 
                            $sitemaps = $this->sitemaps;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    129
                 | 
                                    
                                                     | 
                
                 | 
                            $urls = $this->urls;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    130
                 | 
                                    
                                                     | 
                
                 | 
                            try { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    131
                 | 
                                    
                                                     | 
                
                 | 
                                $this->parse($todo[0]);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    132
                 | 
                                    
                                                     | 
                
                 | 
                            } catch (Exceptions\TransferException $e) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    133
                 | 
                                    
                                                     | 
                
                 | 
                                // Keep crawling  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    134
                 | 
                                    
                                                     | 
                
                 | 
                                continue;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    135
                 | 
                                    
                                                     | 
                
                 | 
                            }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    136
                 | 
                                    
                                                     | 
                
                 | 
                            $this->sitemaps = array_merge_recursive($sitemaps, $this->sitemaps);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    137
                 | 
                                    
                                                     | 
                
                 | 
                            $this->urls = array_merge_recursive($urls, $this->urls);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    138
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    139
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    140
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    141
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    142
                 | 
                                    
                                                     | 
                
                 | 
                     * Add an array of URLs to the parser queue  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    143
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    144
                 | 
                                    
                                                     | 
                
                 | 
                     * @param array $urlArray  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    145
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    146
                 | 
                                    
                                                     | 
                
                 | 
                    public function addToQueue(array $urlArray)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    147
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    148
                 | 
                                    
                                                     | 
                
                 | 
                        foreach ($urlArray as $url) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    149
                 | 
                                    
                                                     | 
                
                 | 
                            $url = $this->urlEncode($url);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    150
                 | 
                                    
                                                     | 
                
                 | 
                            if ($this->urlValidate($url)) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    151
                 | 
                                    
                                                     | 
                
                 | 
                                $this->queue[] = $url;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    152
                 | 
                                    
                                                     | 
                
                 | 
                            }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    153
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    154
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    155
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    156
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    157
                 | 
                                    
                                                     | 
                
                 | 
                     * Sitemap URLs discovered but not yet parsed  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    158
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    159
                 | 
                                    
                                                     | 
                
                 | 
                     * @return array  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    160
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    161
                 | 
                                    
                                                     | 
                
                 | 
                    public function getQueue()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    162
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    163
                 | 
                                    
                                                     | 
                
                 | 
                        $this->queue = array_values(array_diff(array_unique(array_merge($this->queue, array_keys($this->sitemaps))), $this->history));  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    164
                 | 
                                    
                                                     | 
                
                 | 
                        return $this->queue;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    165
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    166
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    167
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    168
                 | 
                                    
                                                     | 
                
                 | 
                     * Parse  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    169
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    170
                 | 
                                    
                                                     | 
                
                 | 
                     * @param string $url URL to parse  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    171
                 | 
                                    
                                                     | 
                
                 | 
                     * @param string|null $urlContent URL body content (provide to skip download)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    172
                 | 
                                    
                                                     | 
                
                 | 
                     * @return void  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    173
                 | 
                                    
                                                     | 
                
                 | 
                     * @throws Exceptions\TransferException  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    174
                 | 
                                    
                                                     | 
                
                 | 
                     * @throws Exceptions\SitemapParserException  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    175
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    176
                 | 
                                    
                                                     | 
                
                 | 
                    public function parse($url, $urlContent = null)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    177
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    178
                 | 
                                    
                                                     | 
                
                 | 
                        $this->clean();  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    179
                 | 
                                    
                                                     | 
                
                 | 
                        $this->currentURL = $this->urlEncode($url);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    180
                 | 
                                    
                                                     | 
                
                 | 
                        if (!$this->urlValidate($this->currentURL)) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    181
                 | 
                                    
                                                     | 
                
                 | 
                            throw new Exceptions\SitemapParserException('Invalid URL'); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    182
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    183
                 | 
                                    
                                                     | 
                
                 | 
                        $this->history[] = $this->currentURL;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    184
                 | 
                                    
                                                     | 
                
                 | 
                        $response = is_string($urlContent) ? $urlContent : $this->getContent();  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    185
                 | 
                                    
                                                     | 
                
                 | 
                        if (parse_url($this->currentURL, PHP_URL_PATH) === self::ROBOTSTXT_PATH) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    186
                 | 
                                    
                                                     | 
                
                 | 
                            $this->parseRobotstxt($response);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    187
                 | 
                                    
                                                     | 
                
                 | 
                            return;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    188
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    189
                 | 
                                    
                                                     | 
                
                 | 
                        // Check if content is an gzip file  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    190
                 | 
                                    
                                                     | 
                
                 | 
                        if (mb_strpos($response, "\x1f\x8b\x08", 0, "US-ASCII") === 0) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    191
                 | 
                                    
                                                     | 
                
                 | 
                            $response = gzdecode($response);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    192
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    193
                 | 
                                    
                                                     | 
                
                 | 
                        $sitemapJson = $this->generateXMLObject($response);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    194
                 | 
                                    
                                                     | 
                
                 | 
                        if ($sitemapJson instanceof SimpleXMLElement === false) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    195
                 | 
                                    
                                                     | 
                
                 | 
                            $this->parseString($response);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    196
                 | 
                                    
                                                     | 
                
                 | 
                            return;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    197
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    198
                 | 
                                    
                                                     | 
                
                 | 
                        $this->parseJson(self::XML_TAG_SITEMAP, $sitemapJson);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    199
                 | 
                                    
                                                     | 
                
                 | 
                        $this->parseJson(self::XML_TAG_URL, $sitemapJson);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    200
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    201
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    202
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    203
                 | 
                                    
                                                     | 
                
                 | 
                     * Cleanup between each parse  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    204
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    205
                 | 
                                    
                                                     | 
                
                 | 
                     * @return void  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    206
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    207
                 | 
                                    
                                                     | 
                
                 | 
                    protected function clean()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    208
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    209
                 | 
                                    
                                                     | 
                
                 | 
                        $this->sitemaps = [];  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    210
                 | 
                                    
                                                     | 
                
                 | 
                        $this->urls = [];  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    211
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    212
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    213
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    214
                 | 
                                    
                                                     | 
                
                 | 
                     * Request the body content of an URL  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    215
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    216
                 | 
                                    
                                                     | 
                
                 | 
                     * @return string Raw body content  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    217
                 | 
                                    
                                                     | 
                
                 | 
                     * @throws Exceptions\TransferException  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    218
                 | 
                                    
                                                     | 
                
                 | 
                     * @throws Exceptions\SitemapParserException  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    219
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    220
                 | 
                                    
                                                     | 
                
                 | 
                    protected function getContent()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    221
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    222
                 | 
                                    
                                                     | 
                
                 | 
                        $this->currentURL = $this->urlEncode($this->currentURL);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    223
                 | 
                                    
                                                     | 
                
                 | 
                        if (!$this->urlValidate($this->currentURL)) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    224
                 | 
                                    
                                                     | 
                
                 | 
                            throw new Exceptions\SitemapParserException('Invalid URL'); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    225
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    226
                 | 
                                    
                                                     | 
                
                 | 
                        try { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    227
                 | 
                                    
                                                     | 
                
                 | 
                            if (!isset($this->config['guzzle']['headers']['User-Agent'])) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    228
                 | 
                                    
                                                     | 
                
                 | 
                                $this->config['guzzle']['headers']['User-Agent'] = $this->userAgent;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    229
                 | 
                                    
                                                     | 
                
                 | 
                            }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    230
                 | 
                                    
                                                     | 
                
                 | 
                            $client = new GuzzleHttp\Client();  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    231
                 | 
                                    
                                                     | 
                
                 | 
                            $res = $client->request('GET', $this->currentURL, $this->config['guzzle']); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    232
                 | 
                                    
                                                     | 
                
                 | 
                            return $res->getBody()->getContents();  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    233
                 | 
                                    
                                                     | 
                
                 | 
                        } catch (GuzzleHttp\Exception\TransferException $e) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    234
                 | 
                                    
                                                     | 
                
                 | 
                            throw new Exceptions\TransferException('Unable to fetch URL contents', 0, $e); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    235
                 | 
                                    
                                                     | 
                
                 | 
                        } catch (GuzzleHttp\Exception\GuzzleException $e) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    236
                 | 
                                    
                                                     | 
                
                 | 
                            throw new Exceptions\SitemapParserException('GuzzleHttp exception', 0, $e); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    237
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    238
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    239
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    240
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    241
                 | 
                                    
                                                     | 
                
                 | 
                     * Search for sitemaps in the robots.txt content  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    242
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    243
                 | 
                                    
                                                     | 
                
                 | 
                     * @param string $robotstxt  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    244
                 | 
                                    
                                                     | 
                
                 | 
                     * @return bool  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    245
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    246
                 | 
                                    
                                                     | 
                
                 | 
                    protected function parseRobotstxt($robotstxt)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    247
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    248
                 | 
                                    
                                                     | 
                
                 | 
                        // Split lines into array  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    249
                 | 
                                    
                                                     | 
                
                 | 
                        $lines = array_filter(array_map('trim', mb_split('\r\n|\n|\r', $robotstxt))); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    250
                 | 
                                    
                                                     | 
                
                 | 
                        // Parse each line individually  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    251
                 | 
                                    
                                                     | 
                
                 | 
                        foreach ($lines as $line) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    252
                 | 
                                    
                                                     | 
                
                 | 
                            // Remove comments  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    253
                 | 
                                    
                                                     | 
                
                 | 
                            $line = mb_split('#', $line, 2)[0]; | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    254
                 | 
                                    
                                                     | 
                
                 | 
                            // Split by directive and rule  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    255
                 | 
                                    
                                                     | 
                
                 | 
                            $pair = array_map('trim', mb_split(':', $line, 2)); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    256
                 | 
                                    
                                                     | 
                
                 | 
                            // Check if the line contains a sitemap  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    257
                 | 
                                    
                                                     | 
                
                 | 
                            if (  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    258
                 | 
                                    
                                                     | 
                
                 | 
                                mb_strtolower($pair[0]) !== self::XML_TAG_SITEMAP ||  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    259
                 | 
                                    
                                                     | 
                
                 | 
                                empty($pair[1])  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    260
                 | 
                                    
                                                     | 
                
                 | 
                            ) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    261
                 | 
                                    
                                                     | 
                
                 | 
                                // Line does not contain any supported directive  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    262
                 | 
                                    
                                                     | 
                
                 | 
                                continue;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    263
                 | 
                                    
                                                     | 
                
                 | 
                            }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    264
                 | 
                                    
                                                     | 
                
                 | 
                            $url = $this->urlEncode($pair[1]);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    265
                 | 
                                    
                                                     | 
                
                 | 
                            if ($this->urlValidate($url)) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    266
                 | 
                                    
                                                     | 
                
                 | 
                                $this->addArray(self::XML_TAG_SITEMAP, ['loc' => $url]);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    267
                 | 
                                    
                                                     | 
                
                 | 
                            }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    268
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    269
                 | 
                                    
                                                     | 
                
                 | 
                        return true;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    270
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    271
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    272
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    273
                 | 
                                    
                                                     | 
                
                 | 
                     * Validate URL arrays and add them to their corresponding arrays  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    274
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    275
                 | 
                                    
                                                     | 
                
                 | 
                     * @param string $type sitemap|url  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    276
                 | 
                                    
                                                     | 
                
                 | 
                     * @param array $array Tag array  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    277
                 | 
                                    
                                                     | 
                
                 | 
                     * @return bool  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    278
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    279
                 | 
                                    
                                                     | 
                
                 | 
                    protected function addArray($type, array $array)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    280
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    281
                 | 
                                    
                                                     | 
                
                 | 
                        if (!isset($array['loc'])) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    282
                 | 
                                    
                                                     | 
                
                 | 
                            return false;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    283
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    284
                 | 
                                    
                                                     | 
                
                 | 
                        $array['loc'] = $this->urlEncode(trim($array['loc']));  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    285
                 | 
                                    
                                                     | 
                
                 | 
                        if ($this->urlValidate($array['loc'])) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    286
                 | 
                                    
                                                     | 
                
                 | 
                            switch ($type) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    287
                 | 
                                    
                                                     | 
                
                 | 
                                case self::XML_TAG_SITEMAP:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    288
                 | 
                                    
                                                     | 
                
                 | 
                                    $this->sitemaps[$array['loc']] = $this->fixMissingTags(['lastmod'], $array);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    289
                 | 
                                    
                                                     | 
                
                 | 
                                    return true;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    290
                 | 
                                    
                                                     | 
                
                 | 
                                case self::XML_TAG_URL:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    291
                 | 
                                    
                                                     | 
                
                 | 
                                    $this->urls[$array['loc']] = $this->fixMissingTags(['lastmod', 'changefreq', 'priority'], $array);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    292
                 | 
                                    
                                                     | 
                
                 | 
                                    return true;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    293
                 | 
                                    
                                                     | 
                
                 | 
                            }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    294
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    295
                 | 
                                    
                                                     | 
                
                 | 
                        return false;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    296
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    297
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    298
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    299
                 | 
                                    
                                                     | 
                
                 | 
                     * Check for missing values and set them to null  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    300
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    301
                 | 
                                    
                                                     | 
                
                 | 
                     * @param array $tags Tags check if exists  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    302
                 | 
                                    
                                                     | 
                
                 | 
                     * @param array $array Array to check  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    303
                 | 
                                    
                                                     | 
                
                 | 
                     * @return array  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    304
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    305
                 | 
                                    
                                                     | 
                
                 | 
                    protected function fixMissingTags(array $tags, array $array)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    306
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    307
                 | 
                                    
                                                     | 
                
                 | 
                        foreach ($tags as $tag) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    308
                 | 
                                    
                                                     | 
                
                 | 
                            if (empty($array[$tag])) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    309
                 | 
                                    
                                                     | 
                
                 | 
                                $array[$tag] = null;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    310
                 | 
                                    
                                                     | 
                
                 | 
                            }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    311
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    312
                 | 
                                    
                                                     | 
                
                 | 
                        return $array;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    313
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    314
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    315
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    316
                 | 
                                    
                                                     | 
                
                 | 
                     * Generate the \SimpleXMLElement object if the XML is valid  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    317
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    318
                 | 
                                    
                                                     | 
                
                 | 
                     * @param string $xml  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    319
                 | 
                                    
                                                     | 
                
                 | 
                     * @return \SimpleXMLElement|false  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    320
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    321
                 | 
                                    
                                                     | 
                
                 | 
                    protected function generateXMLObject($xml)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    322
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    323
                 | 
                                    
                                                     | 
                
                 | 
                        // strip XML comments from files  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    324
                 | 
                                    
                                                     | 
                
                 | 
                        // if they occur at the beginning of the file it will invalidate the XML  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    325
                 | 
                                    
                                                     | 
                
                 | 
                        // this occurs with certain versions of Yoast  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    326
                 | 
                                    
                                                     | 
                
                 | 
                        $xml = preg_replace('/\s*\<\!\-\-((?!\-\-\>)[\s\S])*\-\-\>\s*/', '', (string)$xml); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    327
                 | 
                                    
                                                     | 
                
                 | 
                        try { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    328
                 | 
                                    
                                                     | 
                
                 | 
                            libxml_use_internal_errors(true);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    329
                 | 
                                    
                                                     | 
                
                 | 
                            return new SimpleXMLElement($xml, LIBXML_NOCDATA);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    330
                 | 
                                    
                                                     | 
                
                 | 
                        } catch (\Exception $e) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    331
                 | 
                                    
                                                     | 
                
                 | 
                            return false;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    332
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    333
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    334
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    335
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    336
                 | 
                                    
                                                     | 
                
                 | 
                     * Parse line separated text string  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    337
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    338
                 | 
                                    
                                                     | 
                
                 | 
                     * @param string $string  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    339
                 | 
                                    
                                                     | 
                
                 | 
                     * @return bool  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    340
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    341
                 | 
                                    
                                                     | 
                
                 | 
                    protected function parseString($string)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    342
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    343
                 | 
                                    
                                                     | 
                
                 | 
                        if (!isset($this->config['strict']) || $this->config['strict'] !== false) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    344
                 | 
                                    
                                                     | 
                
                 | 
                            // Strings are not part of any documented sitemap standard  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    345
                 | 
                                    
                                                     | 
                
                 | 
                            return false;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    346
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    347
                 | 
                                    
                                                     | 
                
                 | 
                        $array = array_filter(array_map('trim', mb_split('\r\n|\n|\r', $string))); | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    348
                 | 
                                    
                                                     | 
                
                 | 
                        foreach ($array as $line) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    349
                 | 
                                    
                                                     | 
                
                 | 
                            if ($this->isSitemapURL($line)) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    350
                 | 
                                    
                                                     | 
                
                 | 
                                $this->addArray(self::XML_TAG_SITEMAP, ['loc' => $line]);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    351
                 | 
                                    
                                                     | 
                
                 | 
                                continue;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    352
                 | 
                                    
                                                     | 
                
                 | 
                            }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    353
                 | 
                                    
                                                     | 
                
                 | 
                            $this->addArray(self::XML_TAG_URL, ['loc' => $line]);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    354
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    355
                 | 
                                    
                                                     | 
                
                 | 
                        return true;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    356
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    357
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    358
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    359
                 | 
                                    
                                                     | 
                
                 | 
                     * Check if the URL may contain an Sitemap  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    360
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    361
                 | 
                                    
                                                     | 
                
                 | 
                     * @param string $url  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    362
                 | 
                                    
                                                     | 
                
                 | 
                     * @return bool  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    363
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    364
                 | 
                                    
                                                     | 
                
                 | 
                    protected function isSitemapURL($url)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    365
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    366
                 | 
                                    
                                                     | 
                
                 | 
                        $path = parse_url($this->urlEncode($url), PHP_URL_PATH);  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    367
                 | 
                                    
                                                     | 
                
                 | 
                        return $this->urlValidate($url) && (  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    368
                 | 
                                    
                                                     | 
                
                 | 
                                mb_substr($path, -mb_strlen(self::XML_EXTENSION) - 1) == '.' . self::XML_EXTENSION ||  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    369
                 | 
                                    
                                                     | 
                
                 | 
                                mb_substr($path, -mb_strlen(self::XML_EXTENSION_COMPRESSED) - 1) == '.' . self::XML_EXTENSION_COMPRESSED  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    370
                 | 
                                    
                                                     | 
                
                 | 
                            );  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    371
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    372
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    373
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    374
                 | 
                                    
                                                     | 
                
                 | 
                     * Parse Json object  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    375
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    376
                 | 
                                    
                                                     | 
                
                 | 
                     * @param string $type Sitemap or URL  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    377
                 | 
                                    
                                                     | 
                
                 | 
                     * @param \SimpleXMLElement $json object  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    378
                 | 
                                    
                                                     | 
                
                 | 
                     * @return bool  | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    379
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    380
                 | 
                                    
                                                     | 
                
                 | 
                    protected function parseJson($type, \SimpleXMLElement $json)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    381
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    382
                 | 
                                    
                                                     | 
                
                 | 
                        if (!isset($json->$type)) { | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    383
                 | 
                                    
                                                     | 
                
                 | 
                            return false;  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    384
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    385
                 | 
                                    
                                                     | 
                
                 | 
                        foreach ($json->$type as $url) { | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    386
                 | 
                                    
                                                     | 
                
                 | 
                            $this->addArray($type, (array)$url);  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    387
                 | 
                                    
                                                     | 
                
                 | 
                        }  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    388
                 | 
                                    
                                                     | 
                
                 | 
                        return true;  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    389
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    390
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    391
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    392
                 | 
                                    
                                                     | 
                
                 | 
                     * Sitemaps discovered  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    393
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    394
                 | 
                                    
                                                     | 
                
                 | 
                     * @return array  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    395
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    396
                 | 
                                    
                                                     | 
                
                 | 
                    public function getSitemaps()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    397
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    398
                 | 
                                    
                                                     | 
                
                 | 
                        return $this->sitemaps;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    399
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    400
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    401
                 | 
                                    
                                                     | 
                
                 | 
                    /**  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    402
                 | 
                                    
                                                     | 
                
                 | 
                     * URLs discovered  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    403
                 | 
                                    
                                                     | 
                
                 | 
                     *  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    404
                 | 
                                    
                                                     | 
                
                 | 
                     * @return array  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    405
                 | 
                                    
                                                     | 
                
                 | 
                     */  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    406
                 | 
                                    
                                                     | 
                
                 | 
                    public function getURLs()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    407
                 | 
                                    
                                                     | 
                
                 | 
                    { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    408
                 | 
                                    
                                                     | 
                
                 | 
                        return $this->urls;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    409
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    410
                 | 
                                    
                                                     | 
                
                 | 
                }  | 
            
            
                                                        
            
                                    
            
            
                | 
                    411
                 | 
                                    
                                                     | 
                
                 | 
                 |