1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace AOE\Languagevisibility\Hooks; |
4
|
|
|
|
5
|
|
|
/*************************************************************** |
6
|
|
|
* Copyright notice |
7
|
|
|
* |
8
|
|
|
* (c) 2016 AOE GmbH <[email protected]> |
9
|
|
|
* All rights reserved |
10
|
|
|
* |
11
|
|
|
* This script is part of the TYPO3 project. The TYPO3 project is |
12
|
|
|
* free software; you can redistribute it and/or modify |
13
|
|
|
* it under the terms of the GNU General Public License as published by |
14
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
15
|
|
|
* (at your option) any later version. |
16
|
|
|
* |
17
|
|
|
* The GNU General Public License can be found at |
18
|
|
|
* http://www.gnu.org/copyleft/gpl.html. |
19
|
|
|
* |
20
|
|
|
* This script is distributed in the hope that it will be useful, |
21
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
22
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23
|
|
|
* GNU General Public License for more details. |
24
|
|
|
* |
25
|
|
|
* This copyright notice MUST APPEAR in all copies of the script! |
26
|
|
|
***************************************************************/ |
27
|
|
|
|
28
|
|
|
use \AOE\Languagevisibility\Services\FeServices; |
29
|
|
|
|
30
|
|
|
class Crawler {

    /**
     * Filters the prepared crawler URLs so that only URLs whose page is visible
     * in the requested language remain.
     *
     * For every configuration found in $params['res'] the 'URLs' entry is
     * replaced by a freshly indexed list containing only the URLs for which
     * FeServices::checkVisiblityForElement() returns a truthy value.
     *
     * @param array $params the crawler result; modified in place
     * @param object $ref the crawler_lib (not used by this method, kept so the signature stays unchanged)
     * @return void
     */
    public function processUrls(&$params, &$ref) {
        if (!isset($params['res']) || !is_array($params['res'])) {
            // nothing was prepared, so there is nothing to filter
            return;
        }

        // iterate over the keys only: the entries are rewritten inside the loop
        foreach (array_keys($params['res']) as $cfg) {
            $urls = array();
            if (isset($params['res'][$cfg]['URLs']) && is_array($params['res'][$cfg]['URLs'])) {
                $urls = $params['res'][$cfg]['URLs'];
            }

            $visibleUrls = array();
            foreach ($urls as $url) {
                list($id, $lang) = self::extractIdAndLangFromUrl($url);

                // URLs of pages that are not visible in that language are dropped
                if (FeServices::checkVisiblityForElement($id, 'pages', $lang)) {
                    $visibleUrls[] = $url;
                }
            }

            $params['res'][$cfg]['URLs'] = $visibleUrls;
        }
    }

    /**
     * Extracts the page id and the language id from a crawler URL.
     *
     * @param string $url URL containing "?id=<digits>" and optionally an "L=<digits>" parameter
     * @return array array($id, $lang): $id is the matched digit string or NULL if the URL
     *               carries no "?id=" part; $lang is the matched digit string or 0 if no "L" is given
     */
    protected static function extractIdAndLangFromUrl($url) {
        // retrieving the id this way is safe because that part is hardcoded in the crawler
        $id = NULL;
        $matches = array();
        if (preg_match('/\?id=(\d+)/', $url, $matches)) {
            $id = $matches[1];
        }

        // TODO: might need domain if no "L" is given
        // "L" has to start a parameter (after "?", "&" or the ";" of "&amp;"),
        // otherwise the tail of another parameter name such as "fooL=" would match
        $lang = 0;
        $matches = array();
        if (preg_match('/(?:^|[?&;])L=(\d+)/', $url, $matches)) {
            $lang = $matches[1];
        }

        return array($id, $lang);
    }
}
81
|
|
|
|
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.