This project does not seem to handle request data directly as such no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
<?php

/**
 * Controller behind the /seotest crawler front-end.
 *
 * Serves the crawler bootstrap assets, exposes the list of live SiteTree
 * URLs plus crawler settings as JSON, and acts as a curl-based proxy so the
 * JS crawler can fetch pages from the main domain even when the admin runs
 * on a different (sub)domain.
 */
class SEOTestSiteTreeController extends Controller {

    /**
     * Optional domain to crawl instead of the current one.
     * When null, Director::absoluteBaseURL() is used (see getCurlDomain()).
     */
    private static $alternate_domain = null;

    // User agent sent when emulating a desktop browser.
    private static $desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36';

    // User agent sent when emulating a mobile browser.
    private static $mobile_user_agent = 'Mozilla/5.0 (Linux; <Android Version>; <Build Tag etc.>) AppleWebKit/<WebKit Rev> (KHTML, like Gecko) Chrome/<Chrome Rev> Mobile Safari/<WebKit Rev>';

    /**
     * Array of regex that will be used by the crawler.
     * If the url we're going to crawl matches any filter in here, it will be ignored
     */
    private static $ignore_paths = array();

    private static $allowed_actions = array('urlsAndSettings', 'getPageData', 'getPage');

    /**
     * Gate access to logged-in members holding CMS_ACCESS_SEOToolboxAdmin,
     * then queue the crawler's CSS/JS requirements.
     *
     * @return SS_HTTPResponse|void Redirect to the login form when the
     *         permission check fails; nothing otherwise.
     */
    public function init() {
        parent::init();

        // Drop any requirements queued by parent controllers/extensions so
        // the crawler page ships only its own assets.
        Requirements::clear();

        if (!Member::currentUser() || !Permission::check('CMS_ACCESS_SEOToolboxAdmin')) {
            return $this->redirect(Security::login_url().'?BackURL=/seotest');
        }

        Requirements::css(SEOTOOLBOX_DIR.'/third-party/bootstrap/css/bootstrap.min.css');
        Requirements::css(SEOTOOLBOX_DIR.'/third-party/bootstrap/css/bootstrap-theme.min.css');
        Requirements::combine_files('seotest.css', array(
            SEOTOOLBOX_DIR.'/css/fonts/lato/lato.css',
            SEOTOOLBOX_DIR.'/css/seotest.css'
        ));

        Requirements::combine_files('seotest.js', array(
            SEOTOOLBOX_DIR.'/third-party/jquery-1.12.0.js',
            SEOTOOLBOX_DIR.'/js/crawler_event_handler.js',
            SEOTOOLBOX_DIR.'/js/crawler_painter.js',
            SEOTOOLBOX_DIR.'/js/crawler.js',
            SEOTOOLBOX_DIR.'/js/crawler_file_tester.js',
            SEOTOOLBOX_DIR.'/js/default_tests.js',
            SEOTOOLBOX_DIR.'/js/crawler_init.js'
        ));
    }

    /**
     * Curl the passed url.
     *
     * This is still run on the same domain as where your admin domain is located
     * however curl can deal with redirects much better then ajax therefore giving a
     * more accurate result. Also it prepares all the important data into an array object
     * which is then encoded and sent to JS for parsing
     *
     * Object Contents
     * obj['header']     = Headers that we got back from the curl
     * obj['body']       = HTML of the page
     * obj['phrases']    = A list of sentences as extracted from the DOM
     * obj['field_data'] = A result from preg_match with all the html fields visible on the page
     *
     * @param SS_HTTPRequest $request Reads GET vars 'u' (path to fetch) and
     *                                'agent' ('mobile' selects the mobile UA).
     * @return string JSON-encoded result array.
     */
    public function getPageData(SS_HTTPRequest $request) {
        $agent = ($request->getVar('agent') == 'mobile')
            ? $this->config()->get('mobile_user_agent')
            : $this->config()->get('desktop_user_agent');

        $curl = $this->loadPage($request->getVar('u'), $agent);
        $curl['phrases'] = $this->extractWords($curl['body']);

        Requirements::clear();
        return json_encode($curl);
    }

    /**
     * Get the page contents of the requested url.
     * This is used as a proxy so that users running the admin on a subdomain
     * still get the data from their main domain.
     *
     * NOTE(review): the target URL is pinned to getCurlDomain(), but the path
     * comes from the request ('u'); confirm it cannot be abused to reach
     * unintended resources (action is already permission-gated in init()).
     *
     * @param SS_HTTPRequest $request Reads GET vars 'u' and 'agent'.
     * @return string Response body (headers stripped).
     */
    public function getPage(SS_HTTPRequest $request) {
        $agent = ($request->getVar('agent') == 'mobile')
            ? $this->config()->get('mobile_user_agent')
            : $this->config()->get('desktop_user_agent');

        $ch = $this->setupCurl($request->getVar('u'), $agent);
        $data = curl_exec($ch);
        $body = $this->getPageBody($ch, $data);
        curl_close($ch);

        Requirements::clear();

        return $body;
    }

    /**
     * Break down the $html provided and returns all words that have an SEO significance.
     *
     * Extracts, in order: meta-description content, img/a title attributes,
     * img alt attributes, and finally text between any remaining tag pairs.
     * Matched regions with a 'replace' pattern are blanked out of the working
     * HTML so later passes do not re-match them.
     *
     * @param string $html
     * @return array Non-empty, whitespace-normalised, lower-cased phrases.
     */
    private function extractWords($html) {
        mb_internal_encoding('UTF-8');
        // Decode numeric HTML entities (&#NNN;) to real UTF-8 characters so
        // word extraction sees the actual text.
        $html = preg_replace_callback(
            "/(&#[0-9]+;)/",
            function($m) {return mb_convert_encoding($m[1], "UTF-8", "HTML-ENTITIES"); },
            $html
        );
        $html = str_replace(array("\n", "\r"), ' ', mb_strtolower($html));
        $phrases = array();
        $regex_find_replace = array(
            array(
                'find' => '/<meta(.*?)name="(.*?)description"(.*?)content="(.*?)"(.*?)[>]/m',
                'find_pos' => 4,
                'replace' => '/<meta(.*?)[>]/i'
            ),
            array(
                'find' => '/<(img|a)[^<]*title=(\'|")(.*?)(\'|")[^<]*[>]/m',
                'find_pos' => 3
            ),
            array(
                'find' => '/<img[^<]*alt=(\'|")(.*?)(\'|")[^<]*[>]/m',
                'find_pos' => 2,
                'replace' => '/<img(.*?)[>]/i'
            ),
            array(
                'find' => '/<(.*?)>(.*?)<\/[a-zA-Z0-9]++>/m',
                'find_pos' => 2,
            )
        );

        foreach ($regex_find_replace as $commands) {
            if (isset($commands['find'])) {
                preg_match_all($commands['find'], $html, $matches);
                // Normalise each captured phrase: strip tags, collapse runs
                // of whitespace to single spaces.
                array_walk($matches[$commands['find_pos']], function(&$phrase) {
                    $words = explode(' ', strip_tags($phrase));
                    array_walk($words, function(&$w) {
                        $w = trim(preg_replace('/\s+/', ' ', strip_tags($w)));
                    });
                    $phrase = preg_replace('/\s+/', ' ', implode(' ', $words));
                });
                $phrases = array_merge($phrases, $matches[$commands['find_pos']]);
            }

            // Blank out consumed markup so the generic tag-pair pass at the
            // end does not double-count it.
            if (isset($commands['replace']))
                $html = preg_replace($commands['replace'], ' ', $html);
        }

        // Remove the empty elements
        return array_filter($phrases, function($phrase) {return strlen(trim($phrase)) > 0; });
    }

    /**
     * Returns the first batch of urls the crawler will use
     * and its settings in json format.
     *
     * @param SS_HTTPRequest $request Unused; present to match the
     *                                SilverStripe action signature.
     * @return string
     */
    public function urlsAndSettings(SS_HTTPRequest $request) {
        Requirements::clear();
        return json_encode(array(
            'urls' => Versioned::get_by_stage('SiteTree', 'Live')
                ->exclude('ClassName', 'RedirectorPage')
                ->exclude('ClassName', 'ErrorPage')
                ->map('ID', 'AbsoluteLink')
                ->toArray(),

            'settings' => array(
                'ignore_paths' => $this->config()->get('ignore_paths'),
                'crawl_id' => GlobalAutoLinkSettings::get_current()->CrawlID
            )
        ));
    }

    /**
     * Parses the data that we got from curling the crawl version of the page
     * and splits the html fields into an array.
     *
     * Fields are expected in the form [**[name]**[base64-content]**]; index 2
     * is decoded in place and index 3 gains a tag-stripped, whitespace-
     * collapsed copy of the decoded content.
     *
     * @param string $data
     * @return array preg_match_all-style matches array.
     */
    private function getHTMLFieldsData($data) {
        preg_match_all('/\[\*\*\[(.*?)\]\*\*\[(.*?)\]\*\*\]/im', $data, $matches);
        foreach ($matches[2] as $key => $field_text) {
            $matches[2][$key] = base64_decode($field_text);
            $matches[3][$key] = preg_replace('/[\s]+/mu', ' ', strip_tags($matches[2][$key]));
        }
        return $matches;
    }

    /**
     * Setup a curl request.
     *
     * @param string $url        Path relative to the crawl domain.
     * @param string $agent      User-agent string to send.
     * @param bool   $useCrawlID Send the X-Crawl-Id header so the site
     *                           renders its crawl-friendly variant.
     * @return resource
     */
    public function setupCurl($url, $agent, $useCrawlID = false) {
        $ch = curl_init();
        curl_setopt( $ch, CURLOPT_URL, $this->getCurlURL($url) );
        curl_setopt( $ch, CURLOPT_HEADER, true );
        curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
        curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
        curl_setopt( $ch, CURLOPT_MAXREDIRS, 10 );
        curl_setopt( $ch, CURLOPT_USERAGENT, $agent );
        curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, 10 );
        curl_setopt( $ch, CURLOPT_TIMEOUT, 30 );
        // NOTE(review): peer verification is disabled (presumably to allow
        // self-signed certs on dev/staging). Consider enabling
        // CURLOPT_SSL_VERIFYPEER for production crawls.
        curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
        if ($useCrawlID) {
            $crawl_id = GlobalAutoLinkSettings::get_current()->CrawlID;
            curl_setopt( $ch, CURLOPT_HTTPHEADER, array( 'X-Crawl-Id: '.$crawl_id ) );
        }
        return $ch;
    }

    /**
     * Return the domain to use to curl the page.
     *
     * @return array|scalar|string
     */
    public function getCurlDomain() {
        return ( self::config()->get('alternate_domain') != null )
            ? self::config()->get('alternate_domain')
            : Director::absoluteBaseURL();
    }

    /**
     * Return a url ready to be curled.
     *
     * @param string $url
     * @return string
     */
    public function getCurlURL($url) {
        $domain = $this->getCurlDomain();
        // Join with exactly one slash: Director::absoluteBaseURL()
        // conventionally ends with '/', which previously produced
        // double-slash URLs ("http://site.com//page").
        return rtrim($domain, '/') . '/' . ltrim($url, '/');
    }

    /**
     * Get the page headers from a curl response.
     *
     * With CURLOPT_FOLLOWLOCATION the header section contains one block per
     * redirect hop; this returns the final hop's headers.
     *
     * @param resource $ch
     * @param string $data Raw response (headers + body).
     * @return string
     */
    public function getPageHeaders($ch, $data) {
        $header_size = curl_getinfo( $ch, CURLINFO_HEADER_SIZE );
        $header = explode( "\r\n\r\n", substr( $data, 0, $header_size ) );
        array_pop( $header ); // Remove last element as it will always be empty
        return array_pop( $header );
    }

    /**
     * Get the body of a curl response.
     *
     * @param resource $ch
     * @param string $data Raw response (headers + body).
     * @return string
     */
    public function getPageBody($ch, $data) {
        $header_size = curl_getinfo( $ch, CURLINFO_HEADER_SIZE );
        return substr( $data, $header_size );
    }

    /**
     * Curl the passed $url using the X-Crawl-ID header and parse the data
     * into an array.
     *
     * @param string $url
     * @param (null|string) $agent
     * @return array 'headers'/'body' are false when the final response was
     *               not a 200; otherwise also contains 'field_data' and
     *               'url_fetched' (effective URL relative to the domain).
     */
    public function loadPage($url, $agent = null) {
        $ch = $this->setupCurl($url, $agent, true);
        $data = curl_exec($ch);
        // Effective URL after redirects, with the crawl domain stripped off.
        $fetched = str_replace($this->getCurlDomain(), '', curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
        $header = $this->getPageHeaders($ch, $data);
        $body = preg_replace('/[\s]+/mu', ' ', $this->getPageBody($ch, $data));

        curl_close( $ch );

        // Treat anything other than a final "HTTP/x.x 200 ..." status as a
        // failed fetch. Explicit `=== false` replaces the fragile truthiness
        // test (!strpos) which conflates "not found" with "found at 0".
        if (strpos($header, ' 200 ') === false) {
            return array( 'headers' => false, 'body' => false );
        }

        // Replace the raw [**[...]**[...]**] field markers in the body with
        // their decoded, tag-stripped text.
        $field_data = $this->getHTMLFieldsData($body);
        $body = str_replace($field_data[0], $field_data[2], $body);

        return array( 'headers' => $header, 'body' => $body, 'field_data' => $field_data, 'url_fetched' => $fetched );
    }

    /**
     * If ErrorPage exists for Error Code 503 return it
     * else create it and return it.
     *
     * @return ErrorPage
     */
    public static function getPermissionDeniedPage() {
        $page = ErrorPage::get()->find('ErrorCode', 503);
        if (!$page) {
            $page = ErrorPage::create(array(
                'ErrorCode' => 503,
                'Title' => 'Permission Denied'
            ));
            $page->write();
        }

        return $page;
    }
}
This check looks from parameters that have been defined for a function or method, but which are not used in the method body.