1 | <?php |
||
15 | class ContentFactory |
||
16 | { |
||
/**
 * Build a Content instance from a URL or from raw robots.txt content.
 *
 * When $item validates as a URL it must either carry no path at all or
 * point exactly at "/robots.txt"; the file is then fetched from
 * scheme://host[:port]/robots.txt. User info, query string and fragment
 * of the given URL are not carried over to the fetched location.
 * Any value that does not validate as a URL is passed as-is to the
 * Content constructor.
 *
 * @param string $item Can be an URL or a file content
 * @return Content The built instance
 * @throws InvalidUrlException When the URL has a path other than
 *                             "/robots.txt" or has no host component
 */
public static function build($item)
{
    // Anything that is not a valid URL is treated as the file content itself.
    if (filter_var($item, FILTER_VALIDATE_URL) === false) {
        return new Content($item);
    }

    $parsed = parse_url($item);

    // A path is only tolerated when it already targets the robots.txt file.
    $hasForeignPath = isset($parsed['path']) && $parsed['path'] !== '/robots.txt';

    // FILTER_VALIDATE_URL lets host-less URLs through for the "mailto",
    // "news" and "file" schemes (e.g. "file:///robots.txt"). Without a host
    // there is no site to fetch the file from, so reject them explicitly
    // instead of reading an undefined array key below.
    $hasNoHost = !isset($parsed['host']);

    if ($hasForeignPath || $hasNoHost) {
        throw (new InvalidUrlException(
            sprintf(
                'The robots.txt file can\'t be found at: %s',
                $item
            )
        ))
            ->setUrl($item);
    }

    // Rebuild the canonical robots.txt location from scheme, host and
    // optional port only.
    $port = isset($parsed['port']) ? ':' . $parsed['port'] : '';
    $url = $parsed['scheme'] . '://' . $parsed['host'] . $port . '/robots.txt';

    return new Content(self::download($url));
}
||
49 | |||
50 | /** |
||
51 | * Extract the content at URL |
||
52 | * @param string $url The robots.txt URL |
||
53 | * @return string The robots file content |
||
54 | */ |
||
55 | protected static function download($url) |
||
73 | } |
||
74 |