Completed
Push — develop ( 990136...183c36 )
by Stéphane
02:08
created

ContentFactory::build()   B

Complexity

Conditions 5
Paths 4

Size

Total Lines 27
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 5.4558

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 27
ccs 14
cts 19
cp 0.7368
rs 8.439
cc 5
eloc 16
nc 4
nop 1
crap 5.4558
1
<?php
2
3
namespace Bee4\RobotsTxt;
4
5
use Bee4\RobotsTxt\Exception\InvalidArgumentException;
6
use Bee4\RobotsTxt\Exception\RuntimeException;
7
8
/**
 * Class ContentFactory
 * Take a URL, try to load the robots.txt file and return its content
 *
 * @copyright Bee4 2015
 * @author    Stephane HULARD <[email protected]>
 */
class ContentFactory
{
    /**
     * Build a Content instance from a string
     *
     * @param  string $item Can be a URL or a raw robots.txt file content
     * @return Content      The built instance
     * @throws InvalidArgumentException When the given URL does not point to /robots.txt
     * @throws RuntimeException         When the remote robots.txt file can't be fetched
     */
    public static function build($item)
    {
        if (filter_var($item, FILTER_VALIDATE_URL) !== false) {
            $parsed = parse_url($item);
            // A robots.txt file is only valid when hosted at the website root
            if (isset($parsed['path']) && $parsed['path'] !== '/robots.txt') {
                throw new InvalidArgumentException(
                    sprintf(
                        'The robots.txt file can\'t be found at: %s this file
                        must be hosted at website root',
                        $item
                    )
                );
            }

            // Rebuild a normalized robots.txt URL from the relevant components
            // only (drops user, pass, query, fragment)
            $parsed['path'] = '/robots.txt';
            $parsed = array_intersect_key(
                $parsed,
                array_flip(['scheme', 'host', 'port', 'path'])
            );
            $port = isset($parsed['port'])?':'.$parsed['port']:'';
            $url = $parsed['scheme'].'://'.$parsed['host'].$port.$parsed['path'];

            $item = self::download($url);
        }

        return new Content($item);
    }

    /**
     * Extract the content at URL
     *
     * @param  string $url The robots.txt URL
     * @return string      The robots file content
     * @throws RuntimeException When the transfer fails or the HTTP status is not 200
     */
    protected static function download($url)
    {
        $handle = curl_init();
        curl_setopt($handle, CURLOPT_URL, $url);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        $item = curl_exec($handle);
        $status = curl_getinfo($handle, CURLINFO_HTTP_CODE);
        curl_close($handle);

        // curl_exec() returns false on transfer failure (DNS error, timeout,
        // refused connection, ...): treat that the same as a non-200 response
        // so a boolean false is never handed back to the caller
        if ($item === false || $status !== 200) {
            throw new RuntimeException(sprintf(
                'Can\'t access the robots.txt file at: %s',
                $url
            ));
        }

        return $item;
    }
}
74