RobotsTxtParser::parseLine()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 7
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
1
<?php
2
/**
3
 * vipnytt/RobotsTxtParser
4
 *
5
 * @link https://github.com/VIPnytt/RobotsTxtParser
6
 * @license https://github.com/VIPnytt/RobotsTxtParser/blob/master/LICENSE The MIT License (MIT)
7
 */
8
9
namespace vipnytt\RobotsTxtParser\Parser;
10
11
use vipnytt\RobotsTxtParser\Client\RenderClient;
12
use vipnytt\RobotsTxtParser\Handler\Directives\RootDirectiveHandler;
13
use vipnytt\RobotsTxtParser\Parser\Directives\DirectiveParserTrait;
14
use vipnytt\RobotsTxtParser\RobotsTxtInterface;
15
16
/**
 * Class RobotsTxtParser
 *
 * Parses raw robots.txt content line by line and feeds every line to a
 * RootDirectiveHandler: top-level directives (clean-param, host, sitemap) go
 * to their dedicated handler, everything else goes to the user-agent handler.
 *
 * @package vipnytt\RobotsTxtParser\Parser
 */
class RobotsTxtParser implements RobotsTxtInterface
{
    use DirectiveParserTrait;

    /**
     * Directive white list
     *
     * Maps each top-level directive name to the name of the
     * RootDirectiveHandler property that receives its values.
     */
    const TOP_LEVEL_DIRECTIVES = [
        self::DIRECTIVE_CLEAN_PARAM => 'cleanParam',
        self::DIRECTIVE_HOST => 'host',
        self::DIRECTIVE_SITEMAP => 'sitemap',
    ];

    /**
     * Root directive handler
     * @var RootDirectiveHandler
     */
    protected $handler;

    /**
     * RobotsTxtParser constructor.
     *
     * Normalizes the given URIs to their base form, creates the root
     * directive handler and immediately parses the supplied content.
     *
     * Note: this calls mb_internal_encoding(), which changes the mbstring
     * internal encoding for the whole PHP process, not just this object.
     *
     * @param string $baseUri Base URI the robots.txt belongs to
     * @param string $content Raw robots.txt content
     * @param string|null $effectiveUri Effective URI; when null, the base URI is used instead
     */
    public function __construct($baseUri, $content, $effectiveUri = null)
    {
        mb_internal_encoding(self::ENCODING);
        $baseParser = new UriParser($baseUri);
        $baseUri = $baseParser->base();
        $effectiveBase = $baseUri;
        if ($effectiveUri !== null) {
            $effectiveParser = new UriParser($effectiveUri);
            $effectiveBase = $effectiveParser->base();
        }
        $this->handler = new RootDirectiveHandler($baseUri, $effectiveBase);
        $this->parseTxt($content);
    }

    /**
     * Parse robots.txt content
     *
     * Splits the content on CRLF, LF or CR, then trims, length-limits and
     * strips the comment from each line before handing it to parseLine().
     *
     * @param string $txt Raw robots.txt content
     * @return bool True if at least one line was accepted by a handler, false otherwise
     */
    private function parseTxt($txt)
    {
        $lines = mb_split('\r\n|\n|\r', $txt);
        // mb_split() returns false on failure; passing that on to array
        // functions would raise a TypeError on PHP 8, so bail out instead.
        if (!is_array($lines) || $lines === []) {
            return false;
        }
        $parsed = false;
        // Parse each line individually
        foreach ($lines as $line) {
            // Trim, then limit rule length
            $line = mb_substr(trim($line), 0, self::MAX_LENGTH_RULE);
            // Remove comments
            $line = explode('#', $line, 2)[0];
            // Every line must be parsed, even after a previous success
            if ($this->parseLine($line) === true) {
                $parsed = true;
            }
        }
        return $parsed;
    }

    /**
     * Parse a single line
     *
     * If the line forms a rule pair for one of the white-listed top-level
     * directives, the value is added to the matching handler. Any other line
     * is passed unchanged to the user-agent handler.
     *
     * @param string $line Line with comments already removed
     * @return bool Result of the handler's add() call
     */
    private function parseLine($line)
    {
        if (($pair = $this->generateRulePair($line, array_keys(self::TOP_LEVEL_DIRECTIVES))) !== false) {
            // $pair[0] is the directive name, $pair[1] its value
            return $this->handler->{self::TOP_LEVEL_DIRECTIVES[$pair[0]]}->add($pair[1]);
        }
        return $this->handler->userAgent->add($line);
    }

    /**
     * Render
     *
     * @return RenderClient Render client wrapping this parser's root handler
     */
    public function render()
    {
        return new RenderClient($this->handler);
    }

    /**
     * Rule array
     *
     * @return array Exported rules, keyed by directive name (host, clean-param, sitemap, user-agent)
     */
    public function export()
    {
        return [
            self::DIRECTIVE_HOST => $this->handler->host->client()->export(),
            self::DIRECTIVE_CLEAN_PARAM => $this->handler->cleanParam->client()->export(),
            self::DIRECTIVE_SITEMAP => $this->handler->sitemap->client()->export(),
            self::DIRECTIVE_USER_AGENT => $this->handler->userAgent->export(),
        ];
    }
}
127