Parser   A
last analyzed

Complexity

Total Complexity 12

Size/Duplication

Total Lines 120
Duplicated Lines 0 %

Coupling/Cohesion

Components 0
Dependencies 3

Importance

Changes 0
Metric Value
wmc 12
lcom 0
cbo 3
dl 0
loc 120
rs 10
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
D parse() 0 45 9
A extractRows() 0 13 1
A createDirective() 0 20 2
1
<?php
2
3
/*
4
 * This file is part of the bisarca/robots-txt package.
5
 *
6
 * (c) Emanuele Minotto <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace Bisarca\RobotsTxt;
13
14
use Bisarca\RobotsTxt\Directive\DirectiveInterface;
15
use Bisarca\RobotsTxt\Directive\NonGroupInterface;
16
use Bisarca\RobotsTxt\Directive\StartOfGroupInterface;
17
use Bisarca\RobotsTxt\Exception\ExceptionInterface;
18
use Bisarca\RobotsTxt\Exception\MissingDirectiveException;
19
20
/**
21
 * Robots.txt file parser.
22
 */
23
class Parser
{
    /**
     * Registered directives, keyed by the lower-case field name expected at
     * the beginning of a row and mapped to the directive class to build.
     *
     * The order matters: the first entry whose field matches a row wins.
     *
     * @var string[]
     */
    const DIRECTIVES = [
        'allow' => Directive\Allow::class,
        'comment' => Directive\Comment::class,
        'disallow' => Directive\Disallow::class,
        'host' => Directive\Host::class,
        'sitemap' => Directive\Sitemap::class,
        'user-agent' => Directive\UserAgent::class,
    ];

    /**
     * Parse robots.txt content.
     *
     * Every usable row is turned into a directive and the directives are
     * bucketed into groups; each group becomes a Ruleset. Rows for which no
     * directive can be created (any ExceptionInterface) are skipped.
     *
     * @param string $content Raw robots.txt content
     *
     * @return Rulesets
     */
    public function parse(string $content): Rulesets
    {
        $groups = [];

        // -1 collects group-member records seen before any group was opened
        $groupIndex = -1;
        $previousStartsGroup = false;

        foreach ($this->extractRows($content) as $row) {
            try {
                $directive = $this->createDirective($row);
            } catch (ExceptionInterface $exception) {
                // unknown or malformed row: ignore it
                continue;
            }

            $startsGroup = $directive instanceof StartOfGroupInterface;

            // A new group is opened by every non-group record and by the
            // first of a run of consecutive start-of-group records.
            // NOTE(review): group-member records following a non-group record
            // end up in that record's group - confirm this is intended.
            if (
                $directive instanceof NonGroupInterface ||
                ($startsGroup && !$previousStartsGroup)
            ) {
                ++$groupIndex;
            }

            $groups[$groupIndex][] = $directive;
            $previousStartsGroup = $startsGroup;
        }

        // any group-member records without a preceding
        // start-of-group record are ignored
        unset($groups[-1]);

        foreach ($groups as $index => $group) {
            $groups[$index] = new Ruleset(...$group);
        }

        return new Rulesets(...$groups);
    }

    /**
     * Extract single rows from main content.
     *
     * Rows are split on LF, CRLF and CR alike, so the result does not depend
     * on the line-ending convention of the host platform. Everything from the
     * first "#" of a row onwards is treated as a comment and removed, then the
     * row is trimmed. Rows that end up empty are dropped; original array keys
     * are preserved.
     *
     * @param string $content Raw robots.txt content
     *
     * @return string[]
     */
    private function extractRows(string $content): array
    {
        // split by any EOL flavour, not only the one of the running OS
        $rows = preg_split('/\r\n|\r|\n/', $content);

        // remove comments and wrapper spaces
        $rows = array_map(static function (string $row): string {
            $commentStart = strpos($row, '#');

            if (false !== $commentStart) {
                // cut at the FIRST "#": the rest of the row is a comment
                $row = substr($row, 0, $commentStart);
            }

            return trim($row);
        }, $rows);

        // empty lines aren't useful
        return array_filter($rows);
    }

    /**
     * Creates a directive from the raw line contained in the robots.txt file.
     *
     * The row must start with a registered field name (case-insensitive)
     * followed by a colon, at least one whitespace character and a value.
     *
     * @param string $row Raw line
     *
     * @throws MissingDirectiveException If no directive is available
     *
     * @return DirectiveInterface
     */
    private function createDirective(string $row): DirectiveInterface
    {
        // directives are tried in declaration order; the first match wins
        foreach (self::DIRECTIVES as $field => $class) {
            $pattern = sprintf('/^%s:\s+.+/i', preg_quote($field, '/'));

            if (1 === preg_match($pattern, $row)) {
                return new $class($row);
            }
        }

        // no directives found for this row
        throw MissingDirectiveException::create($row);
    }
}
143