Rulesets::getUserAgentRules()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 17
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 17
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 9
nc 2
nop 1
1
<?php
2
3
/*
4
 * This file is part of the bisarca/robots-txt package.
5
 *
6
 * (c) Emanuele Minotto <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace Bisarca\RobotsTxt;
13
14
use Bisarca\RobotsTxt\Directive\Host;
15
use Bisarca\RobotsTxt\Directive\PathDirectiveInterface;
16
use Bisarca\RobotsTxt\Directive\Sitemap;
17
use Bisarca\RobotsTxt\Directive\UserAgent;
18
use Generator;
19
20
/**
21
 * Set of groups of directives.
22
 */
23
class Rulesets extends AbstractSet
{
    /**
     * Class constructor with optional initialization data.
     *
     * @param Ruleset ...$rulesets Rulesets the set initially contains, in order.
     */
    public function __construct(Ruleset ...$rulesets)
    {
        $this->data = $rulesets;
    }

    /**
     * Adds a ruleset at the end of the set.
     *
     * @param Ruleset $ruleset
     */
    public function add(Ruleset $ruleset)
    {
        $this->data[] = $ruleset;
    }

    /**
     * Checks if a ruleset is contained.
     *
     * The lookup is strict, so only the very same instance is found.
     *
     * @param Ruleset $ruleset
     *
     * @return bool
     */
    public function has(Ruleset $ruleset): bool
    {
        return false !== array_search($ruleset, $this->data, true);
    }

    /**
     * Removes a ruleset.
     *
     * The lookup is strict (same instance) and the remaining rulesets are
     * reindexed so that the internal list stays sequential.
     *
     * @param Ruleset $ruleset
     *
     * @return bool True if the ruleset was found and removed, false otherwise.
     */
    public function remove(Ruleset $ruleset): bool
    {
        $key = array_search($ruleset, $this->data, true);

        if (false === $key) {
            return false;
        }

        unset($this->data[$key]);
        $this->data = array_values($this->data);

        return true;
    }

    /**
     * Checks if a user agent is allowed.
     *
     * @param string $userAgent
     * @param string $path
     *
     * @return bool
     */
    public function isUserAgentAllowed(
        string $userAgent,
        string $path = PathDirectiveInterface::DEFAULT_PATH
    ): bool {
        $ruleset = $this->findUserAgentRules($userAgent);

        // if the robots.txt is empty, or no group of rules applies to
        // this user-agent, then the bot can always access
        if (null === $ruleset) {
            return true;
        }

        return $ruleset->isUserAgentAllowed($path);
    }

    /**
     * Gets rules for a specified user-agent.
     *
     * @param string $userAgent Default "*"
     *
     * @return Ruleset The first ruleset holding the best matching User-Agent
     *                 directive, or a new empty ruleset when the set is
     *                 empty or no directive matches.
     */
    public function getUserAgentRules(string $userAgent = UserAgent::ALL_AGENTS): Ruleset
    {
        return $this->findUserAgentRules($userAgent) ?? new Ruleset();
    }

    /**
     * Extract sitemap directives.
     *
     * Directives are yielded one by one, without their original keys, so
     * that the generator keys stay unique across rulesets (a plain
     * "yield from" would repeat the keys of each array and make
     * iterator_to_array() overwrite earlier sitemaps).
     *
     * @return Generator
     */
    public function getSitemaps(): Generator
    {
        foreach ($this->data as $ruleset) {
            foreach ($ruleset->getDirectives(Sitemap::class) as $sitemap) {
                yield $sitemap;
            }
        }
    }

    /**
     * Checks if the host directive is defined.
     *
     * @return bool True as soon as one ruleset returns a non-empty list of
     *              Host directives.
     */
    public function hasHost(): bool
    {
        foreach ($this->data as $ruleset) {
            if ($ruleset->getDirectives(Host::class)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Gets the host directive.
     *
     * @return Host The first Host directive of the first ruleset having one.
     *
     * @throws \LogicException If no ruleset defines a Host directive;
     *                         check with hasHost() before calling.
     */
    public function getHost(): Host
    {
        foreach ($this->data as $ruleset) {
            $directives = $ruleset->getDirectives(Host::class);

            if ($directives) {
                // reset() picks the first element whatever its key is
                return reset($directives);
            }
        }

        throw new \LogicException(
            'No Host directive is defined, check hasHost() before calling getHost().'
        );
    }

    /**
     * Finds the ruleset applying to a user-agent, if any.
     *
     * The return type is documented only: it is left undeclared so that the
     * file does not need nullable return types.
     *
     * @param string $userAgent
     *
     * @return Ruleset|null Null when the set is empty or nothing matches.
     */
    private function findUserAgentRules(string $userAgent)
    {
        $topUserAgent = $this->getTopUserAgent($userAgent);

        if (null === $topUserAgent) {
            return null;
        }

        foreach ($this->data as $ruleset) {
            if ($ruleset->has($topUserAgent)) {
                return $ruleset;
            }
        }

        return null;
    }

    /**
     * Gets top User-Agent directive.
     *
     * Among the directives matching the user-agent, the top one is the
     * closest to it by Levenshtein distance (compared in lower case); an
     * exact match is returned immediately and ties go to the first found.
     *
     * The return type is documented only: it is left undeclared so that the
     * file does not need nullable return types.
     *
     * @param string $userAgent
     *
     * @return UserAgent|null Null when no directive matches.
     */
    private function getTopUserAgent(string $userAgent)
    {
        $userAgent = mb_strtolower($userAgent);
        $top = null;
        $shortest = PHP_INT_MAX;

        foreach ($this->data as $ruleset) {
            foreach ($ruleset->getDirectives(UserAgent::class) as $directive) {
                if (!$directive->isMatching($userAgent)) {
                    continue;
                }

                $distance = levenshtein(
                    $userAgent,
                    mb_strtolower($directive->getValue())
                );

                if (0 === $distance) {
                    return $directive;
                }

                // before PHP 8.0 levenshtein() returns -1 for strings longer
                // than 255 characters: rank such a match last instead of
                // letting the negative value win as "closest"
                if ($distance < 0) {
                    $distance = PHP_INT_MAX;
                }

                if (null === $top || $distance < $shortest) {
                    $top = $directive;
                    $shortest = $distance;
                }
            }
        }

        return $top;
    }
}
207