Blame::getQuery()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Model;
6
7
use App\Repository\BlameRepository;
8
9
/**
10
 * A Blame will search the given page for the given text and return the relevant revisions and authors.
11
 */
12
class Blame extends Authorship
13
{
14
    /** @var string Text to search for. */
    protected string $query;

    /**
     * @var array|null Matches, keyed by revision ID, each with keys 'edit' <Edit> and 'tokens' <string[]>.
     * Defaults to null so that it can be read safely before prepareData() has assigned it;
     * a typed property without a default is uninitialized and throws an Error when read.
     */
    protected ?array $matches = null;

    /**
     * @var Edit|null Target revision that is being blamed. Null until getAsOf() assigns it.
     */
    protected ?Edit $asOf = null;
22
23
    /**
     * Create a Blame for the given page and search text.
     *
     * The repository, page and target are handed to the parent constructor unchanged;
     * only the search text is stored by this class.
     *
     * @param BlameRepository $repository
     * @param Page $page The page to process.
     * @param string $query Text to search for.
     * @param string|null $target Either a revision ID or date in YYYY-MM-DD format. Null to use latest revision.
     */
    public function __construct(
        BlameRepository $repository,
        Page $page,
        string $query,
        ?string $target = null
    ) {
        // Parent initialisation runs first; the query is recorded afterwards.
        parent::__construct($repository, $page, $target);

        $this->query = $query;
    }
39
40
    /**
     * The raw search text, exactly as it was passed to the constructor.
     * @return string
     */
    public function getQuery(): string
    {
        return $this->query;
    }
48
49
    /**
     * Matches, keyed by revision ID, each with keys 'edit' <Edit> and 'tokens' <string[]>.
     *
     * Returns null when no matches have been computed, i.e. prepareData() has not been
     * called or it returned early without assigning any.
     *
     * @return array|null
     */
    public function getMatches(): ?array
    {
        // `??` is safe on an unset/uninitialized typed property, whereas a plain read would throw an Error.
        return $this->matches ?? null;
    }
57
58
    /**
     * Get all the matches as Edits.
     *
     * Returns null when no matches have been computed, i.e. prepareData() has not been
     * called or it returned early without assigning any.
     *
     * @return Edit[]|null
     */
    public function getEdits(): ?array
    {
        // array_column() only accepts an array, so bail out before handing it an unset or null value.
        if (!isset($this->matches)) {
            return null;
        }

        return array_column($this->matches, 'edit');
    }
66
67
    /**
     * The search query with all whitespace removed and converted to lowercase.
     * Spaces are stripped since they are not accounted for in the WikiWho API.
     * @return string
     */
    public function getTokenizedQuery(): string
    {
        $withoutWhitespace = preg_replace('/\s*/m', '', $this->query);

        return strtolower($withoutWhitespace);
    }
75
76
    /**
     * Get the first "token" of the search query. A "token" in this case is a word or group of syntax,
     * roughly correlating to the token structure returned by the WikiWho API.
     *
     * The query is split on whitespace and the first non-empty piece is returned in lowercase.
     * Leading whitespace is skipped, so a query such as " foo bar" yields "foo" rather than
     * an empty string. An empty or whitespace-only query yields an empty string.
     *
     * @return string
     */
    public function getFirstQueryToken(): string
    {
        // PREG_SPLIT_NO_EMPTY drops the empty pieces produced by leading or repeated whitespace.
        $pieces = preg_split('/\s+/', $this->query, -1, PREG_SPLIT_NO_EMPTY);

        // `??` also covers preg_split() returning false, or an empty list for a blank query.
        return strtolower($pieces[0] ?? '');
    }
85
86
    /**
     * The revision being blamed, looked up through the repository on demand.
     *
     * When a target is set, the Edit is fetched from the repository by that target;
     * otherwise the result is null. A non-null result is kept and returned on later calls.
     *
     * @return Edit|null
     */
    public function getAsOf(): ?Edit
    {
        if (!isset($this->asOf)) {
            if ($this->target) {
                $this->asOf = $this->repository->getEditFromRevId($this->page, $this->target);
            } else {
                $this->asOf = null;
            }
        }

        return $this->asOf;
    }
102
103
    /**
     * Get authorship attribution from the WikiWho API and record which revisions
     * contributed the text being searched for.
     *
     * Does nothing if matches have already been set, or if no revision data is available.
     *
     * @see https://www.f-squared.org/wikiwho/
     */
    public function prepareData(): void
    {
        // Matches were already computed by an earlier call.
        if (isset($this->matches)) {
            return;
        }

        // A null result is treated as "nothing to process".
        $revisionData = $this->getRevisionData(true);
        if (null === $revisionData) {
            return;
        }

        $found = $this->searchTokens($revisionData['tokens']);

        // Collect the matched tokens under the ID of the revision they belong to.
        $this->matches = [];
        foreach ($found as $hit) {
            $revId = $hit['id'];

            if (!isset($this->matches[$revId])) {
                // First token seen for this revision: keep it only if the Edit can be fetched.
                $edit = $this->repository->getEditFromRevId($this->page, $revId);
                if ($edit) {
                    $this->matches[$revId] = [
                        'edit' => $edit,
                        'tokens' => [$hit['token']],
                    ];
                }
                continue;
            }

            $this->matches[$revId]['tokens'][] = $hit['token'];
        }
    }
138
139
    /**
     * Find matches of search query in the given list of tokens.
     *
     * Tokens are walked in order while a running concatenation of their 'str' values is compared
     * against the whitespace-stripped, lowercased query. Each time the running text contains the
     * full query, the tokens gathered for that run are added to the result.
     *
     * An empty query yields no matches. Without this guard, strpos() with an empty needle succeeds
     * for every token on PHP 8 and emits a warning on PHP 7.
     *
     * @param array $tokens Each token is read via its 'str', 'o_rev_id' and 'editor' keys.
     * @return array List of arrays with keys 'id', 'editor' and 'token', in reverse order of discovery.
     */
    private function searchTokens(array $tokens): array
    {
        $matchData = [];
        $matchDataSoFar = [];
        $matchSoFar = '';
        $firstQueryToken = $this->getFirstQueryToken();
        $tokenizedQuery = $this->getTokenizedQuery();

        // Nothing to search for.
        if ('' === $tokenizedQuery) {
            return [];
        }

        foreach ($tokens as $token) {
            // The previous matches plus the new token. This is basically a candidate for what may become $matchSoFar.
            $newMatchSoFar = $matchSoFar.$token['str'];

            // We first check if the first token of the query matches, because we want to allow for partial matches
            // (e.g. for query "barbaz", the tokens ["foobar","baz"] should match).
            // An empty first token is skipped, since an empty needle would "match" everything.
            if ('' !== $firstQueryToken && false !== strpos($newMatchSoFar, $firstQueryToken)) {
                // If the full query is in the new match, use it, otherwise use just the first token. This is because
                // the full match may exist across multiple tokens, but the first match is only a partial match.
                $newMatchSoFar = false !== strpos($newMatchSoFar, $tokenizedQuery)
                    ? $newMatchSoFar
                    : $firstQueryToken;
            }

            // Keep track of tokens that match. To allow partial matches,
            // we check the query against $newMatchSoFar and vice versa.
            if (false !== strpos($tokenizedQuery, $newMatchSoFar) ||
                false !== strpos($newMatchSoFar, $tokenizedQuery)
            ) {
                $matchSoFar = $newMatchSoFar;
                $matchDataSoFar[] = [
                    'id' => $token['o_rev_id'],
                    'editor' => $token['editor'],
                    'token' => $token['str'],
                ];
            } elseif (!empty($matchSoFar)) {
                // We hit a token that isn't in the query string, so start over.
                $matchDataSoFar = [];
                $matchSoFar = '';
            }

            // A full match was found, so merge $matchDataSoFar into $matchData,
            // and start over to see if there are more matches in the article.
            if (false !== strpos($matchSoFar, $tokenizedQuery)) {
                $matchData = array_merge($matchData, $matchDataSoFar);
                $matchDataSoFar = [];
                $matchSoFar = '';
            }
        }

        // Full matches usually come last, but are the most relevant.
        return array_reverse($matchData);
    }
195
}
196