Passed
Push — master ( eeccf1...90d3f5 )
by MusikAnimal
06:37
created

Authorship::getError()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
ccs 0
cts 2
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
declare(strict_types=1);
3
4
namespace AppBundle\Model;
5
6
use DateTime;
7
use GuzzleHttp\Exception\RequestException;
8
9
/**
 * Authorship attribution for a single page, built from data returned by the WikiWho API.
 *
 * Call prepareData() first; the getters then read from the prepared result.
 */
class Authorship extends Model
{
    /** @const string[] Domain names of wikis supported by WikiWho. */
    public const SUPPORTED_PROJECTS = [
        'de.wikipedia.org',
        'en.wikipedia.org',
        'es.wikipedia.org',
        'eu.wikipedia.org',
        'tr.wikipedia.org',
    ];

    /** @var int|null Target revision ID. Null for latest revision. */
    protected $target;

    /**
     * @var array|null Result of prepareData(). Either contains only the key 'error', or the keys
     *   'list', 'totalAuthors', 'totalCount' and optionally 'others'. Unset until prepareData() runs.
     */
    protected $data;

    /** @var mixed[]|null Revision that the data pertains to, with keys 'id' and 'timestamp'. */
    protected $revision;

    /**
     * Authorship constructor.
     * @param Page $page The page to process.
     * @param string|null $target Either a revision ID or date in YYYY-MM-DD format. Null to use latest revision.
     * @param int|null $limit Max number of results. Null for no limit.
     */
    public function __construct(Page $page, ?string $target = null, ?int $limit = null)
    {
        // The page must be assigned before resolving the target, because getTargetRevId() reads $this->page.
        $this->page = $page;
        $this->limit = $limit;
        $this->target = $this->getTargetRevId($target);
    }

    /**
     * Resolve the user-supplied target into a revision ID.
     * @param string|null $target Revision ID, or a date in YYYY-MM-DD format.
     * @return int|null Null when no target was given or the date could not be parsed.
     */
    private function getTargetRevId(?string $target): ?int
    {
        if (null === $target) {
            return null;
        }

        if (preg_match('/\d{4}-\d{2}-\d{2}/', $target)) {
            $date = DateTime::createFromFormat('Y-m-d', $target);

            // createFromFormat() returns false when the whole string does not fit the format
            // (the pattern above is unanchored, so e.g. trailing characters still match it).
            // Passing false on would raise a TypeError, so fall back to the latest revision.
            if (false === $date) {
                return null;
            }

            return $this->page->getRevisionIdAtDate($date);
        }

        return (int)$target;
    }

    /**
     * Domains of supported wikis.
     * @return string[]
     */
    public function getSupportedWikis(): array
    {
        return self::SUPPORTED_PROJECTS;
    }

    /**
     * Get the target revision ID. Null for latest revision.
     * @return int|null
     */
    public function getTarget(): ?int
    {
        return $this->target;
    }

    /**
     * Authorship information for the top $this->limit authors.
     * @return array Keyed by username or IP address; empty if no list was prepared.
     */
    public function getList(): array
    {
        return $this->data['list'] ?? [];
    }

    /**
     * Get error thrown when preparing the data, or null if no error occurred.
     * @return string|null
     */
    public function getError(): ?string
    {
        return $this->data['error'] ?? null;
    }

    /**
     * Get the total number of authors.
     * @return int 0 if the data has not been prepared or preparing it ended in an error.
     */
    public function getTotalAuthors(): int
    {
        // The key is absent when prepareData() recorded only an error; default like getList() does.
        return $this->data['totalAuthors'] ?? 0;
    }

    /**
     * Get the total number of characters added.
     * @return int 0 if the data has not been prepared or preparing it ended in an error.
     */
    public function getTotalCount(): int
    {
        // The key is absent when prepareData() recorded only an error; default like getList() does.
        return $this->data['totalCount'] ?? 0;
    }

    /**
     * Get summary data on the 'other' authors who are not in the top $this->limit.
     * @return array|null With keys 'count', 'percentage' and 'numEditors', or null if not recorded.
     */
    public function getOthers(): ?array
    {
        return $this->data['others'] ?? null;
    }

    /**
     * Get the revision the authorship data pertains to, with keys 'id' and 'timestamp'.
     * @return array|null
     */
    public function getRevision(): ?array
    {
        return $this->revision;
    }

    /**
     * Is the given page supported by the Authorship tool?
     * Requires the page's project domain to be in self::SUPPORTED_PROJECTS and the namespace to be 0.
     * @param Page $page
     * @return bool
     */
    public static function isSupportedPage(Page $page): bool
    {
        return in_array($page->getProject()->getDomain(), self::SUPPORTED_PROJECTS, true) &&
            0 === $page->getNamespace();
    }

    /**
     * Get authorship attribution from the WikiWho API.
     *
     * Populates $this->data (and $this->revision on success). On failure, $this->data holds
     * only an 'error' key. Subsequent calls are no-ops once $this->data is set.
     * @see https://f-squared.org/wikiwho/
     */
    public function prepareData(): void
    {
        if (isset($this->data)) {
            return;
        }

        try {
            $ret = $this->getRepository()->getData($this->page, $this->target);
        } catch (RequestException $e) {
            $this->data = [
                'error' => 'unknown',
            ];
            return;
        }

        // If revision can't be found, return error message.
        if (!isset($ret['revisions'][0])) {
            $this->data = [
                'error' => $ret['Error'] ?? 'Unknown',
            ];
            return;
        }

        // The first revision entry is keyed by its revision ID.
        $revId = array_keys($ret['revisions'][0])[0];
        $revisionData = $ret['revisions'][0][$revId];

        $this->revision = [
            'id' => $revId,
            'timestamp' => $revisionData['time'],
        ];

        [$counts, $totalCount, $userIds] = $this->countTokens($revisionData['tokens']);
        $usernameMap = $this->getUsernameMap($userIds);

        if (null !== $this->limit) {
            $countsToProcess = array_slice($counts, 0, $this->limit, true);
        } else {
            $countsToProcess = $counts;
        }

        $data = [];

        // Used to get the character count and percentage of the remaining N editors, after the top $this->limit.
        $percentageSum = 0;
        $countSum = 0;
        $numEditors = 0;

        // Loop through once more, creating an array with the user names (or IP addresses)
        // as the key, and the count and percentage as the value.
        foreach ($countsToProcess as $editor => $count) {
            if (isset($usernameMap[$editor])) {
                $index = $usernameMap[$editor];
            } else {
                $index = $editor;
            }

            // $totalCount is 0 when every token string is empty; avoid dividing by zero.
            $percentage = $totalCount > 0
                ? round(100 * ($count / $totalCount), 1)
                : 0.0;

            // If we are showing > 10 editors in the table, we still only want the top 10 for the chart.
            if ($numEditors < 10) {
                $percentageSum += $percentage;
                $countSum += $count;
                $numEditors++;
            }

            $data[$index] = [
                'count' => $count,
                'percentage' => $percentage,
            ];
        }

        $this->data = [
            'list' => $data,
            'totalAuthors' => count($counts),
            'totalCount' => $totalCount,
        ];

        // Record character count and percentage for the remaining editors.
        if ($percentageSum < 100) {
            $this->data['others'] = [
                'count' => $totalCount - $countSum,
                'percentage' => round(100 - $percentageSum, 1),
                'numEditors' => count($counts) - $numEditors,
            ];
        }
    }

    /**
     * Get a map of user IDs to usernames, given the IDs.
     * @param int[] $userIds
     * @return array IDs as keys, usernames as values.
     */
    private function getUsernameMap(array $userIds): array
    {
        // Nothing to look up; skip the repository call.
        if (empty($userIds)) {
            return [];
        }

        $userIdsNames = $this->getRepository()->getUsernamesFromIds(
            $this->page->getProject(),
            $userIds
        );

        $usernameMap = [];
        foreach ($userIdsNames as $userIdName) {
            $usernameMap[$userIdName['user_id']] = $userIdName['user_name'];
        }

        return $usernameMap;
    }

    /**
     * Get counts of token lengths for each author. Used in self::prepareData()
     * @param array $tokens Each token is read via its 'editor' and 'str' keys.
     * @return array [counts by user (sorted descending), total count, unique IDs of accounts]
     */
    private function countTokens(array $tokens): array
    {
        $counts = [];
        $userIds = [];
        $totalCount = 0;

        // Loop through the tokens, keeping totals (token length) for each author.
        foreach ($tokens as $token) {
            $editor = $token['editor'];

            // IPs are prefixed with '0|', otherwise it's the user ID.
            if ('0|' === substr($editor, 0, 2)) {
                $editor = substr($editor, 2);
            } else {
                $userIds[] = $editor;
            }

            if (!isset($counts[$editor])) {
                $counts[$editor] = 0;
            }

            // NOTE(review): strlen() measures bytes, not characters — confirm this is intended
            // for tokens containing multi-byte text.
            $length = strlen($token['str']);
            $counts[$editor] += $length;
            $totalCount += $length;
        }

        // Sort authors by count.
        arsort($counts);

        // An ID is pushed once per token above; collapse duplicates so each ID is looked up only once.
        $userIds = array_values(array_unique($userIds));

        return [$counts, $totalCount, $userIds];
    }
}
290