Passed
Push — master ( d0265c...1b1a87 )
by MusikAnimal
07:34
created

Authorship::getRevisionData()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 28
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 3.576

Importance

Changes 0
Metric Value
cc 3
eloc 16
c 0
b 0
f 0
nc 3
nop 1
dl 0
loc 28
ccs 9
cts 15
cp 0.6
crap 3.576
rs 9.7333
1
<?php
2
declare(strict_types=1);
3
4
namespace AppBundle\Model;
5
6
use DateTime;
7
use GuzzleHttp\Exception\RequestException;
8
9
class Authorship extends Model
10
{
11
    /**
     * Domain names of the wikis that are supported by WikiWho.
     * @var string[]
     */
    public const SUPPORTED_PROJECTS = [
        'de.wikipedia.org',
        'en.wikipedia.org',
        'es.wikipedia.org',
        'eu.wikipedia.org',
        'tr.wikipedia.org',
    ];

    /**
     * ID of the target revision. Null means the latest revision.
     * @var int|null
     */
    protected $target;

    /**
     * Prepared authorship data. On success it holds the keys 'list', 'totalAuthors' and 'totalCount'
     * (and optionally 'others'); on failure it holds only the key 'error'.
     * @var array
     */
    protected $data;

    /**
     * Revision that the data pertains to, with keys 'id' and 'timestamp'.
     * @var mixed[]
     */
    protected $revision;
28
29
    /**
     * Authorship constructor.
     * @param Page $page The page to process.
     * @param string|null $target Either a revision ID or date in YYYY-MM-DD format. Null to use latest revision.
     * @param int|null $limit Max number of results. Null to process all authors.
     */
    public function __construct(Page $page, ?string $target = null, ?int $limit = null)
    {
        $this->limit = $limit;

        // The page has to be assigned before the target is resolved,
        // because resolving a date target looks up a revision on the page.
        $this->page = $page;
        $this->target = $this->getTargetRevId($target);
    }
41
42 3
    /**
     * Resolve the user-supplied target into a revision ID.
     * @param string|null $target Either a revision ID or date in YYYY-MM-DD format. Null to use latest revision.
     * @return int|null Null when no target was given.
     * @throws \InvalidArgumentException If the target looks like a date but can't be parsed as one.
     */
    private function getTargetRevId(?string $target): ?int
    {
        if (null === $target) {
            return null;
        }

        // Anything containing a YYYY-MM-DD sequence is treated as a date.
        if (preg_match('/\d{4}-\d{2}-\d{2}/', $target)) {
            $date = DateTime::createFromFormat('Y-m-d', $target);

            // createFromFormat() returns false on failure (e.g. leading or trailing characters).
            // Fail with a clear message rather than passing false on to the Page.
            if (false === $date) {
                throw new \InvalidArgumentException(
                    sprintf("Invalid target date '%s', expected format YYYY-MM-DD.", $target)
                );
            }

            return $this->page->getRevisionIdAtDate($date);
        }

        // Otherwise the target is taken to be a revision ID.
        return (int)$target;
    }
55
56
    /**
     * Get the domain names of the wikis supported by the Authorship tool.
     * @return string[] The contents of self::SUPPORTED_PROJECTS.
     */
    public function getSupportedWikis(): array
    {
        $supportedWikis = self::SUPPORTED_PROJECTS;

        return $supportedWikis;
    }
64
65
    /**
     * Get the ID of the target revision.
     * @return int|null Null when the latest revision is targeted.
     */
    public function getTarget(): ?int
    {
        $targetRevId = $this->target;

        return $targetRevId;
    }
73
74
    /**
     * Get the authorship information for the top $this->limit authors.
     * @return array Empty if no list has been prepared.
     */
    public function getList(): array
    {
        if (!isset($this->data['list'])) {
            return [];
        }

        return $this->data['list'];
    }
82
83
    /**
     * Get the error recorded while preparing the data.
     * @return string|null Null if no error was recorded.
     */
    public function getError(): ?string
    {
        if (!isset($this->data['error'])) {
            return null;
        }

        return $this->data['error'];
    }
91
92
    /**
     * Get the total number of authors.
     * @return int 0 if the data has not been prepared, or an error occurred while preparing it.
     */
    public function getTotalAuthors(): int
    {
        // 'totalAuthors' is absent when only an error was recorded; fall back to 0
        // so the int return type is honoured, consistent with the other getters.
        return $this->data['totalAuthors'] ?? 0;
    }
100
101
    /**
     * Get the total count across all authors, as summed from the token lengths.
     * @return int 0 if the data has not been prepared, or an error occurred while preparing it.
     */
    public function getTotalCount(): int
    {
        // 'totalCount' is absent when only an error was recorded; fall back to 0
        // so the int return type is honoured, consistent with the other getters.
        return $this->data['totalCount'] ?? 0;
    }
109
110
    /**
     * Get the summary for the 'other' authors, with keys 'count', 'percentage' and 'numEditors'.
     * @return array|null Null if no such summary was recorded.
     */
    public function getOthers(): ?array
    {
        if (!isset($this->data['others'])) {
            return null;
        }

        return $this->data['others'];
    }
118
119
    /**
     * Get the revision that the authorship data pertains to.
     * @return array|null With keys 'id' and 'timestamp'. Null if no revision has been set.
     */
    public function getRevision(): ?array
    {
        $revision = $this->revision;

        return $revision;
    }
127
128
    /**
     * Is the given page supported by the Authorship tool?
     * A page is supported when its project's domain is listed in self::SUPPORTED_PROJECTS
     * and the page is in namespace 0.
     * @param Page $page
     * @return bool
     */
    public static function isSupportedPage(Page $page): bool
    {
        // Strict comparison, so the domain is never matched through type juggling.
        return in_array($page->getProject()->getDomain(), self::SUPPORTED_PROJECTS, true) &&
            0 === $page->getNamespace();
    }
138
139
    /**
     * Get the revision data from the WikiWho API and set $this->revision with basic info.
     * If the request fails or no revision is returned, the error is recorded in
     * $this->data['error'] and null is returned.
     * @param bool $returnRevId Whether or not to include revision IDs in the response.
     * @return array|null null if there were errors.
     */
    protected function getRevisionData(bool $returnRevId = false): ?array
    {
        try {
            $ret = $this->getRepository()->getData($this->page, $this->target, $returnRevId);
        } catch (RequestException $e) {
            $this->data = [
                'error' => 'unknown',
            ];
            return null;
        }

        // If the revision can't be found (entry missing, empty or malformed), record the error message.
        $revisions = $ret['revisions'][0] ?? null;
        if (!is_array($revisions) || [] === $revisions) {
            $this->data = [
                'error' => $ret['Error'] ?? 'Unknown',
            ];
            return null;
        }

        // The entry is keyed by revision ID; the first key is the revision the data is for.
        $revId = array_keys($revisions)[0];
        $revisionData = $revisions[$revId];

        $this->revision = [
            'id' => $revId,
            'timestamp' => $revisionData['time'],
        ];

        return $revisionData;
    }
174
175
    /**
     * Get authorship attribution from the WikiWho API and store the result in $this->data.
     * Does nothing if $this->data is already set. If the revision data can't be fetched,
     * $this->data holds only the error recorded by self::getRevisionData().
     * @see https://f-squared.org/wikiwho/
     */
    public function prepareData(): void
    {
        if (isset($this->data)) {
            return;
        }

        // Fetch the revision data. self::getRevisionData() returns null if there are errors.
        $revisionData = $this->getRevisionData();
        if (null === $revisionData) {
            return;
        }

        [$counts, $totalCount, $userIds] = $this->countTokens($revisionData['tokens']);
        $usernameMap = $this->getUsernameMap($userIds);

        // $counts is sorted in descending order, so slicing keeps the top $this->limit editors.
        $countsToProcess = null !== $this->limit
            ? array_slice($counts, 0, $this->limit, true)
            : $counts;

        $data = [];

        // Running totals for the top editors (at most 10), used to derive
        // the count and percentage of the remaining editors.
        $percentageSum = 0;
        $countSum = 0;
        $numEditors = 0;

        // Loop through once more, creating an array with the user names (or IP addresses)
        // as the key, and the count and percentage as the value.
        foreach ($countsToProcess as $editor => $count) {
            // Editors without a known username (such as IPs) are keyed by their identifier.
            $index = $usernameMap[$editor] ?? $editor;

            // Guard against division by zero when every token has zero length.
            $percentage = $totalCount > 0
                ? round(100 * ($count / $totalCount), 1)
                : 0.0;

            // If we are showing > 10 editors in the table, we still only want the top 10 for the chart.
            if ($numEditors < 10) {
                $percentageSum += $percentage;
                $countSum += $count;
                $numEditors++;
            }

            $data[$index] = [
                'count' => $count,
                'percentage' => $percentage,
            ];
        }

        $this->data = [
            'list' => $data,
            'totalAuthors' => count($counts),
            'totalCount' => $totalCount,
        ];

        // Record character count and percentage for the remaining editors. This is decided by
        // whether any editors actually remain, not by the summed percentages: those are rounded,
        // so they can fall short of 100 with nobody left, or reach 100 with editors left over.
        $numOthers = count($counts) - $numEditors;
        if ($numOthers > 0) {
            $this->data['others'] = [
                'count' => $totalCount - $countSum,
                // Clamp at zero, since the rounded percentages may sum to slightly over 100.
                'percentage' => max(0.0, round(100 - $percentageSum, 1)),
                'numEditors' => $numOthers,
            ];
        }
    }
246
247
    /**
     * Build a lookup of usernames keyed by user ID, for the given IDs.
     * @param int[] $userIds
     * @return array IDs as keys, usernames as values. Empty if no IDs were given.
     */
    private function getUsernameMap(array $userIds): array
    {
        // Nothing to look up, so skip the repository call entirely.
        if (0 === count($userIds)) {
            return [];
        }

        $repository = $this->getRepository();
        $rows = $repository->getUsernamesFromIds($this->page->getProject(), $userIds);

        // Each row carries a 'user_id' and a 'user_name'.
        $map = [];
        foreach ($rows as $row) {
            $map[$row['user_id']] = $row['user_name'];
        }

        return $map;
    }
270
271
    /**
     * Get counts of token lengths for each author. Used in self::prepareData()
     * @param array $tokens Each token must have the keys 'editor' and 'str'.
     * @return array [counts by user (sorted in descending order), total count, unique IDs of accounts]
     */
    private function countTokens(array $tokens): array
    {
        $counts = [];
        $userIds = [];
        $totalCount = 0;

        // Loop through the tokens, keeping totals (token length) for each author.
        foreach ($tokens as $token) {
            $editor = $token['editor'];
            $length = strlen($token['str']);

            // IPs are prefixed with '0|', otherwise it's the user ID.
            if ('0|' === substr($editor, 0, 2)) {
                $editor = substr($editor, 2);
            } else {
                $userIds[] = $editor;
            }

            $counts[$editor] = ($counts[$editor] ?? 0) + $length;
            $totalCount += $length;
        }

        // Sort authors by count.
        arsort($counts);

        // An author usually owns many tokens, so drop the repeated IDs
        // to keep the username lookup from receiving duplicates.
        return [$counts, $totalCount, array_values(array_unique($userIds))];
    }
306
}
307