Passed
Push — master ( 16bc58...e2c4be )
by MusikAnimal
05:53
created

Authorship::countTokens()   A

Complexity

Conditions 4
Paths 5

Size

Total Lines 29
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
eloc 15
nc 5
nop 1
dl 0
loc 29
ccs 15
cts 15
cp 1
crap 4
rs 9.7666
c 0
b 0
f 0
1
<?php
2
declare(strict_types=1);
3
4
namespace AppBundle\Model;
5
6
use DateTime;
7
8
class Authorship extends Model
9
{
10
    /** @const string[] Domain names of wikis supported by WikiWho. */
11
    public const SUPPORTED_PROJECTS = [
12
        'de.wikipedia.org',
13
        'en.wikipedia.org',
14
        'es.wikipedia.org',
15
        'eu.wikipedia.org',
16
        'tr.wikipedia.org',
17
    ];
18
19
    /** @var int|null Target revision ID. Null for latest revision. */
20
    protected $target;
21
22
    /** @var array Prepared authorship data: 'list' (per-editor count and percentage), 'totalAuthors', 'totalCount' and optionally 'others'; or only 'error' on failure. */
23
    protected $data;
24
25
    /** @var mixed[] Revision that the data pertains to, with keys 'id' and 'timestamp'. */
26
    protected $revision;
27
28
    /**
     * Authorship constructor.
     * @param Page $page The page whose authorship is being analysed.
     * @param string|null $target Revision ID, or a date in YYYY-MM-DD format. Null means the latest revision.
     * @param int|null $limit Maximum number of authors to list. Null for no limit.
     */
    public function __construct(Page $page, ?string $target = null, ?int $limit = null)
    {
        $this->limit = $limit;
        $this->page = $page;

        // Resolved last, because a date target is looked up through $this->page.
        $this->target = $this->getTargetRevId($target);
    }
40
41 1
    /**
     * Resolve the user-supplied target into a revision ID.
     * @param string|null $target Revision ID, or a date in YYYY-MM-DD format. Null for the latest revision.
     * @return int|null Revision ID, or null to use the latest revision.
     */
    private function getTargetRevId(?string $target): ?int
    {
        if (null === $target) {
            return null;
        }

        if (preg_match('/\d{4}-\d{2}-\d{2}/', $target)) {
            $date = DateTime::createFromFormat('Y-m-d', $target);

            // createFromFormat() returns false when the string does not parse as Y-m-d
            // (for instance when it carries extra characters around the date). The page
            // lookup expects a DateTime, so fall back to the latest revision instead of
            // passing false along and triggering a TypeError.
            if (false === $date) {
                return null;
            }

            return $this->page->getRevisionIdAtDate($date);
        }

        // Anything that doesn't look like a date is treated as a revision ID.
        return (int)$target;
    }
54
55
    /**
     * List the wiki domains that this tool can be used on.
     * @return string[] The entries of self::SUPPORTED_PROJECTS.
     */
    public function getSupportedWikis(): array
    {
        $domains = self::SUPPORTED_PROJECTS;

        return $domains;
    }
63
64
    /**
     * The revision ID that was requested.
     * @return int|null Null when the latest revision is to be used.
     */
    public function getTarget(): ?int
    {
        $revId = $this->target;

        return $revId;
    }
72
73
    /**
     * Per-author authorship figures for the top $this->limit authors.
     * @return array Empty if no list has been prepared.
     */
    public function getList(): array
    {
        if (isset($this->data['list'])) {
            return $this->data['list'];
        }

        return [];
    }
81
82
    /**
     * The error recorded while preparing the data, if any.
     * @return string|null Null if no error was recorded.
     */
    public function getError(): ?string
    {
        if (isset($this->data['error'])) {
            return $this->data['error'];
        }

        return null;
    }
90
91
    /**
     * Get the total number of authors.
     * @return int 0 if the data has not been prepared or preparing it ended in an error.
     */
    public function getTotalAuthors(): int
    {
        // 'totalAuthors' is absent when only an 'error' was stored; default to 0 in the
        // same way getList() defaults to an empty list, instead of returning null
        // from a method declared to return int.
        return $this->data['totalAuthors'] ?? 0;
    }
99
100
    /**
     * Get the combined length of all tokens in the revision, as summed by countTokens().
     * @return int 0 if the data has not been prepared or preparing it ended in an error.
     */
    public function getTotalCount(): int
    {
        // 'totalCount' is absent when only an 'error' was stored; default to 0 in the
        // same way getList() defaults to an empty list, instead of returning null
        // from a method declared to return int.
        return $this->data['totalCount'] ?? 0;
    }
108
109
    /**
     * Summary figures for the authors that fall outside the top ones.
     * @return array|null Keys 'count', 'percentage' and 'numEditors'; null if no summary was recorded.
     */
    public function getOthers(): ?array
    {
        if (isset($this->data['others'])) {
            return $this->data['others'];
        }

        return null;
    }
117
118
    /**
     * The revision that the authorship data describes.
     * @return array|null Keys 'id' and 'timestamp'; null if no revision has been recorded.
     */
    public function getRevision(): ?array
    {
        $revision = $this->revision;

        return $revision;
    }
126
127
    /**
     * Is the given page supported by the Authorship tool?
     *
     * A page is supported when its project's domain is listed in self::SUPPORTED_PROJECTS
     * and the page is in namespace 0.
     * @param Page $page
     * @return bool
     */
    public static function isSupportedPage(Page $page): bool
    {
        // Strict comparison: domains are strings, so no type juggling should take part in the match.
        return in_array($page->getProject()->getDomain(), self::SUPPORTED_PROJECTS, true) &&
            0 === $page->getNamespace();
    }
137
138
    /**
     * Get authorship attribution from the WikiWho API and store it on this instance.
     *
     * Does nothing if data was already prepared. When no revision is returned, only an
     * 'error' key is stored. Otherwise 'list', 'totalAuthors' and 'totalCount' are stored,
     * plus 'others' when the summed percentages of the first (at most 10) authors stay below 100.
     * @see https://f-squared.org/wikiwho/
     */
    public function prepareData(): void
    {
        if (isset($this->data)) {
            return;
        }

        // TODO: check for failures. Should have a success:true
        $ret = $this->getRepository()->getData($this->page, $this->target);

        // If revision can't be found, return error message. An empty entry is treated the
        // same as a missing one, since there would be no revision ID to read from it.
        if (empty($ret['revisions'][0])) {
            $this->data = [
                'error' => $ret['Error'] ?? 'Unknown',
            ];
            return;
        }

        // The entry is keyed by the revision ID.
        $revId = array_keys($ret['revisions'][0])[0];
        $revisionData = $ret['revisions'][0][$revId];

        $this->revision = [
            'id' => $revId,
            'timestamp' => $revisionData['time'],
        ];

        [$counts, $totalCount, $userIds] = $this->countTokens($revisionData['tokens']);
        $usernameMap = $this->getUsernameMap($userIds);

        $countsToProcess = null !== $this->limit
            ? array_slice($counts, 0, $this->limit, true)
            : $counts;

        $data = [];

        // Used to get the character count and percentage of the remaining N editors, after the top $this->limit.
        $percentageSum = 0;
        $countSum = 0;
        $numEditors = 0;

        // Loop through once more, creating an array with the user names (or IP addresses)
        // as the key, and the count and percentage as the value.
        foreach ($countsToProcess as $editor => $count) {
            // Editors without a username mapping (e.g. IPs) are keyed by their own identifier.
            $index = $usernameMap[$editor] ?? $editor;

            // A total of 0 is possible when every token is an empty string; avoid dividing by zero.
            $percentage = $totalCount > 0
                ? round(100 * ($count / $totalCount), 1)
                : 0.0;

            // If we are showing > 10 editors in the table, we still only want the top 10 for the chart.
            if ($numEditors < 10) {
                $percentageSum += $percentage;
                $countSum += $count;
                $numEditors++;
            }

            $data[$index] = [
                'count' => $count,
                'percentage' => $percentage,
            ];
        }

        $this->data = [
            'list' => $data,
            'totalAuthors' => count($counts),
            'totalCount' => $totalCount,
        ];

        // Record character count and percentage for the remaining editors.
        if ($percentageSum < 100) {
            $this->data['others'] = [
                'count' => $totalCount - $countSum,
                'percentage' => round(100 - $percentageSum, 1),
                'numEditors' => count($counts) - $numEditors,
            ];
        }
    }
222
223
    /**
     * Look up the usernames that belong to the given user IDs.
     * @param int[] $userIds
     * @return array Usernames, keyed by user ID. Empty if no IDs were given.
     */
    private function getUsernameMap(array $userIds): array
    {
        // Nothing to look up, so skip the repository call altogether.
        if (0 === count($userIds)) {
            return [];
        }

        $project = $this->page->getProject();
        $rows = $this->getRepository()->getUsernamesFromIds($project, $userIds);

        $namesById = [];
        foreach ($rows as $row) {
            $id = $row['user_id'];
            $namesById[$id] = $row['user_name'];
        }

        return $namesById;
    }
246
247
    /**
     * Get counts of token lengths for each author. Used in self::prepareData()
     *
     * Lengths are measured in characters (UTF-8 aware), not bytes, so that multi-byte
     * text is not over-weighted.
     * @param array $tokens Each token needs an 'editor' (user ID, or an IP prefixed with '0|') and a 'str'.
     * @return array [counts by user sorted from highest to lowest, total count, unique IDs of accounts]
     */
    private function countTokens(array $tokens): array
    {
        $counts = [];
        $userIds = [];
        $totalCount = 0;

        // Loop through the tokens, keeping totals (token length) for each author.
        foreach ($tokens as $token) {
            $editor = $token['editor'];

            // IPs are prefixed with '0|', otherwise it's the user ID.
            if ('0|' === substr($editor, 0, 2)) {
                $editor = substr($editor, 2);
            } else {
                // Keyed by ID so an account is recorded once, however many tokens it authored.
                $userIds[$editor] = $editor;
            }

            // Character length, not byte length: 'é' counts as 1, not 2.
            $length = mb_strlen($token['str'], 'UTF-8');

            $counts[$editor] = ($counts[$editor] ?? 0) + $length;
            $totalCount += $length;
        }

        // Sort authors by count.
        arsort($counts);

        return [$counts, $totalCount, array_values($userIds)];
    }
282
}
283