Test Failed
Push — dependency-injection ( 7565fa )
by MusikAnimal
07:05
created

ArticleInfoApi::getMaxRevisions()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 3
nc 2
nop 0
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace App\Model;
6
7
use App\Helper\AutomatedEditsHelper;
8
use App\Helper\I18nHelper;
9
use App\Repository\ArticleInfoRepository;
10
use DateTime;
11
use Symfony\Component\DomCrawler\Crawler;
12
use Symfony\Component\HttpKernel\Exception\HttpException;
13
use Symfony\Component\HttpKernel\Exception\ServiceUnavailableHttpException;
14
15
/**
16
 * An ArticleInfoApi is standalone logic for the Article Info tool. These methods perform SQL queries
17
 * or make API requests and can be called directly, without any knowledge of the child ArticleInfo class.
18
 * It does require that the ArticleInfoRepository be set, however.
19
 * @see ArticleInfo
20
 */
21
class ArticleInfoApi extends Model
22
{
23
    /** @var AutomatedEditsHelper Injected through the constructor. */
    protected AutomatedEditsHelper $autoEditsHelper;

    /** @var I18nHelper Injected through the constructor. */
    protected I18nHelper $i18n;

    /** @var int Cached number of revisions belonging to the page (see getNumRevisions()). */
    protected int $numRevisions;

    /** @var array Cached prose stats, keyed by 'characters', 'words', 'references', 'unique_references', 'sections'. */
    protected array $proseStats;

    /** @var array Cached counts of categories, templates and files on the page. */
    protected array $transclusionData;

    /** @var array Cached statistics about the bots that edited the page, keyed by username. */
    protected array $bots;

    /** @var int Cached number of edits made to the page by bots. */
    protected int $botRevisionCount;

    /** @var int[] Cached counts of incoming/outgoing/external links and redirects to the page. */
    protected array $linksAndRedirects;

    /** @var string[] Cached assessments of the page (see Page::getAssessments). */
    protected array $assessments;

    /** @var string[] Cached list of Wikidata and Checkwiki errors. */
    protected array $bugs;
49
50
    /**
     * Build an ArticleInfoApi for the given page, optionally restricted to a date range.
     * @param ArticleInfoRepository $repository
     * @param I18nHelper $i18n
     * @param AutomatedEditsHelper $autoEditsHelper
     * @param Page $page The page to process.
     * @param false|int $start Start date as Unix timestamp.
     * @param false|int $end End date as Unix timestamp.
     */
    public function __construct(
        ArticleInfoRepository $repository,
        I18nHelper $i18n,
        AutomatedEditsHelper $autoEditsHelper,
        Page $page,
        $start = false,
        $end = false
    ) {
        // The page being reported on and the date range the report is limited to.
        $this->page = $page;
        $this->start = $start;
        $this->end = $end;

        // Injected collaborators.
        $this->repository = $repository;
        $this->i18n = $i18n;
        $this->autoEditsHelper = $autoEditsHelper;
    }
74
75
    /**
     * Number of revisions belonging to the page within the configured date range.
     * The value is fetched from the Page once and then served from $this->numRevisions.
     * @return int
     */
    public function getNumRevisions(): int
    {
        if (isset($this->numRevisions)) {
            return $this->numRevisions;
        }

        $this->numRevisions = $this->page->getNumRevisions(null, $this->start, $this->end);

        return $this->numRevisions;
    }
86
87
    /**
     * Does the page have more revisions than the repository's configured maximum?
     * A maximum that is not greater than zero means there is no limit.
     * @return bool
     */
    public function tooManyRevisions(): bool
    {
        $maxRevisions = $this->repository->maxPageRevisions;

        // No limit configured: skip counting the revisions altogether.
        if (!($maxRevisions > 0)) {
            return false;
        }

        return $this->getNumRevisions() > $maxRevisions;
    }
96
97
    /**
     * Fetch the basic editing info used in the API (number of revisions, unique authors, initial author and
     * the initial author's edit count) by delegating to the repository, which combines it into one query
     * for better performance. Caching is intentionally disabled: with the gadget this gets hit for a different
     * page constantly, so the likelihood of a cache benefiting us is slim.
     * @return string[]|false false if the page was not found.
     */
    public function getBasicEditingInfo()
    {
        /** @var ArticleInfoRepository $repository */
        $repository = $this->repository;

        return $repository->getBasicEditingInfo($this->page);
    }
108
109
    /** @var array[] Results of getTopEditorsByEditCount() for this instance, keyed by "<limit>|<noBots>". */
    private array $topEditorsCache = [];

    /**
     * Get the top editors to the page by edit count.
     *
     * Results are cached per instance and per argument combination. A function-level static cache would be
     * shared by every instance and would ignore the arguments, returning stale data for another page,
     * limit or bot filter.
     *
     * @param int $limit Default 20, maximum 1,000.
     * @param bool $noBots Set to non-false to exclude bots from the result.
     * @return array List of editors, each with keys 'rank', 'username', 'count', 'minor',
     *   'first_edit' and 'latest_edit' (the latter two with keys 'id' and 'timestamp').
     */
    public function getTopEditorsByEditCount(int $limit = 20, bool $noBots = false): array
    {
        // Clamp first so that e.g. 5000 and 1000 share the same cache entry.
        $limit = min($limit, 1000);
        $cacheKey = $limit.'|'.(int)$noBots;

        if (isset($this->topEditorsCache[$cacheKey])) {
            return $this->topEditorsCache[$cacheKey];
        }

        $rows = $this->repository->getTopEditorsByEditCount(
            $this->page,
            $this->start,
            $this->end,
            $limit,
            $noBots
        );

        $topEditors = [];
        $rank = 0;
        foreach ($rows as $row) {
            $topEditors[] = [
                'rank' => ++$rank,
                'username' => $row['username'],
                'count' => $row['count'],
                'minor' => $row['minor'],
                'first_edit' => [
                    'id' => $row['first_revid'],
                    'timestamp' => $row['first_timestamp'],
                ],
                'latest_edit' => [
                    'id' => $row['latest_revid'],
                    'timestamp' => $row['latest_timestamp'],
                ],
            ];
        }

        $this->topEditorsCache[$cacheKey] = $topEditors;

        return $topEditors;
    }
152
153
    /**
     * Get prose and reference information, parsed from the page's HTML content.
     * The result is computed once and then served from $this->proseStats.
     * @return array With keys 'characters', 'words', 'references', 'unique_references', 'sections'
     */
    public function getProseStats(): array
    {
        if (!isset($this->proseStats)) {
            // Only an integer end date is turned into a DateTime; otherwise null is passed on.
            $asOf = is_int($this->end) ? new DateTime("@$this->end") : null;
            $crawler = new Crawler($this->page->getHTMLContent($asOf));

            [$numChars, $numWords] = $this->countCharsAndWords($crawler, '#mw-content-text p');

            // References are considered identical when their text is identical.
            $refNodes = $crawler->filter('#mw-content-text .reference');
            $refTexts = $refNodes->each(fn ($refNode) => $refNode->text());
            $numUniqueRefs = count(array_unique($refTexts));

            $numSections = $crawler->filter('#mw-content-text .mw-headline')->count();

            $this->proseStats = [
                'characters' => $numChars,
                'words' => $numWords,
                'references' => $refNodes->count(),
                'unique_references' => $numUniqueRefs,
                'sections' => $numSections,
            ];
        }

        return $this->proseStats;
    }
188
189
    /**
     * Count the number of characters and words of the plain text within the DOM elements matched by the
     * given selector. Citation markers such as "[12]" are removed before counting.
     *
     * Characters are counted as UTF-8 characters (not bytes), words are split on runs of whitespace, and
     * elements with no remaining text contribute nothing.
     *
     * @param Crawler $crawler
     * @param string $selector HTML selector.
     * @return array [num chars, num words]
     */
    private function countCharsAndWords(Crawler $crawler, string $selector): array
    {
        $totalChars = 0;
        $totalWords = 0;

        $crawler->filter($selector)->each(function ($node) use (&$totalChars, &$totalWords): void {
            /** @var Crawler $node */
            // Trim after stripping the markers so "text [1]" does not keep a trailing space.
            $text = trim((string)preg_replace('/\[\d+]/', '', $node->text(null, true)));

            // An empty element would otherwise be counted as one word.
            if ('' === $text) {
                return;
            }

            // mb_strlen counts characters; strlen would count bytes and inflate non-ASCII text.
            $totalChars += mb_strlen($text, 'UTF-8');

            // Splitting on whitespace runs avoids phantom words where a marker was removed.
            $totalWords += count(preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY) ?: []);
        });

        return [$totalChars, $totalWords];
    }
209
210
    /**
     * Get the page assessments of the page.
     *
     * Only array results are cached: $this->assessments is declared as an array, so storing the
     * `false` returned for unsupported projects would raise a TypeError.
     *
     * @see https://www.mediawiki.org/wiki/Extension:PageAssessments
     * @return string[]|false False if unsupported.
     * @codeCoverageIgnore
     */
    public function getAssessments()
    {
        if (isset($this->assessments)) {
            return $this->assessments;
        }

        $assessments = $this->page
            ->getProject()
            ->getPageAssessments()
            ->getAssessments($this->page);

        if (!is_array($assessments)) {
            // Unsupported project: pass the result through without touching the typed cache.
            return $assessments;
        }

        $this->assessments = $assessments;

        return $this->assessments;
    }
226
227
    /**
     * List the page's Wikidata and Checkwiki errors.
     * The list is fetched from the Page once and then served from $this->bugs.
     * @see Page::getErrors()
     * @return string[]
     */
    public function getBugs(): array
    {
        if (isset($this->bugs)) {
            return $this->bugs;
        }

        $this->bugs = $this->page->getErrors();

        return $this->bugs;
    }
239
240
    /**
     * Get the number of Wikidata and CheckWiki errors.
     * @return int
     */
    public function numBugs(): int
    {
        $bugs = $this->getBugs();

        return count($bugs);
    }
248
249
    /**
     * Generate the data structure that will be used in the ArticleInfo API response.
     *
     * The project, page, watchers and pageviews are always included. Revision-based data is added only
     * when the repository query succeeds and finds the page; on failure an 'error' key is set instead.
     *
     * @param Project $project
     * @param Page $page
     * @return array
     * @codeCoverageIgnore
     */
    public function getArticleInfoApiData(Project $project, Page $page): array
    {
        /** Number of days to query for pageviews */
        $pageviewsOffset = 30;

        $data = [
            'project' => $project->getDomain(),
            'page' => $page->getTitle(),
            'watchers' => (int) $page->getWatchers(),
            'pageviews' => $page->getLastPageviews($pageviewsOffset),
            'pageviews_offset' => $pageviewsOffset,
        ];

        // This default is what remains when one of the exceptions below is caught.
        $info = false;

        try {
            $info = $this->repository->getBasicEditingInfo($page);
        } catch (ServiceUnavailableHttpException $e) {
            // No more open database connections.
            $data['error'] = 'Unable to fetch revision data. Please try again later.';
        } catch (HttpException $e) {
            /**
             * The query most likely exceeded the maximum query time,
             * so we'll abort and give only info retrieved by the API.
             */
            $data['error'] = 'Unable to fetch revision data. The query may have timed out.';
        }

        // Nothing revision-related to add.
        if (false === $info) {
            return $data;
        }

        $createdAt = DateTime::createFromFormat('YmdHis', $info['created_at']);
        $modifiedAt = DateTime::createFromFormat('YmdHis', $info['modified_at']);
        $secsSinceLastEdit = (new DateTime)->getTimestamp() - $modifiedAt->getTimestamp();

        $assessment = $page->getProject()
            ->getPageAssessments()
            ->getAssessment($page);

        $editingData = [
            'revisions' => (int) $info['num_edits'],
            'editors' => (int) $info['num_editors'],
            'minor_edits' => (int) $info['minor_edits'],
            'author' => $info['author'],
            'author_editcount' => null === $info['author_editcount'] ? null : (int) $info['author_editcount'],
            // Some wikis (such foundation.wikimedia.org) may be missing the creation date.
            'created_at' => false === $createdAt ? null : $createdAt->format('Y-m-d'),
            'created_rev_id' => $info['created_rev_id'],
            'modified_at' => $modifiedAt->format('Y-m-d H:i'),
            'secs_since_last_edit' => $secsSinceLastEdit,
            'last_edit_id' => (int) $info['modified_rev_id'],
            'assessment' => $assessment,
        ];

        return array_merge($data, $editingData);
    }
315
316
    /************************ Link statistics ************************/
317
318
    /**
     * Number of external links on the page.
     * @return int
     */
    public function linksExtCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_ext_count'];
    }
326
327
    /**
     * Number of incoming links to the page.
     * @return int
     */
    public function linksInCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_in_count'];
    }
335
336
    /**
     * Number of outgoing links from the page.
     * @return int
     */
    public function linksOutCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_out_count'];
    }
344
345
    /**
     * Number of redirects to the page.
     * @return int
     */
    public function redirectsCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['redirects_count'];
    }
353
354
    /**
     * Counts of external, incoming and outgoing links, along with the number of redirects to the page.
     * The counts are fetched from the Page once and then served from $this->linksAndRedirects.
     * @return int[]
     * @codeCoverageIgnore
     */
    private function getLinksAndRedirects(): array
    {
        if (isset($this->linksAndRedirects)) {
            return $this->linksAndRedirects;
        }

        $this->linksAndRedirects = $this->page->countLinksAndRedirects();

        return $this->linksAndRedirects;
    }
366
367
    /**
     * Fetch transclusion data (categories, templates and files) that are on the page.
     * The data is fetched from the repository once and then served from $this->transclusionData.
     * @return array With keys 'categories', 'templates' and 'files'.
     */
    public function getTransclusionData(): array
    {
        if (isset($this->transclusionData)) {
            return $this->transclusionData;
        }

        $this->transclusionData = $this->repository->getTransclusionData($this->page);

        return $this->transclusionData;
    }
378
379
    /**
     * Number of categories that are on the page.
     * @return int
     */
    public function getNumCategories(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['categories'];
    }
387
388
    /**
     * Number of templates that are on the page.
     * @return int
     */
    public function getNumTemplates(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['templates'];
    }
396
397
    /**
     * Number of files that are on the page.
     * @return int
     */
    public function getNumFiles(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['files'];
    }
405
406
    /************************ Bot statistics ************************/
407
408
    /**
     * Number of edits made to the page by current or former bots, i.e. the sum of every bot's 'count'.
     * Once computed, the total is served from $this->botRevisionCount regardless of the argument.
     * @param array[]|null $bots Used only in unit tests, where we supply mock data for the bots that will get
     *   processed. Each entry needs a 'count' key. Defaults to the result of getBots().
     * @return int
     */
    public function getBotRevisionCount(?array $bots = null): int
    {
        if (!isset($this->botRevisionCount)) {
            $botStats = $bots ?? $this->getBots();
            $this->botRevisionCount = array_sum(array_column($botStats, 'count'));
        }

        return $this->botRevisionCount;
    }
432
433
    /**
     * Get and set $this->bots about bots that edited the page. This is done separately from the main query because
     * we use this information when computing the top 10 editors in ArticleInfo, where we don't want to include bots.
     * @return array Keyed by username, each value with keys 'count' (int) and 'current' (bool),
     *   sorted by 'count' in descending order.
     */
    public function getBots(): array
    {
        if (isset($this->bots)) {
            return $this->bots;
        }

        // Parse the bot edits.
        $this->bots = [];

        // Cap the query at the configured maximum only when the page exceeds it.
        $limit = null;
        if ($this->tooManyRevisions()) {
            $limit = $this->repository->maxPageRevisions;
        }

        $result = $this->repository->getBotData($this->page, $this->start, $this->end, $limit);
        while ($row = $result->fetchAssociative()) {
            $this->bots[$row['username']] = [
                'count' => (int)$row['count'],
                'current' => '1' === $row['current'],
            ];
        }

        // Sort by edit count.
        uasort($this->bots, function ($first, $second) {
            return $second['count'] <=> $first['count'];
        });

        return $this->bots;
    }
464
465
    /**
     * Number of bots that edited the page.
     * @return int
     */
    public function getNumBots(): int
    {
        $bots = $this->getBots();

        return count($bots);
    }
473
}
474