Passed
Push — master ( c783b5...b14851 )
by MusikAnimal
06:09
created

ArticleInfo::getBotRevisionCount()   A

Complexity

Conditions 4
Paths 5

Size

Total Lines 18
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 4.016

Importance

Changes 0
Metric Value
cc 4
eloc 9
nc 5
nop 1
dl 0
loc 18
ccs 9
cts 10
cp 0.9
crap 4.016
rs 9.9666
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the ArticleInfo class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Model;
9
10
use AppBundle\Helper\I18nHelper;
11
use DateTime;
12
use Doctrine\DBAL\Statement;
13
use Symfony\Component\DependencyInjection\ContainerInterface;
14
use Symfony\Component\DomCrawler\Crawler;
15
16
/**
17
 * An ArticleInfo provides statistics about a page on a project.
18
 */
19
class ArticleInfo extends Model
20
{
21
    /** @var ContainerInterface The application's DI container. */
22
    protected $container;
23
24
    /** @var I18nHelper For i18n and l10n. */
25
    protected $i18n;
26
27
    /** @var int Number of revisions that belong to the page. */
28
    protected $numRevisions;
29
30
    /** @var int Maximum number of revisions to process, as configured. */
31
    protected $maxRevisions;
32
33
    /** @var int Number of revisions that were actually processed. */
34
    protected $numRevisionsProcessed;
35
36
    /**
37
     * Various statistics about editors to the page. These are not User objects
38
     * so as to preserve memory.
39
     * @var mixed[]
40
     */
41
    protected $editors = [];
42
43
    /** @var mixed[] The top 10 editors to the page by number of edits. */
44
    protected $topTenEditorsByEdits;
45
46
    /** @var mixed[] The top 10 editors to the page by added text. */
47
    protected $topTenEditorsByAdded;
48
49
    /** @var int Number of edits made by the top 10 editors. */
50
    protected $topTenCount;
51
52
    /** @var mixed[] Various statistics about bots that edited the page. */
53
    protected $bots;
54
55
    /** @var int Number of edits made to the page by bots. */
56
    protected $botRevisionCount;
57
58
    /** @var mixed[] Various counts about each individual year and month of the page's history. */
59
    protected $yearMonthCounts;
60
61
    /** @var string[] Localized labels for the years, to be used in the 'Year counts' chart. */
62
    protected $yearLabels = [];
63
64
    /** @var string[] Localized labels for the months, to be used in the 'Month counts' chart. */
65
    protected $monthLabels = [];
66
67
    /** @var Edit The first edit to the page. */
68
    protected $firstEdit;
69
70
    /** @var Edit The last edit to the page. */
71
    protected $lastEdit;
72
73
    /** @var Edit Edit that made the largest addition by number of bytes. */
74
    protected $maxAddition;
75
76
    /** @var Edit Edit that made the largest deletion by number of bytes. */
77
    protected $maxDeletion;
78
79
    /** @var int[] Number of in and outgoing links and redirects to the page. */
80
    protected $linksAndRedirects;
81
82
    /** @var string[] Assessments of the page (see Page::getAssessments). */
83
    protected $assessments;
84
85
    /** @var mixed[] Prose stats, with keys 'characters', 'words', 'references', 'unique_references', 'sections'. */
86
    protected $proseStats;
87
88
    /**
89
     * Maximum number of edits that were created across all months. This is used as a comparison
90
     * for the bar charts in the months section.
91
     * @var int
92
     */
93
    protected $maxEditsPerMonth;
94
95
    /** @var string[] List of (semi-)automated tools that were used to edit the page. */
96
    protected $tools;
97
98
    /**
99
     * Total number of bytes added throughout the page's history. This is used as a comparison
100
     * when computing the top 10 editors by added text.
101
     * @var int
102
     */
103
    protected $addedBytes = 0;
104
105
    /** @var int Number of days between first and last edit. */
106
    protected $totalDays;
107
108
    /** @var int Number of minor edits to the page. */
109
    protected $minorCount = 0;
110
111
    /** @var int Number of anonymous edits to the page. */
112
    protected $anonCount = 0;
113
114
    /** @var int Number of automated edits to the page. */
115
    protected $automatedCount = 0;
116
117
    /** @var int Number of edits to the page that were reverted with the subsequent edit. */
118
    protected $revertCount = 0;
119
120
    /** @var int[] The "edits per <time>" counts. */
121
    protected $countHistory = [
122
        'day' => 0,
123
        'week' => 0,
124
        'month' => 0,
125
        'year' => 0,
126
    ];
127
128
    /** @var string[] List of wikidata and Checkwiki errors. */
129
    protected $bugs;
130
131
    /** @var array Number of categories, templates and files on the page. */
132
    protected $transclusionData;
133
134
    /**
     * Create an ArticleInfo for the given page, optionally restricted to a date range.
     * @param Page $page The page whose statistics are gathered.
     * @param ContainerInterface $container The application's DI container.
     * @param false|int $start Start of the date range as a Unix timestamp, or false for none.
     * @param false|int $end End of the date range as a Unix timestamp, or false for none.
     */
    public function __construct(Page $page, ContainerInterface $container, $start = false, $end = false)
    {
        $this->container = $container;
        $this->page = $page;
        $this->end = $end;
        $this->start = $start;
    }
148
149
    /**
     * Store the I18nHelper so that ArticleInfo can use it for i18n and l10n.
     * @param I18nHelper $i18n The helper instance to keep on this object.
     * @codeCoverageIgnore
     */
    public function setI18nHelper(I18nHelper $i18n): void
    {
        $this->i18n = $i18n;
    }
158
159
    /**
     * Get the start of the date range, formatted as YYYY-MM-DD for use in the views.
     * @return string Blank if no start date was given.
     */
    public function getStartDate(): string
    {
        // Strict checks keep the result independent of PHP's loose-comparison rules,
        // which changed between PHP 7 and 8 for comparisons such as '' == 0.
        if (null === $this->start || false === $this->start || '' === $this->start) {
            return '';
        }

        // With strict_types enabled date() only accepts an integer timestamp,
        // so a numeric string is normalized here instead of raising a TypeError.
        return date('Y-m-d', (int) $this->start);
    }
167
168
    /**
     * Get the end of the date range, formatted as YYYY-MM-DD for use in the views.
     * @return string Blank if no end date was given.
     */
    public function getEndDate(): string
    {
        // Strict checks keep the result independent of PHP's loose-comparison rules,
        // which changed between PHP 7 and 8 for comparisons such as '' == 0.
        if (null === $this->end || false === $this->end || '' === $this->end) {
            return '';
        }

        // With strict_types enabled date() only accepts an integer timestamp,
        // so a numeric string is normalized here instead of raising a TypeError.
        return date('Y-m-d', (int) $this->end);
    }
176
177
    /**
     * Get the last day that should be shown in the month/year sections. This is the
     * last day of the month of $this->end when set, otherwise of the current month.
     * @return int As Unix timestamp.
     */
    private function getLastDay(): int
    {
        // No explicit end date: fall back to the current month.
        if (false === $this->end) {
            return strtotime('last day of this month');
        }

        $endDate = new DateTime('@'.$this->end);

        return $endDate->modify('last day of this month')->getTimestamp();
    }
192
193
    /**
     * Return the start/end date values as an associative array, with YYYY-MM-DD as the date format.
     * This is used mainly as a helper to pass to the pageviews Twig macros.
     * @return array Empty if there is no date range, otherwise with keys 'start' and 'end'.
     */
    public function getDateParams(): array
    {
        if (!$this->hasDateRange()) {
            return [];
        }

        // Default to the dates of the first and last edit; explicit bounds override them below.
        $params = [
            'start' => $this->firstEdit->getTimestamp()->format('Y-m-d'),
            'end' => $this->lastEdit->getTimestamp()->format('Y-m-d'),
        ];

        // The casts keep date() from raising a TypeError under strict_types
        // when a bound was supplied as a numeric string.
        if (false !== $this->start) {
            $params['start'] = date('Y-m-d', (int) $this->start);
        }
        if (false !== $this->end) {
            $params['end'] = date('Y-m-d', (int) $this->end);
        }

        return $params;
    }
218
219
    /**
     * Get the number of revisions belonging to the page, within the date range if one is set.
     * The value is fetched from the Page once and then reused.
     * @return int
     */
    public function getNumRevisions(): int
    {
        if (isset($this->numRevisions)) {
            return $this->numRevisions;
        }

        $this->numRevisions = $this->page->getNumRevisions(null, $this->start, $this->end);

        return $this->numRevisions;
    }
230
231
    /**
     * Get the maximum number of revisions that we should process, as set by the
     * 'app.max_page_revisions' container parameter.
     * @return int
     */
    public function getMaxRevisions(): int
    {
        if (isset($this->maxRevisions)) {
            return $this->maxRevisions;
        }

        $configured = $this->container->getParameter('app.max_page_revisions');
        $this->maxRevisions = (int) $configured;

        return $this->maxRevisions;
    }
242
243
    /**
     * Get the number of revisions that are actually getting processed. This is the configured
     * maximum when the page has too many revisions, otherwise the actual number of revisions.
     * @return int
     */
    public function getNumRevisionsProcessed(): int
    {
        if (!isset($this->numRevisionsProcessed)) {
            $this->numRevisionsProcessed = $this->tooManyRevisions()
                ? $this->getMaxRevisions()
                : $this->getNumRevisions();
        }

        return $this->numRevisionsProcessed;
    }
262
263
    /**
     * Does the page have more revisions than the configured maximum?
     * A maximum of zero or less means there is no limit.
     * @return bool
     */
    public function tooManyRevisions(): bool
    {
        $max = $this->getMaxRevisions();

        return $max > 0 && $this->getNumRevisions() > $max;
    }
271
272
    /**
     * Gather and store everything that the ArticleInfo view needs to display.
     * @codeCoverageIgnore
     */
    public function prepareData(): void
    {
        // Walk the revision history first, then pull in the log events.
        $this->parseHistory();
        $this->setLogsEvents();

        // The top 10 counts depend on the bots, so bots are set before post-processing.
        $this->setBots();

        $this->doPostPrecessing();
    }
286
287
    /**
     * Count the distinct editors recorded for the page.
     * @return int
     */
    public function getNumEditors(): int
    {
        $editors = $this->editors;

        return count($editors);
    }
295
296
    /**
     * Count the bots that edited the page.
     * @return int
     */
    public function getNumBots(): int
    {
        $bots = $this->getBots();

        return count($bots);
    }
304
305
    /**
     * Get the number of whole days between the first and last edit.
     * The value is computed once and then reused.
     * @return int
     */
    public function getTotalDays(): int
    {
        if (!isset($this->totalDays)) {
            $first = $this->firstEdit->getTimestamp();
            $last = $this->lastEdit->getTimestamp();

            // Absolute difference, so the order of the two dates does not matter.
            $this->totalDays = (int)date_diff($last, $first, true)->format('%a');
        }

        return $this->totalDays;
    }
320
321
    /**
     * Returns the length of the page. With a date range, this is the length reported
     * by the last processed edit; otherwise it is the length reported by the Page.
     * @return int
     */
    public function getLength(): int
    {
        return $this->hasDateRange()
            ? $this->lastEdit->getLength()
            : $this->page->getLength();
    }
333
334
    /**
     * Get the average number of days between edits to the page.
     * @return float Rounded to one decimal place; 0.0 if no revisions were processed.
     */
    public function averageDaysPerEdit(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Guard against division by zero when there is nothing to average over.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round($this->getTotalDays() / $numRevisions, 1);
    }
342
343
    /**
     * Get the average number of edits per day to the page.
     * @return float Rounded to one decimal place; 0.0 if the first and last edit are on the same day.
     */
    public function editsPerDay(): float
    {
        $totalDays = $this->getTotalDays();

        if (0 === $totalDays) {
            return 0.0;
        }

        // The time span is already expressed in days, so no unit conversion is needed.
        // The previous divisor of (365 / 12 / 24) overstated the rate by roughly 27%.
        return round($this->getNumRevisionsProcessed() / $totalDays, 1);
    }
354
355
    /**
     * Get the average number of edits per month to the page, capped at the number
     * of revisions that were processed.
     * @return float
     */
    public function editsPerMonth(): float
    {
        $totalDays = $this->getTotalDays();
        $numRevisions = $this->getNumRevisionsProcessed();

        $rate = 0;
        if ($totalDays) {
            // 365 / 12 is the average length of a month in days.
            $rate = $numRevisions / ($totalDays / (365 / 12));
        }

        return min($numRevisions, round($rate, 1));
    }
366
367
    /**
     * Get the average number of edits per year to the page, capped at the number
     * of revisions that were processed.
     * @return float
     */
    public function editsPerYear(): float
    {
        $totalDays = $this->getTotalDays();
        $numRevisions = $this->getNumRevisionsProcessed();

        $rate = 0;
        if ($totalDays) {
            $rate = $numRevisions / ($totalDays / 365);
        }

        return min($numRevisions, round($rate, 1));
    }
378
379
    /**
     * Get the average number of edits per editor.
     * @return float Rounded to one decimal place; 0.0 if no editors were recorded.
     */
    public function editsPerEditor(): float
    {
        $numEditors = count($this->editors);

        // Guard against division by zero when the page has no recorded editors.
        if (0 === $numEditors) {
            return 0.0;
        }

        return round($this->getNumRevisionsProcessed() / $numEditors, 1);
    }
387
388
    /**
     * Get the percentage of minor edits to the page.
     * @return float Rounded to one decimal place; 0.0 if no revisions were processed.
     */
    public function minorPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Guard against division by zero when no revisions were processed.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->minorCount / $numRevisions) * 100, 1);
    }
399
400
    /**
     * Get the percentage of anonymous edits to the page.
     * @return float Rounded to one decimal place; 0.0 if no revisions were processed.
     */
    public function anonPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Guard against division by zero when no revisions were processed.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->anonCount / $numRevisions) * 100, 1);
    }
411
412
    /**
     * Get the percentage of edits made by the top 10 editors.
     * @return float Rounded to one decimal place; 0.0 if no revisions were processed.
     */
    public function topTenPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Guard against division by zero when no revisions were processed.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->topTenCount / $numRevisions) * 100, 1);
    }
420
421
    /**
     * Get the number of times the page has been viewed. If this ArticleInfo has a date range,
     * the pageviews within that range are returned and $latest is ignored.
     * @param  int $latest Last N days, used only when there is no date range.
     * @return int
     */
    public function getPageviews(int $latest): int
    {
        if ($this->hasDateRange()) {
            $range = $this->getDateParams();

            return $this->page->getPageviews($range['start'], $range['end']);
        }

        return $this->page->getLastPageviews($latest);
    }
436
437
    /**
     * Get the page assessments of the page. An array result is kept and reused on later calls.
     * @see https://www.mediawiki.org/wiki/Extension:PageAssessments
     * @return string[]|false False if unsupported.
     * @codeCoverageIgnore
     */
    public function getAssessments()
    {
        if (is_array($this->assessments)) {
            return $this->assessments;
        }

        $pageAssessments = $this->page->getProject()->getPageAssessments();
        $this->assessments = $pageAssessments->getAssessments($this->page);

        return $this->assessments;
    }
453
454
    /**
     * Accessor for the count of automated edits to the page.
     * @return int
     */
    public function getAutomatedCount(): int
    {
        return $this->automatedCount;
    }
462
463
    /**
     * Accessor for the count of edits that were reverted by the edit that followed them.
     * @return int
     */
    public function getRevertCount(): int
    {
        return $this->revertCount;
    }
471
472
    /**
     * Accessor for the count of edits made by logged out (anonymous) users.
     * @return int
     */
    public function getAnonCount(): int
    {
        return $this->anonCount;
    }
480
481
    /**
     * Accessor for the count of minor edits to the page.
     * @return int
     */
    public function getMinorCount(): int
    {
        return $this->minorCount;
    }
489
490
    /**
     * Accessor for the "edits per <time>" counts of the page.
     * @return int[] With keys 'day', 'week', 'month' and 'year'.
     */
    public function getCountHistory(): array
    {
        return $this->countHistory;
    }
498
499
    /**
     * Accessor for the count of edits made by the top 10 editors.
     * @return int
     */
    public function getTopTenCount(): int
    {
        return $this->topTenCount;
    }
507
508
    /**
     * Get the top editors to the page by edit count.
     * @param int $limit Default 20, maximum 1,000.
     * @param bool $noBots Set to non-false to exclude bots from the result.
     * @return array Ranked rows with keys 'rank', 'username', 'count', 'minor',
     *   'first_edit' and 'latest_edit' (the latter two with keys 'id' and 'timestamp').
     */
    public function getTopEditorsByEditCount(int $limit = 20, bool $noBots = false): array
    {
        $limit = min($limit, 1000);

        // Quick cache, valid only for the same request. A function-static variable is shared
        // by every instance of the class, so entries are keyed by instance and arguments;
        // otherwise a second page, limit or bot filter would get the first call's result.
        // NOTE(review): spl_object_hash() values can be reused once an object is destroyed.
        static $cache = [];
        $cacheKey = spl_object_hash($this).'|'.$limit.'|'.(int)$noBots;
        if (isset($cache[$cacheKey])) {
            return $cache[$cacheKey];
        }

        $rows = $this->getRepository()->getTopEditorsByEditCount(
            $this->page,
            $this->start,
            $this->end,
            $limit,
            $noBots
        );

        $topEditors = [];
        $rank = 0;
        foreach ($rows as $row) {
            $topEditors[] = [
                'rank' => ++$rank,
                'username' => $row['username'],
                'count' => $row['count'],
                'minor' => $row['minor'],
                'first_edit' => [
                    'id' => $row['first_revid'],
                    'timestamp' => $row['first_timestamp'],
                ],
                'latest_edit' => [
                    'id' => $row['latest_revid'],
                    'timestamp' => $row['latest_timestamp'],
                ],
            ];
        }

        $cache[$cacheKey] = $topEditors;

        return $topEditors;
    }
551
552
    /**
     * Accessor for the first edit to the page.
     * @return Edit
     */
    public function getFirstEdit(): Edit
    {
        return $this->firstEdit;
    }
560
561
    /**
     * Accessor for the last edit to the page.
     * @return Edit
     */
    public function getLastEdit(): Edit
    {
        return $this->lastEdit;
    }
569
570
    /**
     * Accessor for the edit that added the most content to the page (by number of bytes).
     * @return Edit|null
     */
    public function getMaxAddition(): ?Edit
    {
        return $this->maxAddition;
    }
578
579
    /**
     * Accessor for the edit that removed the most content from the page (by number of bytes).
     * @return Edit|null
     */
    public function getMaxDeletion(): ?Edit
    {
        return $this->maxDeletion;
    }
587
588
    /**
     * Accessor for the editors of the page, along with their statistics.
     * @return mixed[]
     */
    public function getEditors(): array
    {
        return $this->editors;
    }
596
597
    /**
     * Accessor for the top 10 editors to the page by number of edits, along with their statistics.
     * @return mixed[]
     */
    public function topTenEditorsByEdits(): array
    {
        return $this->topTenEditorsByEdits;
    }
605
606
    /**
     * Accessor for the top 10 editors to the page by added text, along with their statistics.
     * @return mixed[]
     */
    public function topTenEditorsByAdded(): array
    {
        return $this->topTenEditorsByAdded;
    }
614
615
    /**
     * Accessor for the counts about each individual year and month of the page's history.
     * @return mixed[]
     */
    public function getYearMonthCounts(): array
    {
        return $this->yearMonthCounts;
    }
623
624
    /**
     * Accessor for the localized year labels used in the 'Year counts' chart.
     * @return string[]
     */
    public function getYearLabels(): array
    {
        return $this->yearLabels;
    }
632
633
    /**
     * Accessor for the localized month labels used in the 'Month counts' chart.
     * @return string[]
     */
    public function getMonthLabels(): array
    {
        return $this->monthLabels;
    }
641
642
    /**
     * Accessor for the highest number of edits made in any single month. The bar charts
     * in the months section use this as their point of comparison.
     * @return int
     */
    public function getMaxEditsPerMonth(): int
    {
        return $this->maxEditsPerMonth;
    }
651
652
    /**
     * Accessor for the (semi-)automated tools that were used to edit the page, including
     * the number of times each was used and a link to the tool's homepage.
     * @return string[]
     */
    public function getTools(): array
    {
        return $this->tools;
    }
661
662
    /**
     * Get the page's wikidata and Checkwiki errors, fetching them from the Page on first use.
     * @see Page::getErrors()
     * @return string[]
     */
    public function getBugs(): array
    {
        if (is_array($this->bugs)) {
            return $this->bugs;
        }

        $this->bugs = $this->page->getErrors();

        return $this->bugs;
    }
674
675
    /**
     * Count the wikidata and CheckWiki errors of the page.
     * @return int
     */
    public function numBugs(): int
    {
        $bugs = $this->getBugs();

        return count($bugs);
    }
683
684
    /**
     * Look up the number of external links on the page.
     * @return int
     */
    public function linksExtCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_ext_count'];
    }
692
693
    /**
     * Look up the number of incoming links to the page.
     * @return int
     */
    public function linksInCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_in_count'];
    }
701
702
    /**
     * Look up the number of outgoing links from the page.
     * @return int
     */
    public function linksOutCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_out_count'];
    }
710
711
    /**
     * Look up the number of redirects to the page.
     * @return int
     */
    public function redirectsCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['redirects_count'];
    }
719
720
    /**
     * Get the counts of external, incoming and outgoing links, along with the number of
     * redirects to the page. The counts are fetched from the Page on first use.
     * @return int[]
     * @codeCoverageIgnore
     */
    private function getLinksAndRedirects(): array
    {
        if (is_array($this->linksAndRedirects)) {
            return $this->linksAndRedirects;
        }

        $this->linksAndRedirects = $this->page->countLinksAndRedirects();

        return $this->linksAndRedirects;
    }
732
733
    /**
     * Parse the revision history, collecting our core statistics.
     *
     * Untestable because it relies on getting a PDO statement. All the important
     * logic lives in other methods which are tested.
     * @codeCoverageIgnore
     */
    private function parseHistory(): void
    {
        // Only cap the query when the page has more revisions than we are configured to process.
        $limit = $this->tooManyRevisions() ? $this->getMaxRevisions() : null;

        // Third parameter is ignored if $limit is null.
        $revStmt = $this->page->getRevisionsStmt(
            null,
            $limit,
            $this->getNumRevisions(),
            $this->start,
            $this->end
        );
        $revCount = 0;

        // Data about previous edits so that we can use them as a basis for comparison.
        $prevEdits = [
            // The previous Edit, used to discount content that was reverted.
            'prev' => null,

            // The SHA-1 of the edit *before* the previous edit. Used for more
            // accurate revert detection.
            'prevSha' => null,

            // The last edit deemed to be the max addition of content. This is kept track of
            // in case we find out the next edit was reverted (and was also a max edit),
            // in which case we'll want to discount it and use this one instead.
            'maxAddition' => null,

            // Same as with maxAddition, except the maximum amount of content deleted.
            // This is used to discount content that was reverted.
            'maxDeletion' => null,
        ];

        while ($rev = $revStmt->fetch()) {
            $edit = new Edit($this->page, $rev);

            if (0 === $revCount) {
                $this->firstEdit = $edit;
            }

            // Sometimes, with old revisions (2001 era), the revisions from 2002 come before 2001
            if ($edit->getTimestamp() < $this->firstEdit->getTimestamp()) {
                $this->firstEdit = $edit;
            }

            $prevEdits = $this->updateCounts($edit, $prevEdits);

            $revCount++;
        }

        $this->numRevisionsProcessed = $revCount;

        // Various sorts
        arsort($this->editors);

        // $yearMonthCounts and $tools have no default value, so they are only sorted once
        // populated. Passing null to ksort() would raise a TypeError under strict_types.
        if (is_array($this->yearMonthCounts)) {
            ksort($this->yearMonthCounts);
        }
        if (!empty($this->tools)) {
            arsort($this->tools);
        }
    }
806
807
    /**
     * Fold the given edit into all of the running statistics.
     * @param Edit $edit The edit currently being processed.
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateCounts(Edit $edit, array $prevEdits): array
    {
        // Per-year/month, per-user, anon/minor, tool usage and "edits per <time>" counts.
        $this->updateYearMonthCounts($edit);
        $this->updateUserCounts($edit);
        $this->updateAnonMinorCounts($edit);
        $this->updateToolCounts($edit);
        $this->updateCountHistory($edit);

        // Figures regarding content addition/removal, and the revert count.
        $prevEdits = $this->updateContentSizes($edit, $prevEdits);

        // Before the current edit becomes 'prev', remember the SHA of the edit it
        // replaces so that the next iteration can compare against it.
        $previous = $prevEdits['prev'];
        if (null !== $previous) {
            $prevEdits['prevSha'] = $previous->getSha();
        }

        $prevEdits['prev'] = $edit;
        $this->lastEdit = $edit;

        return $prevEdits;
    }
846
847
    /**
     * Update various figures about content sizes based on the given edit.
     *
     * If the edit is a revert it is flagged via Edit::setReverted() and the revert bookkeeping
     * runs; otherwise the edit is treated as a regular content change.
     * @param Edit $edit The edit being processed. Objects are passed by handle, so the
     *   flag set here is visible to the caller without a by-reference parameter.
     * @param mixed[] $prevEdits Tracking data with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits.
     */
    private function updateContentSizes(Edit $edit, array $prevEdits): array
    {
        if (!$this->isRevert($edit, $prevEdits)) {
            return $this->updateContentSizesNonRevert($edit, $prevEdits);
        }

        $edit->setReverted(true);

        return $this->updateContentSizesRevert($prevEdits);
    }
863
864
    /**
     * Decide whether the given Edit is a revert.
     *
     * An edit counts as a revert when its SHA equals the one stored in $prevEdits['prevSha'],
     * or when Edit::isRevert() reports it as such.
     * @param Edit $edit
     * @param mixed[] $prevEdits Tracking data with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return bool
     */
    private function isRevert(Edit $edit, array $prevEdits): bool
    {
        $sha = $edit->getSha();

        if ($sha === $prevEdits['prevSha']) {
            return true;
        }

        return $edit->isRevert($this->container);
    }
874
875
    /**
     * Updates the figures on content sizes assuming the given edit was a revert of the previous one.
     * In such a case, we don't want to treat the previous edit as legit content addition or removal.
     * @param mixed[] $prevEdits Tracking data with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesRevert(array $prevEdits): array
    {
        $this->revertCount++;

        /** @var Edit|null $reverted The edit that is being undone. */
        $reverted = $prevEdits['prev'];

        if (null === $reverted) {
            // No previous edit is being tracked, so there are no figures to take back.
            // Bailing out here also keeps the max addition/deletion check below from
            // calling a method on null.
            return $prevEdits;
        }

        // Adjust addedBytes given this edit was a revert of the previous one.
        // We don't do this if the previous edit was itself reverted, since that would make the net bytes zero.
        if (!$reverted->isReverted() && $reverted->getSize() > 0) {
            $this->addedBytes -= $reverted->getSize();

            // Also deduct from the user's individual added byte count.
            if ($reverted->getUser()) {
                $username = $reverted->getUser()->getUsername();
                $this->editors[$username]['added'] -= $reverted->getSize();
            }
        }

        // @TODO: Test this against an edit war (use your sandbox).
        // Also remove as max added or deleted, if applicable: restore the runner-up that was
        // stashed in $prevEdits, and stash the undone edit in its place in the event of edit wars.
        if ($this->maxAddition && $reverted->getId() === $this->maxAddition->getId()) {
            $this->maxAddition = $prevEdits['maxAddition'];
            $prevEdits['maxAddition'] = $reverted;
        } elseif ($this->maxDeletion && $reverted->getId() === $this->maxDeletion->getId()) {
            $this->maxDeletion = $prevEdits['maxDeletion'];
            $prevEdits['maxDeletion'] = $reverted;
        }

        return $prevEdits;
    }
909
910
    /**
     * Updates the figures on content sizes assuming the given edit was NOT a revert of the previous edit.
     *
     * A positive size is recorded as added content (overall and for the editor), and may become
     * the new max addition. A negative size may become the new max deletion. A size of zero
     * changes nothing.
     * @param Edit $edit
     * @param mixed[] $prevEdits Tracking data with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesNonRevert(Edit $edit, array $prevEdits): array
    {
        $size = $this->getEditSize($edit, $prevEdits);

        if ($size < 0) {
            $isLargestDeletion = !$this->maxDeletion || $size < $this->maxDeletion->getSize();

            if ($isLargestDeletion) {
                // Stash the old maxDeletion: if the next edit turns out to revert this one,
                // the stashed value is what gets restored.
                $prevEdits['maxDeletion'] = $this->maxDeletion;
                $this->maxDeletion = $edit;
            }

            return $prevEdits;
        }

        if (0 === $size) {
            return $prevEdits;
        }

        // Not a revert and size > 0, so treat it as content added.
        $this->addedBytes += $size;

        $author = $edit->getUser();
        if ($author) {
            $this->editors[$author->getUsername()]['added'] += $size;
        }

        $isLargestAddition = !$this->maxAddition || $size > $this->maxAddition->getSize();

        if ($isLargestAddition) {
            // Stash the old maxAddition: if the next edit turns out to revert this one,
            // the stashed value is what gets restored.
            $prevEdits['maxAddition'] = $this->maxAddition;
            $this->maxAddition = $edit;
        }

        return $prevEdits;
    }
946
947
    /**
     * Get the size of the given edit, based on the previous edit (if present).
     *
     * When the previous edit reports a length of null, 0 is returned instead of the actual
     * edit size. This happens when the edit follows other edits that were revision-deleted.
     * @see T148857 for more information.
     * @todo Remove once T101631 is resolved.
     * @param Edit $edit
     * @param mixed[] $prevEdits Tracking data with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return int
     */
    private function getEditSize(Edit $edit, array $prevEdits): int
    {
        $previous = $prevEdits['prev'];
        $previousLengthUnknown = $previous && null === $previous->getLength();

        return $previousLengthUnknown ? 0 : $edit->getSize();
    }
965
966
    /**
     * Update counts of automated tool usage for the given edit.
     *
     * Does nothing when Edit::getTool() returns false. Otherwise the overall, yearly and
     * monthly automated counts are incremented, along with the per-tool count in $this->tools.
     * @param Edit $edit
     */
    private function updateToolCounts(Edit $edit): void
    {
        $tool = $edit->getTool($this->container);

        if (false === $tool) {
            return;
        }

        $year = $edit->getYear();
        $month = $edit->getMonth();

        $this->automatedCount++;
        $this->yearMonthCounts[$year]['automated']++;
        $this->yearMonthCounts[$year]['months'][$month]['automated']++;

        $toolName = $tool['name'];

        if (isset($this->tools[$toolName])) {
            $this->tools[$toolName]['count']++;
            return;
        }

        // First time we see this tool on the page.
        $this->tools[$toolName] = [
            'count' => 1,
            'link' => $tool['link'],
        ];
    }
995
996
    /**
     * Update various counts for the year and month of the given edit.
     *
     * Creates the year's entry in $this->yearMonthCounts if it is missing, increments the
     * 'all' counters, records the page length and keeps $this->maxEditsPerMonth up to date.
     * @param Edit $edit
     */
    private function updateYearMonthCounts(Edit $edit): void
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Make sure the year (and its months) exist before incrementing anything.
        if (!isset($this->yearMonthCounts[$year])) {
            $this->addYearMonthCountEntry($edit);
        }

        $this->yearMonthCounts[$year]['all']++;
        $this->yearMonthCounts[$year]['months'][$month]['all']++;

        // Overwritten on every edit, so this ends up being the length
        // as of the last edit processed for that year.
        $this->yearMonthCounts[$year]['size'] = (int) $edit->getLength();

        // Keep track of the busiest month seen so far.
        $monthTotal = $this->yearMonthCounts[$year]['months'][$month]['all'];
        if ($this->maxEditsPerMonth < $monthTotal) {
            $this->maxEditsPerMonth = $monthTotal;
        }
    }
1022
1023
    /**
     * Add a new entry to $this->yearMonthCounts for the year of the given edit,
     * with zeroed counters for the year and for each applicable month.
     * Year and month labels are appended to $this->yearLabels and $this->monthLabels along the way.
     * @param Edit $edit
     */
    private function addYearMonthCountEntry(Edit $edit): void
    {
        $this->yearLabels[] = $this->i18n->dateFormat($edit->getTimestamp(), 'yyyy');
        $year = $edit->getYear();

        // Start of the month of the first edit, at 00:00:00.
        $firstMonthStart = mktime(
            0,
            0,
            0,
            (int)$this->firstEdit->getMonth(),
            1,
            (int)$this->firstEdit->getYear()
        );

        $zeroCounts = [
            'all' => 0,
            'minor' => 0,
            'anon' => 0,
            'automated' => 0,
        ];

        $this->yearMonthCounts[$year] = $zeroCounts + [
            'size' => 0, // Keep track of the size by the end of the year.
            'events' => [],
            'months' => [],
        ];

        foreach (range(1, 12) as $monthNumber) {
            $monthStart = mktime(0, 0, 0, $monthNumber, 1, (int)$year);

            // Only add months that fall between the month of the first edit
            // and the value returned by getLastDay().
            if ($monthStart >= $firstMonthStart && $monthStart <= $this->getLastDay()) {
                $this->monthLabels[] = $this->i18n->dateFormat($monthStart, 'yyyy-MM');
                $this->yearMonthCounts[$year]['months'][sprintf('%02d', $monthNumber)] = $zeroCounts;
            }
        }
    }
1063
1064
    /**
     * Update the overall, yearly and monthly counts of anonymous and minor edits
     * based on the given edit.
     * @param Edit $edit
     */
    private function updateAnonMinorCounts(Edit $edit): void
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Names of the year/month counters that apply to this edit.
        $counters = [];

        if ($edit->isAnon()) {
            $this->anonCount++;
            $counters[] = 'anon';
        }

        if ($edit->isMinor()) {
            $this->minorCount++;
            $counters[] = 'minor';
        }

        foreach ($counters as $counter) {
            $this->yearMonthCounts[$year][$counter]++;
            $this->yearMonthCounts[$year]['months'][$month][$counter]++;
        }
    }
1087
1088
    /**
     * Update various counts for the user of the given edit.
     *
     * Edits without a user are ignored. The user's entry in $this->editors is created on
     * their first edit, and their totals plus last edit timestamp/ID are refreshed each time.
     * @param Edit $edit
     */
    private function updateUserCounts(Edit $edit): void
    {
        $user = $edit->getUser();

        if (!$user) {
            return;
        }

        $username = $user->getUsername();

        // Use the existing stats, or start a fresh set the first time we see this user.
        $stats = $this->editors[$username] ?? [
            'all' => 0,
            'minor' => 0,
            'minorPercentage' => 0,
            'first' => $edit->getTimestamp(),
            'firstId' => $edit->getId(),
            'last' => null,
            'atbe' => null,
            'added' => 0,
        ];

        $stats['all']++;
        $stats['last'] = $edit->getTimestamp();
        $stats['lastId'] = $edit->getId();

        if ($edit->isMinor()) {
            $stats['minor']++;
        }

        $this->editors[$username] = $stats;
    }
1124
1125
    /**
     * Increment "edits per <time>" counts based on the given edit.
     *
     * The edit is counted in every window (day, week, month, year) that its timestamp falls within.
     * @param Edit $edit
     */
    private function updateCountHistory(Edit $edit): void
    {
        $timestamp = $edit->getTimestamp();

        // Keys of $this->countHistory mapped to the relative date marking the window's start.
        $windows = [
            'day' => '-1 day',
            'week' => '-1 week',
            'month' => '-1 month',
            'year' => '-1 year',
        ];

        foreach ($windows as $window => $relativeStart) {
            if ($timestamp > new DateTime($relativeStart)) {
                $this->countHistory[$window]++;
            }
        }
    }
1146
1147
    /**
     * Get info about bots that edited the page, as stored in $this->bots.
     * @return mixed[] Keyed by bot username; each value has the bot's edit count to the page ('count')
     *   and whether or not they are currently a bot ('current').
     */
    public function getBots(): array
    {
        $bots = $this->bots;

        return $bots;
    }
1155
1156
    /**
     * Query for the bots that edited the page and store them in $this->bots, sorted by edit count.
     * This is done as a private setter because we need this information when computing
     * the top 10 editors, where we don't want to include bots.
     */
    private function setBots(): void
    {
        /** @var Statement $rows */
        $rows = $this->getRepository()->getBotData($this->page, $this->start, $this->end);

        $bots = [];

        for ($row = $rows->fetch(); $row; $row = $rows->fetch()) {
            $bots[$row['username']] = [
                'count' => (int)$row['count'],
                'current' => '1' === $row['current'],
            ];
        }

        // Highest edit count first, keeping the usernames as keys.
        uasort($bots, function ($a, $b) {
            return $b['count'] <=> $a['count'];
        });

        $this->bots = $bots;
    }
1181
1182
    /**
     * Number of edits made to the page by current or former bots.
     * The result is cached in $this->botRevisionCount after the first call.
     * @param mixed[][]|null $bots Bot data where each entry has a 'count' key. Used only in unit tests,
     *   where we supply mock data for the bots that will get processed; defaults to self::getBots().
     * @return int
     */
    public function getBotRevisionCount(?array $bots = null): int
    {
        if (isset($this->botRevisionCount)) {
            return $this->botRevisionCount;
        }

        $bots = $bots ?? $this->getBots();

        $total = 0;
        foreach ($bots as $botData) {
            $total += $botData['count'];
        }

        $this->botRevisionCount = $total;

        return $total;
    }
1206
1207
    /**
     * Query for log events during each year of the article's history, and set the results in $this->yearMonthCounts.
     *
     * Each event increments a counter under the 'events' key of its year. Events with a log type
     * that has no known i18n key are skipped.
     */
    private function setLogsEvents(): void
    {
        // Log type => i18n key. Pending-changes protections ('stable')
        // are counted along with normal protections.
        $actionKeys = [
            'protect' => 'protections',
            'delete' => 'deletions',
            'move' => 'moves',
            'stable' => 'protections',
        ];

        $logData = $this->getRepository()->getLogEvents(
            $this->page,
            $this->start,
            $this->end
        );

        foreach ($logData as $event) {
            $time = strtotime($event['timestamp']);
            $year = date('Y', $time);

            if (!isset($this->yearMonthCounts[$year])) {
                // NOTE(review): this stops processing ALL remaining events, not just this one.
                // Kept as-is; confirm whether `continue` was intended for years without edits.
                break;
            }

            $action = $actionKeys[$event['log_type']] ?? null;

            if (null === $action) {
                // Unknown log type: skip it, rather than counting it under
                // whatever action the previous event happened to have.
                continue;
            }

            $yearEvents = $this->yearMonthCounts[$year]['events'];
            $yearEvents[$action] = ($yearEvents[$action] ?? 0) + 1;
            $this->yearMonthCounts[$year]['events'] = $yearEvents;
        }
    }
1254
1255
    /**
     * Set statistics about the top 10 editors by added text and number of edits.
     * This is run *after* parseHistory() since we need the grand totals first.
     * Various stats (percentage of minor edits, average time between edits) are also set
     * for each editor in $this->editors to be used in the charts.
     */
    private function doPostPrecessing(): void
    {
        $topTenCount = 0;
        $counter = 0;
        $topTenEditorsByEdits = [];

        foreach ($this->editors as $editor => $info) {
            // Take the first 10 editors, in the current order of $this->editors, that are not bots.
            // The bots are keyed by username, so an exact key lookup is used; a loose in_array()
            // could match unrelated numeric-looking usernames.
            if ($counter < 10 && !array_key_exists($editor, $this->bots)) {
                $topTenCount += $info['all'];
                $counter++;

                // To be used in the Top Ten charts.
                $topTenEditorsByEdits[] = [
                    'label' => $editor,
                    'value' => $info['all'],
                ];
            }

            // Compute the percentage of minor edits the user made.
            $this->editors[$editor]['minorPercentage'] = $info['all']
                ? ($info['minor'] / $info['all']) * 100
                : 0;

            if ($info['all'] > 1) {
                // Number of seconds/days between first and last edit.
                $secs = $info['last']->getTimestamp() - $info['first']->getTimestamp();
                $days = $secs / (60 * 60 * 24);

                // Average time between edits (in days).
                $this->editors[$editor]['atbe'] = $days / $info['all'];
            }
        }

        // Loop through again and add each editor's share of the top 10 edits.
        // Every collected editor contributed to $topTenCount, so it is non-zero
        // whenever this callback runs.
        $this->topTenEditorsByEdits = array_map(function ($editor) use ($topTenCount) {
            $editor['percentage'] = 100 * ($editor['value'] / $topTenCount);
            return $editor;
        }, $topTenEditorsByEdits);

        $this->topTenEditorsByAdded = $this->getTopTenByAdded();

        $this->topTenCount = $topTenCount;
    }
1303
1304
    /**
     * Get the top ten editors by added text.
     *
     * The percentage is the editor's share of $this->addedBytes, or 0 when nothing was added.
     * @return array With keys 'label', 'value' and 'percentage', ready to be used by the pieChart Twig helper.
     */
    private function getTopTenByAdded(): array
    {
        // Rank a copy of the editors by the amount of text they added, largest first.
        $ranked = $this->editors;
        uasort($ranked, function ($a, $b) {
            if ($a['added'] > $b['added']) {
                return -1;
            }
            return $a['added'] === $b['added'] ? 0 : 1;
        });

        // Usernames of the top 10.
        $usernames = array_keys(array_slice($ranked, 0, 10, true));

        // Build the data structure needed for the chart.
        $topTen = [];
        foreach ($usernames as $username) {
            $added = $this->editors[$username]['added'];

            $percentage = 0;
            if (0 !== $this->addedBytes) {
                $percentage = 100 * ($added / $this->addedBytes);
            }

            $topTen[] = [
                'label' => $username,
                'value' => $added,
                'percentage' => $percentage,
            ];
        }

        return $topTen;
    }
1339
1340
    /**
     * Get prose and reference information, parsed from the HTML content of the page.
     * The result is cached in $this->proseStats.
     * @return array With keys 'characters', 'words', 'references', 'unique_references' and 'sections'.
     */
    public function getProseStats(): array
    {
        if (isset($this->proseStats)) {
            return $this->proseStats;
        }

        // Only pass a date along when an end date was given.
        $datetime = null;
        if (false !== $this->end) {
            $datetime = new DateTime('@'.$this->end);
        }

        $crawler = new Crawler($this->page->getHTMLContent($datetime));

        [$chars, $words] = $this->countCharsAndWords($crawler, '#mw-content-text p');

        // References are considered unique by their text.
        $refs = $crawler->filter('#mw-content-text .reference');
        $refTexts = $refs->each(function ($ref) {
            return $ref->text();
        });
        $uniqueRefs = count(array_unique($refTexts));

        $sections = count($crawler->filter('#mw-content-text .mw-headline'));

        $this->proseStats = [
            'characters' => $chars,
            'words' => $words,
            'references' => $refs->count(),
            'unique_references' => $uniqueRefs,
            'sections' => $sections,
        ];

        return $this->proseStats;
    }
1375
1376
    /**
     * Count the number of characters and words of the plain text within the DOM elements
     * matched by the given selector.
     *
     * Bracketed numbers such as "[12]" are stripped before counting. Characters are counted
     * as UTF-8 characters rather than bytes, and words are whitespace-separated, non-empty
     * tokens, so an empty element contributes no words.
     * @param Crawler $crawler
     * @param string $selector HTML selector.
     * @return array [num chars, num words]
     */
    private function countCharsAndWords(Crawler $crawler, string $selector): array
    {
        $totalChars = 0;
        $totalWords = 0;

        $crawler->filter($selector)->each(function ($node) use (&$totalChars, &$totalWords): void {
            // preg_replace() returns null on failure; treat that as empty text.
            $text = (string)preg_replace('/\[\d+\]/', '', trim($node->text()));

            // strlen() would count bytes, inflating the figure for non-ASCII text.
            $totalChars += mb_strlen($text, 'UTF-8');

            // Splitting on runs of whitespace and dropping empty tokens avoids counting
            // an empty string or repeated spaces as words.
            $tokens = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY) ?: [];
            $totalWords += count($tokens);
        });

        return [$totalChars, $totalWords];
    }
1395
1396
    /**
     * Fetch transclusion data (categories, templates and files) that are on the page.
     * The repository is only queried when $this->transclusionData is not already an array.
     * @return array With keys 'categories', 'templates' and 'files'.
     */
    private function getTransclusionData(): array
    {
        if (is_array($this->transclusionData)) {
            return $this->transclusionData;
        }

        $this->transclusionData = $this->getRepository()->getTransclusionData($this->page);

        return $this->transclusionData;
    }
1408
1409
    /**
     * Get the number of categories that are on the page, read from the transclusion data.
     * @return int
     */
    public function getNumCategories(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['categories'];
    }
1417
1418
    /**
     * Get the number of templates that are on the page, read from the transclusion data.
     * @return int
     */
    public function getNumTemplates(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['templates'];
    }
1426
1427
    /**
     * Get the number of files that are on the page, read from the transclusion data.
     * @return int
     */
    public function getNumFiles(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['files'];
    }
1435
}
1436