Passed
Push — master ( 16bc58...e2c4be )
by MusikAnimal
05:53
created

ArticleInfo::countTokens()   A

Complexity

Conditions 4
Paths 5

Size

Total Lines 29
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
eloc 15
nc 5
nop 1
dl 0
loc 29
ccs 15
cts 15
cp 1
crap 4
rs 9.7666
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the ArticleInfo class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Model;
9
10
use AppBundle\Helper\I18nHelper;
11
use DateTime;
12
use Doctrine\DBAL\Statement;
13
use Symfony\Component\DependencyInjection\ContainerInterface;
14
use Symfony\Component\DomCrawler\Crawler;
15
16
/**
17
 * An ArticleInfo provides statistics about a page on a project.
18
 */
19
class ArticleInfo extends Model
20
{
21
    /** @var ContainerInterface The application's DI container. */
22
    protected $container;
23
24
    /** @var I18nHelper For i18n and l10n. */
25
    protected $i18n;
26
27
    /** @var int Number of revisions that belong to the page. */
28
    protected $numRevisions;
29
30
    /** @var int Maximum number of revisions to process, as configured. */
31
    protected $maxRevisions;
32
33
    /** @var int Number of revisions that were actually processed. */
34
    protected $numRevisionsProcessed;
35
36
    /**
37
     * Various statistics about editors to the page. These are not User objects
38
     * so as to preserve memory.
39
     * @var mixed[]
40
     */
41
    protected $editors = [];
42
43
    /** @var mixed[] The top 10 editors to the page by number of edits. */
44
    protected $topTenEditorsByEdits;
45
46
    /** @var mixed[] The top 10 editors to the page by added text. */
47
    protected $topTenEditorsByAdded;
48
49
    /** @var int Number of edits made by the top 10 editors. */
50
    protected $topTenCount;
51
52
    /** @var mixed[] Various statistics about bots that edited the page. */
53
    protected $bots;
54
55
    /** @var int Number of edits made to the page by bots. */
56
    protected $botRevisionCount;
57
58
    /** @var mixed[] Various counts about each individual year and month of the page's history. */
59
    protected $yearMonthCounts;
60
61
    /** @var string[] Localized labels for the years, to be used in the 'Year counts' chart. */
62
    protected $yearLabels = [];
63
64
    /** @var string[] Localized labels for the months, to be used in the 'Month counts' chart. */
65
    protected $monthLabels = [];
66
67
    /** @var Edit The first edit to the page. */
68
    protected $firstEdit;
69
70
    /** @var Edit The last edit to the page. */
71
    protected $lastEdit;
72
73
    /** @var Edit Edit that made the largest addition by number of bytes. */
74
    protected $maxAddition;
75
76
    /** @var Edit Edit that made the largest deletion by number of bytes. */
77
    protected $maxDeletion;
78
79
    /** @var int[] Number of in and outgoing links and redirects to the page. */
80
    protected $linksAndRedirects;
81
82
    /** @var string[] Assessments of the page (see Page::getAssessments). */
83
    protected $assessments;
84
85
    /**
86
     * Maximum number of edits that were created across all months. This is used as a comparison
87
     * for the bar charts in the months section.
88
     * @var int
89
     */
90
    protected $maxEditsPerMonth;
91
92
    /** @var string[] List of (semi-)automated tools that were used to edit the page. */
93
    protected $tools;
94
95
    /**
96
     * Total number of bytes added throughout the page's history. This is used as a comparison
97
     * when computing the top 10 editors by added text.
98
     * @var int
99
     */
100
    protected $addedBytes = 0;
101
102
    /** @var int Number of days between first and last edit. */
103
    protected $totalDays;
104
105
    /** @var int Number of minor edits to the page. */
106
    protected $minorCount = 0;
107
108
    /** @var int Number of anonymous edits to the page. */
109
    protected $anonCount = 0;
110
111
    /** @var int Number of automated edits to the page. */
112
    protected $automatedCount = 0;
113
114
    /** @var int Number of edits to the page that were reverted with the subsequent edit. */
115
    protected $revertCount = 0;
116
117
    /** @var int[] The "edits per <time>" counts. */
118
    protected $countHistory = [
119
        'day' => 0,
120
        'week' => 0,
121
        'month' => 0,
122
        'year' => 0,
123
    ];
124
125
    /** @var string[] List of wikidata and Checkwiki errors. */
126
    protected $bugs;
127
128
    /** @var array Number of categories, templates and files on the page. */
129
    protected $transclusionData;
130
131
    /**
132
     * ArticleInfo constructor.
133
     * @param Page $page The page to process.
134
     * @param ContainerInterface $container The DI container.
135
     * @param false|int $start From what date to obtain records.
136
     * @param false|int $end To what date to obtain records.
137
     */
138 12
    public function __construct(Page $page, ContainerInterface $container, $start = false, $end = false)
139
    {
140 12
        $this->page = $page;
141 12
        $this->container = $container;
142 12
        $this->start = $start;
143 12
        $this->end = $end;
144 12
    }
145
146
    /**
147
     * Make the I18nHelper accessible to ArticleInfo.
148
     * @param I18nHelper $i18n
149
     * @codeCoverageIgnore
150
     */
151
    public function setI18nHelper(I18nHelper $i18n): void
152
    {
153
        $this->i18n = $i18n;
154
    }
155
156
    /**
157
     * Get the opening date of the date range, formatted as used in the views.
158
     * @return string Blank if no value exists.
159
     */
160 1
    public function getStartDate(): string
    {
        // No start bound was given. The constructor default is false; null and the empty
        // string are treated the same way. Strict comparisons are used on purpose: the
        // loose `'' == $x` form evaluates differently for 0 on PHP 7 and PHP 8.
        if (false === $this->start || null === $this->start || '' === $this->start) {
            return '';
        }

        // date() only accepts an integer timestamp under strict_types, so a numeric
        // string bound must be cast before formatting.
        return date('Y-m-d', (int) $this->start);
    }
164
165
    /**
166
     * Get the closing date of the date range, formatted as used in the views.
167
     * @return string Blank if no value exists.
168
     */
169 1
    public function getEndDate(): string
    {
        // No end bound was given. The constructor default is false; null and the empty
        // string are treated the same way. Strict comparisons are used on purpose: the
        // loose `'' == $x` form evaluates differently for 0 on PHP 7 and PHP 8.
        if (false === $this->end || null === $this->end || '' === $this->end) {
            return '';
        }

        // date() only accepts an integer timestamp under strict_types, so a numeric
        // string bound must be cast before formatting.
        return date('Y-m-d', (int) $this->end);
    }
173
174
    /**
175
     * Get the day of last date we should show in the month/year sections,
176
     * based on $this->end or the current date.
177
     * @return int As Unix timestamp.
178
     */
179 4
    private function getLastDay(): int
    {
        // Without an explicit end bound, use the last day of the current month.
        if (false === $this->end) {
            return strtotime('last day of this month');
        }

        // The '@' prefix makes DateTime interpret the end bound as a Unix timestamp.
        $endOfRange = new DateTime('@'.$this->end);
        $endOfRange->modify('last day of this month');

        return $endOfRange->getTimestamp();
    }
189
190
    /**
191
     * Return the start/end date values as associative array, with YYYY-MM-DD as the date format.
192
     * This is used mainly as a helper to pass to the pageviews Twig macros.
193
     * @return array
194
     */
195 1
    public function getDateParams(): array
    {
        if (!$this->hasDateRange()) {
            return [];
        }

        // Defaults come from the first and last edits that were processed.
        $params = [];
        $params['start'] = $this->firstEdit->getTimestamp()->format('Y-m-d');
        $params['end'] = $this->lastEdit->getTimestamp()->format('Y-m-d');

        // An explicitly requested bound overrides the corresponding default.
        foreach (['start', 'end'] as $bound) {
            if (false !== $this->{$bound}) {
                $params[$bound] = date('Y-m-d', $this->{$bound});
            }
        }

        return $params;
    }
215
216
    /**
217
     * Get the number of revisions belonging to the page.
218
     * @return int
219
     */
220 4
    public function getNumRevisions(): int
    {
        // Memoized: the page is only asked for the count on the first call.
        if (isset($this->numRevisions)) {
            return $this->numRevisions;
        }

        $this->numRevisions = $this->page->getNumRevisions(null, $this->start, $this->end);

        return $this->numRevisions;
    }
227
228
    /**
229
     * Get the maximum number of revisions that we should process.
230
     * @return int
231
     */
232 3
    public function getMaxRevisions(): int
    {
        // Memoized: the container parameter is read once and cached as an integer.
        if (isset($this->maxRevisions)) {
            return $this->maxRevisions;
        }

        $configured = $this->container->getParameter('app.max_page_revisions');
        $this->maxRevisions = (int) $configured;

        return $this->maxRevisions;
    }
239
240
    /**
241
     * Get the number of revisions that are actually getting processed. This goes by the app.max_page_revisions
242
     * parameter, or the actual number of revisions, whichever is smaller.
243
     * @return int
244
     */
245 3
    public function getNumRevisionsProcessed(): int
    {
        if (!isset($this->numRevisionsProcessed)) {
            // Capped at the configured maximum when the page has more revisions than that.
            $this->numRevisionsProcessed = $this->tooManyRevisions()
                ? $this->getMaxRevisions()
                : $this->getNumRevisions();
        }

        return $this->numRevisionsProcessed;
    }
259
260
    /**
261
     * Are there more revisions than we should process, based on the config?
262
     * @return bool
263
     */
264 3
    public function tooManyRevisions(): bool
    {
        $max = $this->getMaxRevisions();

        // A non-positive maximum never triggers the limit.
        if ($max <= 0) {
            return false;
        }

        return $this->getNumRevisions() > $max;
    }
268
269
    /**
270
     * Fetch and store all the data we need to show the ArticleInfo view.
271
     * @codeCoverageIgnore
272
     */
273
    public function prepareData(): void
274
    {
275
        // Walk the revision history first; the steps below run after it.
        $this->parseHistory();
276
        // NOTE(review): setLogsEvents() is defined outside this view — presumably it adds
        // log-event data to the parsed history; confirm.
        $this->setLogsEvents();
277
278
        // Bots need to be set before setting top 10 counts.
        $this->setBots();
279
280
281
        // NOTE(review): the method name is spelled "Precessing" at its definition too,
        // so it cannot be corrected here alone.
        $this->doPostPrecessing();
282
    }
283
284
    /**
285
     * Get the number of editors that edited the page.
286
     * @return int
287
     */
288 1
    public function getNumEditors(): int
289
    {
290 1
        return count($this->editors);
291
    }
292
293
    /**
294
     * Get the number of bots that edited the page.
295
     * @return int
296
     */
297
    public function getNumBots(): int
298
    {
299
        return count($this->getBots());
300
    }
301
302
    /**
303
     * Get the number of days between the first and last edit.
304
     * @return int
305
     */
306 1
    public function getTotalDays(): int
    {
        if (!isset($this->totalDays)) {
            $first = $this->firstEdit->getTimestamp();
            $last = $this->lastEdit->getTimestamp();

            // '%a' is the total number of days in the interval; the third
            // argument makes the difference absolute.
            $this->totalDays = (int)date_diff($last, $first, true)->format('%a');
        }

        return $this->totalDays;
    }
317
318
    /**
319
     * Returns length of the page.
320
     * @return int
321
     */
322 1
    public function getLength(): int
    {
        // With a date range, the length is that of the last edit within the range;
        // otherwise it is the page's own length.
        $source = $this->hasDateRange() ? $this->lastEdit : $this->page;

        return $source->getLength();
    }
330
331
    /**
332
     * Get the average number of days between edits to the page.
333
     * @return float
334
     */
335 1
    public function averageDaysPerEdit(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // With no processed revisions there is nothing to average; dividing would
        // raise a DivisionByZeroError on PHP 8 (a warning on PHP 7).
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round($this->getTotalDays() / $numRevisions, 1);
    }
339
340
    /**
341
     * Get the average number of edits per day to the page.
342
     * @return float
343
     */
344 1
    public function editsPerDay(): float
    {
        $totalDays = $this->getTotalDays();

        // First and last edit fall on the same day: report zero rather than divide by zero.
        if (!$totalDays) {
            return round(0, 1);
        }

        // NOTE(review): the divisor 365 / 12 / 24 does not obviously correspond to "per day"
        // (a plain daily average would be revisions / total days) — confirm this constant
        // is intended before relying on the figure.
        $average = $this->getNumRevisionsProcessed() / ($totalDays / (365 / 12 / 24));

        return round($average, 1);
    }
351
352
    /**
353
     * Get the average number of edits per month to the page.
354
     * @return float
355
     */
356 1
    public function editsPerMonth(): float
    {
        $totalDays = $this->getTotalDays();
        $numRevisions = $this->getNumRevisionsProcessed();

        $average = 0;
        if ($totalDays) {
            // 365 / 12 is the average number of days in a month.
            $average = $numRevisions / ($totalDays / (365 / 12));
        }

        // The average can never exceed the number of revisions that were processed.
        return min($numRevisions, round($average, 1));
    }
363
364
    /**
365
     * Get the average number of edits per year to the page.
366
     * @return float
367
     */
368 1
    public function editsPerYear(): float
    {
        $totalDays = $this->getTotalDays();
        $numRevisions = $this->getNumRevisionsProcessed();

        $average = 0;
        if ($totalDays) {
            // Total days divided by 365 gives the number of years spanned.
            $average = $numRevisions / ($totalDays / 365);
        }

        // The average can never exceed the number of revisions that were processed.
        return min($numRevisions, round($average, 1));
    }
375
376
    /**
377
     * Get the average number of edits per editor.
378
     * @return float
379
     */
380 1
    public function editsPerEditor(): float
    {
        $numEditors = count($this->editors);

        // $editors starts out empty; without any editors there is no average to compute,
        // and dividing would raise a DivisionByZeroError on PHP 8 (a warning on PHP 7).
        if (0 === $numEditors) {
            return 0.0;
        }

        return round($this->getNumRevisionsProcessed() / $numEditors, 1);
    }
384
385
    /**
386
     * Get the percentage of minor edits to the page.
387
     * @return float
388
     */
389 1
    public function minorPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // No processed revisions: report 0% instead of dividing by zero.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->minorCount / $numRevisions) * 100, 1);
    }
396
397
    /**
398
     * Get the percentage of anonymous edits to the page.
399
     * @return float
400
     */
401 1
    public function anonPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // No processed revisions: report 0% instead of dividing by zero.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->anonCount / $numRevisions) * 100, 1);
    }
408
409
    /**
410
     * Get the percentage of edits made by the top 10 editors.
411
     * @return float
412
     */
413 1
    public function topTenPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // No processed revisions: report 0% instead of dividing by zero.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->topTenCount / $numRevisions) * 100, 1);
    }
417
418
    /**
419
     * Get the number of times the page has been viewed in the given timeframe. If the ArticleInfo instance has a
420
     * date range, it is used instead of the value of the $latest parameter.
421
     * @param  int $latest Last N days.
422
     * @return int
423
     */
424
    public function getPageviews(int $latest): int
    {
        // A date range on this instance takes precedence over the $latest window.
        if ($this->hasDateRange()) {
            $range = $this->getDateParams();

            return $this->page->getPageviews($range['start'], $range['end']);
        }

        return $this->page->getLastPageviews($latest);
    }
433
434
    /**
435
     * Get the page assessments of the page.
436
     * @see https://www.mediawiki.org/wiki/Extension:PageAssessments
437
     * @return string[]|false False if unsupported.
438
     * @codeCoverageIgnore
439
     */
440
    public function getAssessments()
    {
        // null means "not fetched yet". Checking for null rather than !is_array() ensures
        // a cached false (assessments unsupported) does not trigger a refetch on every call.
        if (null === $this->assessments) {
            $this->assessments = $this->page
                ->getProject()
                ->getPageAssessments()
                ->getAssessments($this->page);
        }

        return $this->assessments;
    }
450
451
    /**
452
     * Get the number of automated edits made to the page.
453
     * @return int
454
     */
455 1
    public function getAutomatedCount(): int
456
    {
457 1
        return $this->automatedCount;
458
    }
459
460
    /**
461
     * Get the number of edits to the page that were reverted with the subsequent edit.
462
     * @return int
463
     */
464 1
    public function getRevertCount(): int
465
    {
466 1
        return $this->revertCount;
467
    }
468
469
    /**
470
     * Get the number of edits to the page made by logged out users.
471
     * @return int
472
     */
473 1
    public function getAnonCount(): int
474
    {
475 1
        return $this->anonCount;
476
    }
477
478
    /**
479
     * Get the number of minor edits to the page.
480
     * @return int
481
     */
482 1
    public function getMinorCount(): int
483
    {
484 1
        return $this->minorCount;
485
    }
486
487
    /**
488
     * Get the number of edits to the page made in the past day, week, month and year.
489
     * @return int[] With keys 'day', 'week', 'month' and 'year'.
490
     */
491
    public function getCountHistory(): array
492
    {
493
        return $this->countHistory;
494
    }
495
496
    /**
497
     * Get the number of edits to the page made by the top 10 editors.
498
     * @return int
499
     */
500 1
    public function getTopTenCount(): int
501
    {
502 1
        return $this->topTenCount;
503
    }
504
505
    /**
506
     * Get the top editors to the page by edit count.
507
     * @param int $limit Default 20, maximum 1,000.
508
     * @param bool $noBots Set to non-false to exclude bots from the result.
509
     * @return array
510
     */
511
    public function getTopEditorsByEditCount(int $limit = 20, bool $noBots = false): array
    {
        // Quick cache, valid only for the same request. A function-static variable is shared
        // by every instance of the class and by every combination of arguments, so entries
        // are keyed by the instance and by the effective arguments; a single shared slot
        // would hand back rows computed for a different page, limit or bot filter.
        // NOTE(review): spl_object_hash() values can be reused once an object is destroyed;
        // a dedicated instance property would be more robust if the class can be changed.
        static $cache = [];

        $limit = min($limit, 1000);
        $cacheKey = implode('|', [spl_object_hash($this), $limit, (int) $noBots]);
        if (isset($cache[$cacheKey])) {
            return $cache[$cacheKey];
        }

        $rows = $this->getRepository()->getTopEditorsByEditCount(
            $this->page,
            $this->start,
            $this->end,
            $limit,
            $noBots
        );

        // Reshape each row, numbering the editors in the order the rows were returned.
        $topEditors = [];
        $rank = 0;
        foreach ($rows as $row) {
            $topEditors[] = [
                'rank' => ++$rank,
                'username' => $row['username'],
                'count' => $row['count'],
                'minor' => $row['minor'],
                'first_edit' => [
                    'id' => $row['first_revid'],
                    'timestamp' => $row['first_timestamp'],
                ],
                'latest_edit' => [
                    'id' => $row['latest_revid'],
                    'timestamp' => $row['latest_timestamp'],
                ],
            ];
        }

        $cache[$cacheKey] = $topEditors;

        return $topEditors;
    }
548
549
    /**
550
     * Get the first edit to the page.
551
     * @return Edit
552
     */
553 1
    public function getFirstEdit(): Edit
554
    {
555 1
        return $this->firstEdit;
556
    }
557
558
    /**
559
     * Get the last edit to the page.
560
     * @return Edit
561
     */
562 1
    public function getLastEdit(): Edit
563
    {
564 1
        return $this->lastEdit;
565
    }
566
567
    /**
568
     * Get the edit that made the largest addition to the page (by number of bytes).
569
     * @return Edit|null
570
     */
571 1
    public function getMaxAddition(): ?Edit
572
    {
573 1
        return $this->maxAddition;
574
    }
575
576
    /**
577
     * Get the edit that made the largest removal to the page (by number of bytes).
578
     * @return Edit|null
579
     */
580 1
    public function getMaxDeletion(): ?Edit
581
    {
582 1
        return $this->maxDeletion;
583
    }
584
585
    /**
586
     * Get the list of editors to the page, including various statistics.
587
     * @return mixed[]
588
     */
589 1
    public function getEditors(): array
590
    {
591 1
        return $this->editors;
592
    }
593
594
    /**
595
     * Get the list of the top editors to the page (by edits), including various statistics.
596
     * @return mixed[]
597
     */
598 1
    public function topTenEditorsByEdits(): array
599
    {
600 1
        return $this->topTenEditorsByEdits;
601
    }
602
603
    /**
604
     * Get the list of the top editors to the page (by added text), including various statistics.
605
     * @return mixed[]
606
     */
607 1
    public function topTenEditorsByAdded(): array
608
    {
609 1
        return $this->topTenEditorsByAdded;
610
    }
611
612
    /**
613
     * Get various counts about each individual year and month of the page's history.
614
     * @return mixed[]
615
     */
616 2
    public function getYearMonthCounts(): array
617
    {
618 2
        return $this->yearMonthCounts;
619
    }
620
621
    /**
622
     * Get the localized labels for the 'Year counts' chart.
623
     * @return string[]
624
     */
625
    public function getYearLabels(): array
626
    {
627
        return $this->yearLabels;
628
    }
629
630
    /**
631
     * Get the localized labels for the 'Month counts' chart.
632
     * @return string[]
633
     */
634
    public function getMonthLabels(): array
635
    {
636
        return $this->monthLabels;
637
    }
638
639
    /**
640
     * Get the maximum number of edits that were created across all months. This is used as a
641
     * comparison for the bar charts in the months section.
642
     * @return int
643
     */
644 1
    public function getMaxEditsPerMonth(): int
645
    {
646 1
        return $this->maxEditsPerMonth;
647
    }
648
649
    /**
650
     * Get a list of (semi-)automated tools that were used to edit the page, including
651
     * the number of times they were used, and a link to the tool's homepage.
652
     * @return string[]
653
     */
654 1
    public function getTools(): array
    {
        // $tools has no default value, so it is still null when no tool usage was recorded.
        // Fall back to an empty list so the declared array return type always holds.
        return $this->tools ?? [];
    }
658
659
    /**
660
     * Get the list of page's wikidata and Checkwiki errors.
661
     * @see Page::getErrors()
662
     * @return string[]
663
     */
664
    public function getBugs(): array
    {
        // Already fetched during this request: reuse the cached list.
        if (is_array($this->bugs)) {
            return $this->bugs;
        }

        $this->bugs = $this->page->getErrors();

        return $this->bugs;
    }
671
672
    /**
673
     * Get the number of wikidata and CheckWiki errors.
674
     * @return int
675
     */
676
    public function numBugs(): int
677
    {
678
        return count($this->getBugs());
679
    }
680
681
    /**
682
     * Get the number of external links on the page.
683
     * @return int
684
     */
685 1
    public function linksExtCount(): int
686
    {
687 1
        return $this->getLinksAndRedirects()['links_ext_count'];
688
    }
689
690
    /**
691
     * Get the number of incoming links to the page.
692
     * @return int
693
     */
694 1
    public function linksInCount(): int
695
    {
696 1
        return $this->getLinksAndRedirects()['links_in_count'];
697
    }
698
699
    /**
700
     * Get the number of outgoing links from the page.
701
     * @return int
702
     */
703 1
    public function linksOutCount(): int
704
    {
705 1
        return $this->getLinksAndRedirects()['links_out_count'];
706
    }
707
708
    /**
709
     * Get the number of redirects to the page.
710
     * @return int
711
     */
712 1
    public function redirectsCount(): int
713
    {
714 1
        return $this->getLinksAndRedirects()['redirects_count'];
715
    }
716
717
    /**
718
     * Get the number of external, incoming and outgoing links, along with the number of redirects to the page.
719
     * @return int[]
720
     * @codeCoverageIgnore
721
     */
722
    private function getLinksAndRedirects(): array
    {
        // Already counted during this request: reuse the cached figures.
        if (is_array($this->linksAndRedirects)) {
            return $this->linksAndRedirects;
        }

        $this->linksAndRedirects = $this->page->countLinksAndRedirects();

        return $this->linksAndRedirects;
    }
729
730
    /**
731
     * Parse the revision history, collecting our core statistics.
732
     *
733
     * Untestable because it relies on getting a PDO statement. All the important
734
     * logic lives in other methods which are tested.
735
     * @codeCoverageIgnore
736
     */
737
    private function parseHistory(): void
    {
        // Only cap the query when the page has more revisions than the configured maximum.
        $limit = $this->tooManyRevisions() ? $this->getMaxRevisions() : null;

        // Third parameter is ignored if $limit is null.
        $revStmt = $this->page->getRevisionsStmt(
            null,
            $limit,
            $this->getNumRevisions(),
            $this->start,
            $this->end
        );
        $revCount = 0;

        /**
         * Data about previous edits so that we can use them as a basis for comparison.
         * @var Edit[]
         */
        $prevEdits = [
            // The previous Edit, used to discount content that was reverted.
            'prev' => null,

            // The SHA-1 of the edit *before* the previous edit. Used for more
            // accurate revert detection.
            'prevSha' => null,

            // The last edit deemed to be the max addition of content. This is kept track of
            // in case we find out the next edit was reverted (and was also a max edit),
            // in which case we'll want to discount it and use this one instead.
            'maxAddition' => null,

            // Same as with maxAddition, except the maximum amount of content deleted.
            // This is used to discount content that was reverted.
            'maxDeletion' => null,
        ];

        while ($rev = $revStmt->fetch()) {
            $edit = new Edit($this->page, $rev);

            // The first row seeds firstEdit. Sometimes, with old revisions (2001 era), the
            // revisions from 2002 come before 2001, so any older edit replaces it.
            if (0 === $revCount || $edit->getTimestamp() < $this->firstEdit->getTimestamp()) {
                $this->firstEdit = $edit;
            }

            $prevEdits = $this->updateCounts($edit, $prevEdits);

            $revCount++;
        }

        $this->numRevisionsProcessed = $revCount;

        // Various sorts. $yearMonthCounts and $tools have no default value and are still
        // null when nothing was recorded for them (e.g. no revisions in the date range),
        // so only sort them once they hold data; ksort(null) is a TypeError on PHP 8.
        arsort($this->editors);
        if (is_array($this->yearMonthCounts)) {
            ksort($this->yearMonthCounts);
        }
        if (!empty($this->tools)) {
            arsort($this->tools);
        }
    }
803
804
    /**
805
     * Update various counts based on the current edit.
806
     * @param Edit $edit
807
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'
808
     * @return Edit[] Updated version of $prevEdits.
809
     */
810 4
    private function updateCounts(Edit $edit, array $prevEdits): array
    {
        // Per-edit tallies: year/month, per-user, anon/minor, automated tools,
        // and the "edits per <time>" counters.
        $this->updateYearMonthCounts($edit);
        $this->updateUserCounts($edit);
        $this->updateAnonMinorCounts($edit);
        $this->updateToolCounts($edit);
        $this->updateCountHistory($edit);

        // Figures regarding content addition/removal, and the revert count.
        $tracked = $this->updateContentSizes($edit, $prevEdits);

        // All counts are updated, so the tracking data can now be advanced. Before the
        // previous edit is replaced, its SHA is kept under 'prevSha' so the next
        // iteration can tell whether the content added after it was reverted.
        $previous = $tracked['prev'];
        if (null !== $previous) {
            $tracked['prevSha'] = $previous->getSha();
        }

        $tracked['prev'] = $edit;
        $this->lastEdit = $edit;

        return $tracked;
    }
843
844
    /**
     * Dispatch the content-size bookkeeping for the given edit, depending on whether it is a revert.
     *
     * An edit detected as a revert is flagged via Edit::setReverted(true) before the revert
     * bookkeeping runs; any other edit goes through the non-revert bookkeeping.
     *
     * @param Edit $edit
     * @param mixed[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits.
     */
    private function updateContentSizes(Edit &$edit, array $prevEdits): array
    {
        if (!$this->isRevert($edit, $prevEdits)) {
            return $this->updateContentSizesNonRevert($edit, $prevEdits);
        }

        $edit->setReverted(true);

        return $this->updateContentSizesRevert($prevEdits);
    }
860
861
    /**
     * Decide whether the given Edit is a revert.
     *
     * It counts as one when its SHA equals the tracked 'prevSha', or when the Edit itself
     * reports being a revert.
     *
     * @param Edit $edit
     * @param mixed[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return bool
     */
    private function isRevert(Edit $edit, array $prevEdits): bool
    {
        // Same content hash as the tracked 'prevSha': no need to ask the Edit.
        if ($edit->getSha() === $prevEdits['prevSha']) {
            return true;
        }

        return (bool)$edit->isRevert($this->container);
    }
871
872
    /**
     * Updates the figures on content sizes assuming the given edit was a revert of the previous one.
     * In such a case, we don't want to treat the previous edit as legit content addition or removal.
     *
     * The revert is always counted. When no previous edit is being tracked there is nothing to
     * undo, so the tracking data is returned untouched.
     *
     * @param mixed[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesRevert(array $prevEdits): array
    {
        $this->revertCount++;

        $prev = $prevEdits['prev'];

        // Without a previous edit there is nothing to deduct or restore. This also keeps the
        // max addition/deletion comparison below from dereferencing a missing edit.
        if (!$prev) {
            return $prevEdits;
        }

        // Adjust addedBytes given this edit was a revert of the previous one.
        if (!$prev->isReverted() && $prev->getSize() > 0) {
            $this->addedBytes -= $prev->getSize();

            // Also deduct from the user's individual added byte count.
            // We don't do this if the previous edit was reverted, since that would make the net bytes zero.
            if ($prev->getUser()) {
                $username = $prev->getUser()->getUsername();
                $this->editors[$username]['added'] -= $prev->getSize();
            }
        }

        // @TODO: Test this against an edit war (use your sandbox).
        // Also remove as max added or deleted, if applicable: restore the record that was
        // stashed in $prevEdits, and stash the reverted edit in its place.
        if ($this->maxAddition && $prev->getId() === $this->maxAddition->getId()) {
            $this->maxAddition = $prevEdits['maxAddition'];
            $prevEdits['maxAddition'] = $prev; // In the event of edit wars.
        } elseif ($this->maxDeletion && $prev->getId() === $this->maxDeletion->getId()) {
            $this->maxDeletion = $prevEdits['maxDeletion'];
            $prevEdits['maxDeletion'] = $prev; // In the event of edit wars.
        }

        return $prevEdits;
    }
906
907
    /**
     * Updates the figures on content sizes assuming the given edit was NOT a revert of the previous edit.
     *
     * A positive size is counted as added content (page-wide and, when the edit has a user, for
     * that user). The largest addition and the largest deletion seen so far are tracked as well.
     *
     * @param Edit $edit
     * @param mixed[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesNonRevert(Edit $edit, array $prevEdits): array
    {
        $delta = $this->getEditSize($edit, $prevEdits);

        if ($delta > 0) {
            // Not a revert, so a positive size is treated as content added.
            $this->addedBytes += $delta;

            if ($edit->getUser()) {
                $username = $edit->getUser()->getUsername();
                $this->editors[$username]['added'] += $delta;
            }

            $isLargestAddition = !$this->maxAddition || $delta > $this->maxAddition->getSize();
            if ($isLargestAddition) {
                // Stash the outgoing record holder so it can be restored should this
                // edit later turn out to have been reverted.
                $prevEdits['maxAddition'] = $this->maxAddition;
                $this->maxAddition = $edit;
            }

            return $prevEdits;
        }

        if ($delta < 0 && (!$this->maxDeletion || $delta < $this->maxDeletion->getSize())) {
            // Same stash-and-replace approach for the largest deletion.
            $prevEdits['maxDeletion'] = $this->maxDeletion;
            $this->maxDeletion = $edit;
        }

        return $prevEdits;
    }
943
944
    /**
     * Get the size of the given edit, based on the previous edit (if present).
     *
     * Zero is returned instead of the actual size when the previous revision reports a length
     * of null, which happens when the edit follows other edits that were revision-deleted.
     *
     * @see T148857 for more information.
     * @todo Remove once T101631 is resolved.
     * @param Edit $edit
     * @param mixed[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return int
     */
    private function getEditSize(Edit $edit, array $prevEdits): int
    {
        $previous = $prevEdits['prev'];
        $previousLengthUnknown = $previous && null === $previous->getLength();

        return $previousLengthUnknown ? 0 : $edit->getSize();
    }
962
963
    /**
     * Record automated tool usage for the given edit, when a tool was detected.
     *
     * Increments the overall, per-year and per-month 'automated' counters, and the usage count
     * of the detected tool (creating its entry, including its link, on first sight).
     *
     * @param Edit $edit
     */
    private function updateToolCounts(Edit $edit): void
    {
        $tool = $edit->getTool($this->container);

        // A strict false means no tool was detected, in which case nothing is counted.
        if (false !== $tool) {
            $year = $edit->getYear();
            $month = $edit->getMonth();

            $this->automatedCount++;
            $this->yearMonthCounts[$year]['automated']++;
            $this->yearMonthCounts[$year]['months'][$month]['automated']++;

            $toolName = $tool['name'];

            if (isset($this->tools[$toolName])) {
                $this->tools[$toolName]['count']++;
            } else {
                $this->tools[$toolName] = [
                    'count' => 1,
                    'link' => $tool['link'],
                ];
            }
        }
    }
992
993
    /**
     * Update the per-year and per-month totals for the given edit.
     *
     * Creates the year's entry on first sight, bumps the 'all' counters, records the page size
     * as of this edit, and keeps $this->maxEditsPerMonth at the busiest month seen so far.
     *
     * @param Edit $edit
     */
    private function updateYearMonthCounts(Edit $edit): void
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // First edit seen for this year: create the blank year and month entries.
        if (!isset($this->yearMonthCounts[$year])) {
            $this->addYearMonthCountEntry($edit);
        }

        // Every edit counts towards the yearly and monthly totals.
        $this->yearMonthCounts[$year]['all']++;
        $this->yearMonthCounts[$year]['months'][$month]['all']++;

        // Overwritten on each edit, so this ends up as the size after the year's last processed edit.
        $this->yearMonthCounts[$year]['size'] = (int) $edit->getLength();

        // Track the highest number of edits made in any single month.
        $monthTotal = $this->yearMonthCounts[$year]['months'][$month]['all'];
        if ($monthTotal > $this->maxEditsPerMonth) {
            $this->maxEditsPerMonth = $monthTotal;
        }
    }
1019
1020
    /**
     * Add a blank entry to $this->yearMonthCounts for the year of the given edit.
     *
     * Also appends the year to $this->yearLabels, and for each included month appends a label to
     * $this->monthLabels and creates zeroed counters. Months before the month of the first edit,
     * or after the value returned by getLastDay(), are left out.
     * Called from self::updateYearMonthCounts() the first time a year is encountered.
     *
     * @param Edit $edit
     */
    private function addYearMonthCountEntry(Edit $edit): void
    {
        $this->yearLabels[] = $this->i18n->dateFormat($edit->getTimestamp(), 'yyyy');
        $year = $edit->getYear();

        // Start (00:00:00 on the 1st) of the month in which the first edit was made.
        $firstEditMonthStart = mktime(
            0,
            0,
            0,
            (int)$this->firstEdit->getMonth(),
            1,
            (int)$this->firstEdit->getYear()
        );

        $this->yearMonthCounts[$year] = [
            'all' => 0,
            'minor' => 0,
            'anon' => 0,
            'automated' => 0,
            'size' => 0, // Keep track of the size by the end of the year.
            'events' => [],
            'months' => [],
        ];

        foreach (range(1, 12) as $monthNumber) {
            $monthStart = mktime(0, 0, 0, $monthNumber, 1, (int)$year);

            // Don't show zeros for months before the first edit or after the current month.
            if ($monthStart < $firstEditMonthStart || $monthStart > $this->getLastDay()) {
                continue;
            }

            $this->monthLabels[] = $this->i18n->dateFormat($monthStart, 'yyyy-MM');

            // Months are keyed by their zero-padded number ('01' .. '12').
            $monthKey = sprintf('%02d', $monthNumber);
            $this->yearMonthCounts[$year]['months'][$monthKey] = [
                'all' => 0,
                'minor' => 0,
                'anon' => 0,
                'automated' => 0,
            ];
        }
    }
1060
1061
    /**
     * Count the given edit towards the anonymous and/or minor edit totals.
     *
     * Each applicable flag bumps the page-wide counter as well as the counters for the edit's
     * year and month.
     *
     * @param Edit $edit
     */
    private function updateAnonMinorCounts(Edit $edit): void
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Edits made without an account.
        if ($edit->isAnon()) {
            $this->anonCount++;
            $this->yearMonthCounts[$year]['anon']++;
            $this->yearMonthCounts[$year]['months'][$month]['anon']++;
        }

        // Edits flagged as minor.
        if ($edit->isMinor()) {
            $this->minorCount++;
            $this->yearMonthCounts[$year]['minor']++;
            $this->yearMonthCounts[$year]['months'][$month]['minor']++;
        }
    }
1084
1085
    /**
     * Update the per-editor statistics for the user who made the given edit.
     *
     * Nothing happens when the edit has no user. Otherwise the editor's entry in $this->editors
     * is created on first sight, then their edit count, last edit (timestamp and ID) and minor
     * edit count are brought up to date.
     *
     * @param Edit $edit
     */
    private function updateUserCounts(Edit $edit): void
    {
        $user = $edit->getUser();
        if (!$user) {
            return;
        }

        $username = $user->getUsername();

        // Start from the editor's existing stats, or from a blank record for a new editor.
        $stats = $this->editors[$username] ?? [
            'all' => 0,
            'minor' => 0,
            'minorPercentage' => 0,
            'first' => $edit->getTimestamp(),
            'firstId' => $edit->getId(),
            'last' => null,
            'atbe' => null,
            'added' => 0,
        ];

        // This edit is now the editor's most recent one.
        $stats['all']++;
        $stats['last'] = $edit->getTimestamp();
        $stats['lastId'] = $edit->getId();

        if ($edit->isMinor()) {
            $stats['minor']++;
        }

        $this->editors[$username] = $stats;
    }
1121
1122
    /**
     * Increment the "edits per <time>" counters that the given edit falls into.
     *
     * An edit newer than one day/week/month/year ago bumps the matching key of
     * $this->countHistory; a recent edit therefore bumps several counters at once.
     *
     * @param Edit $edit
     */
    private function updateCountHistory(Edit $edit): void
    {
        $timestamp = $edit->getTimestamp();

        // Counter key => relative date marking the start of that window.
        $windows = [
            'day' => '-1 day',
            'week' => '-1 week',
            'month' => '-1 month',
            'year' => '-1 year',
        ];

        foreach ($windows as $period => $windowStart) {
            if ($timestamp > new DateTime($windowStart)) {
                $this->countHistory[$period]++;
            }
        }
    }
1143
1144
    /**
     * Get the stored info about bots that edited the page.
     *
     * Returns $this->bots as-is; self::setBots() stores it keyed by username, each entry
     * holding the bot's edit 'count' and a 'current' flag.
     *
     * @return mixed[] Contains the bot's username, edit count to the page, and whether or not they are currently a bot.
     */
    public function getBots(): array
    {
        return $this->bots;
    }
1152
1153
    /**
     * Load info about bots that edited the page into $this->bots, ordered by edit count (highest first).
     *
     * This is done as a private setter because the information is needed when computing the
     * top 10 editors, where we don't want to include bots.
     */
    private function setBots(): void
    {
        /** @var Statement $rows */
        $rows = $this->getRepository()->getBotData($this->page, $this->start, $this->end);

        // Index the rows by username, normalizing the count to an int and the flag to a bool.
        $botStats = [];
        for ($row = $rows->fetch(); $row; $row = $rows->fetch()) {
            $botStats[$row['username']] = [
                'count' => (int)$row['count'],
                'current' => '1' === $row['current'],
            ];
        }

        // Highest edit count first, keeping the username keys.
        uasort($botStats, function ($a, $b) {
            return $b['count'] <=> $a['count'];
        });

        $this->bots = $botStats;
    }
1178
1179
    /**
     * Number of edits made to the page by current or former bots.
     *
     * The total is computed once and stored in $this->botRevisionCount; later calls return the
     * stored value regardless of the argument.
     *
     * @param array[]|null $bots Used only in unit tests, where we supply mock data for the bots that will get
     *   processed. Each entry needs a 'count' key. Defaults to the result of self::getBots().
     * @return int
     */
    public function getBotRevisionCount(?array $bots = null): int
    {
        if (!isset($this->botRevisionCount)) {
            $botData = null === $bots ? $this->getBots() : $bots;

            $total = 0;
            foreach ($botData as $stats) {
                $total += $stats['count'];
            }

            $this->botRevisionCount = $total;
        }

        return $this->botRevisionCount;
    }
1203
1204
    /**
     * Query for log events during each year of the article's history, and set the results in $this->yearMonthCounts.
     *
     * Each recognised event increments a counter under the 'events' key of its year, using an
     * i18n key ('protections', 'deletions' or 'moves') as the counter name. Events with any other
     * log type are skipped, so they are never counted under an undefined or stale action.
     */
    private function setLogsEvents(): void
    {
        // Log type => i18n key. Pending-changes ('stable') protections are counted
        // along with normal protections.
        $actionKeys = [
            'protect' => 'protections',
            'delete' => 'deletions',
            'move' => 'moves',
            'stable' => 'protections',
        ];

        $logData = $this->getRepository()->getLogEvents(
            $this->page,
            $this->start,
            $this->end
        );

        foreach ($logData as $event) {
            $time = strtotime($event['timestamp']);
            $year = date('Y', $time);

            // NOTE(review): processing stops entirely at the first event whose year has no entry,
            // which presumably relies on the ordering of the log data; confirm `break` (rather
            // than `continue`) is intended.
            if (!isset($this->yearMonthCounts[$year])) {
                break;
            }

            // Skip log types we don't report on.
            $action = $actionKeys[$event['log_type']] ?? null;
            if (null === $action) {
                continue;
            }

            $yearEvents = $this->yearMonthCounts[$year]['events'];
            $yearEvents[$action] = empty($yearEvents[$action]) ? 1 : $yearEvents[$action] + 1;

            $this->yearMonthCounts[$year]['events'] = $yearEvents;
        }
    }
1251
1252
    /**
     * Set statistics about the top 10 editors by added text and number of edits.
     * This is ran *after* parseHistory() since we need the grand totals first.
     * Various stats are also set for each editor in $this->editors to be used in the charts.
     *
     * Sets $this->topTenEditorsByEdits, $this->topTenEditorsByAdded and $this->topTenCount, and
     * fills in 'minorPercentage' and (for editors with more than one edit) 'atbe' for every editor.
     */
    private function doPostPrecessing(): void
    {
        $topTenCount = 0;
        $topTenEditorsByEdits = [];

        foreach ($this->editors as $editor => $info) {
            // Collect the first 10 editors that are not bots, in the iteration order of
            // $this->editors (presumably already sorted by parseHistory()).
            // Bots are keyed by username, so a key lookup is exact and avoids the loose
            // comparison of in_array(), which can treat distinct numeric-looking names as equal.
            if (count($topTenEditorsByEdits) < 10 && !array_key_exists($editor, $this->bots)) {
                $topTenCount += $info['all'];

                // To be used in the Top Ten charts.
                $topTenEditorsByEdits[] = [
                    'label' => $editor,
                    'value' => $info['all'],
                ];
            }

            // Compute the percentage of minor edits the user made.
            $this->editors[$editor]['minorPercentage'] = $info['all']
                ? ($info['minor'] / $info['all']) * 100
                : 0;

            if ($info['all'] > 1) {
                // Number of seconds/days between first and last edit.
                $secs = $info['last']->getTimestamp() - $info['first']->getTimestamp();
                $days = $secs / (60 * 60 * 24);

                // Average time between edits (in days).
                $this->editors[$editor]['atbe'] = $days / $info['all'];
            }
        }

        // Add each top editor's share of the edits made by the top 10.
        // Guarded so that a zero total cannot trigger a division by zero.
        $this->topTenEditorsByEdits = array_map(static function (array $editor) use ($topTenCount): array {
            $editor['percentage'] = $topTenCount > 0
                ? 100 * ($editor['value'] / $topTenCount)
                : 0;

            return $editor;
        }, $topTenEditorsByEdits);

        $this->topTenEditorsByAdded = $this->getTopTenByAdded();

        $this->topTenCount = $topTenCount;
    }
1300
1301
    /**
     * Get the top ten editors by added text.
     *
     * The percentage of each editor is their share of $this->addedBytes (0 when no bytes were added).
     *
     * @return array With keys 'label', 'value' and 'percentage', ready to be used by the pieChart Twig helper.
     */
    private function getTopTenByAdded(): array
    {
        // Work on a copy so that the order of $this->editors is left alone.
        $ranked = $this->editors;
        uasort($ranked, static function ($a, $b) {
            // Descending by the amount of text added.
            return $b['added'] <=> $a['added'];
        });

        // Build the chart data for the 10 editors who added the most text.
        $chartData = [];
        foreach (array_keys(array_slice($ranked, 0, 10, true)) as $editor) {
            $added = $this->editors[$editor]['added'];

            if (0 === $this->addedBytes) {
                $percentage = 0;
            } else {
                $percentage = 100 * ($added / $this->addedBytes);
            }

            $chartData[] = [
                'label' => $editor,
                'value' => $added,
                'percentage' => $percentage,
            ];
        }

        return $chartData;
    }
1336
1337
    /**
     * Get prose and reference information, parsed from the page's HTML.
     *
     * When $this->end is not false, the HTML is requested for that point in time; otherwise
     * null is passed as the date.
     *
     * @return array With keys 'characters', 'words', 'references', 'unique_references' and 'sections'.
     */
    public function getProseStats(): array
    {
        $asOf = null;
        if (false !== $this->end) {
            // The '@' prefix makes DateTime interpret the value as a Unix timestamp.
            $asOf = new DateTime('@'.$this->end);
        }

        $crawler = new Crawler($this->page->getHTMLContent($asOf));

        // Prose: the paragraphs within the content area.
        [$chars, $words] = $this->countCharsAndWords($crawler, '#mw-content-text p');

        // References: gather the text of each one so that duplicates can be collapsed.
        $refs = $crawler->filter('#mw-content-text .reference');
        $refTexts = [];
        $refs->each(function ($ref) use (&$refTexts): void {
            $refTexts[] = $ref->text();
        });
        $numUniqueRefs = count(array_unique($refTexts));

        // Sections: one headline element per section.
        $numSections = $crawler->filter('#mw-content-text .mw-headline')->count();

        return [
            'characters' => $chars,
            'words' => $words,
            'references' => $refs->count(),
            'unique_references' => $numUniqueRefs,
            'sections' => $numSections,
        ];
    }
1367
1368
    /**
     * Count the number of characters and words of the plain text within the DOM element matched by the given selector.
     *
     * Reference markers such as "[12]" are stripped before counting. Characters are counted as
     * UTF-8 characters rather than bytes, and words are the non-empty runs of text between
     * whitespace, so empty elements and repeated spaces do not inflate the word count.
     *
     * @param Crawler $crawler
     * @param string $selector HTML selector.
     * @return array [num chars, num words]
     */
    private function countCharsAndWords(Crawler $crawler, string $selector): array
    {
        $totalChars = 0;
        $totalWords = 0;

        $crawler->filter($selector)->each(function ($node) use (&$totalChars, &$totalWords): void {
            // Drop reference markers, e.g. "[12]". preg_replace() yields null on failure,
            // which is treated as empty text.
            $text = (string)preg_replace('/\[\d+\]/', '', trim($node->text()));

            // strlen() would count bytes, overstating the length of any non-ASCII prose.
            $totalChars += mb_strlen($text, 'UTF-8');

            // Splitting on whitespace runs and discarding empty pieces means an empty
            // paragraph contributes zero words instead of one.
            $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
            $totalWords += false === $words ? 0 : count($words);
        });

        return [$totalChars, $totalWords];
    }
1387
1388
    /**
     * Fetch transclusion data (categories, templates and files) that are on the page.
     *
     * The repository is only queried while $this->transclusionData is not yet an array; the
     * result is stored on the instance and reused afterwards.
     *
     * @return array With keys 'categories', 'templates' and 'files'.
     */
    private function getTransclusionData(): array
    {
        if (is_array($this->transclusionData)) {
            return $this->transclusionData;
        }

        $repository = $this->getRepository();
        $this->transclusionData = $repository->getTransclusionData($this->page);

        return $this->transclusionData;
    }
1400
1401
    /**
     * Get the number of categories that are on the page.
     *
     * Read from the 'categories' key of the transclusion data.
     *
     * @return int
     */
    public function getNumCategories(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['categories'];
    }
1409
1410
    /**
     * Get the number of templates that are on the page.
     *
     * Read from the 'templates' key of the transclusion data.
     *
     * @return int
     */
    public function getNumTemplates(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['templates'];
    }
1418
1419
    /**
     * Get the number of files that are on the page.
     *
     * Read from the 'files' key of the transclusion data.
     *
     * @return int
     */
    public function getNumFiles(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['files'];
    }
1427
}
1428