Passed
Push — master ( 30ceb2...65dd1b )
by MusikAnimal
05:23
created

ArticleInfo::getLastEdit()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the ArticleInfo class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Model;
9
10
use AppBundle\Helper\I18nHelper;
11
use DateTime;
12
use Symfony\Component\DependencyInjection\ContainerInterface;
13
use Symfony\Component\DomCrawler\Crawler;
14
15
/**
16
 * An ArticleInfo provides statistics about a page on a project.
17
 */
18
class ArticleInfo extends Model
19
{
20
    /** @const string[] Domain names of wikis supported by WikiWho. */
21
    public const TEXTSHARE_WIKIS = [
22
        'en.wikipedia.org',
23
        'de.wikipedia.org',
24
        'eu.wikipedia.org',
25
        'tr.wikipedia.org',
26
        'es.wikipedia.org',
27
    ];
28
29
    /** @var ContainerInterface The application's DI container. */
30
    protected $container;
31
32
    /** @var I18nHelper For i18n and l10n. */
33
    protected $i18n;
34
35
    /** @var int Number of revisions that belong to the page. */
36
    protected $numRevisions;
37
38
    /** @var int Maximum number of revisions to process, as configured. */
39
    protected $maxRevisions;
40
41
    /** @var int Number of revisions that were actually processed. */
42
    protected $numRevisionsProcessed;
43
44
    /**
45
     * Various statistics about editors to the page. These are not User objects
46
     * so as to preserve memory.
47
     * @var mixed[]
48
     */
49
    protected $editors;
50
51
    /** @var mixed[] The top 10 editors to the page by number of edits. */
52
    protected $topTenEditorsByEdits;
53
54
    /** @var mixed[] The top 10 editors to the page by added text. */
55
    protected $topTenEditorsByAdded;
56
57
    /** @var int Number of edits made by the top 10 editors. */
58
    protected $topTenCount;
59
60
    /** @var mixed[] Various statistics about bots that edited the page. */
61
    protected $bots;
62
63
    /** @var int Number of edits made to the page by bots. */
64
    protected $botRevisionCount;
65
66
    /** @var mixed[] Various counts about each individual year and month of the page's history. */
67
    protected $yearMonthCounts;
68
69
    /** @var string[] Localized labels for the years, to be used in the 'Year counts' chart. */
70
    protected $yearLabels = [];
71
72
    /** @var string[] Localized labels for the months, to be used in the 'Month counts' chart. */
73
    protected $monthLabels = [];
74
75
    /** @var Edit The first edit to the page. */
76
    protected $firstEdit;
77
78
    /** @var Edit The last edit to the page. */
79
    protected $lastEdit;
80
81
    /** @var Edit Edit that made the largest addition by number of bytes. */
82
    protected $maxAddition;
83
84
    /** @var Edit Edit that made the largest deletion by number of bytes. */
85
    protected $maxDeletion;
86
87
    /** @var int[] Number of in and outgoing links and redirects to the page. */
88
    protected $linksAndRedirects;
89
90
    /** @var string[] Assessments of the page (see Page::getAssessments). */
91
    protected $assessments;
92
93
    /**
94
     * Maximum number of edits that were created across all months. This is used as a comparison
95
     * for the bar charts in the months section.
96
     * @var int
97
     */
98
    protected $maxEditsPerMonth;
99
100
    /** @var string[] List of (semi-)automated tools that were used to edit the page. */
101
    protected $tools;
102
103
    /**
104
     * Total number of bytes added throughout the page's history. This is used as a comparison
105
     * when computing the top 10 editors by added text.
106
     * @var int
107
     */
108
    protected $addedBytes = 0;
109
110
    /** @var int Number of days between first and last edit. */
111
    protected $totalDays;
112
113
    /** @var int Number of minor edits to the page. */
114
    protected $minorCount = 0;
115
116
    /** @var int Number of anonymous edits to the page. */
117
    protected $anonCount = 0;
118
119
    /** @var int Number of automated edits to the page. */
120
    protected $automatedCount = 0;
121
122
    /** @var int Number of edits to the page that were reverted with the subsequent edit. */
123
    protected $revertCount = 0;
124
125
    /** @var int[] The "edits per <time>" counts. */
126
    protected $countHistory = [
127
        'day' => 0,
128
        'week' => 0,
129
        'month' => 0,
130
        'year' => 0,
131
    ];
132
133
    /** @var string[] List of wikidata and Checkwiki errors. */
134
    protected $bugs;
135
136
    /** @var array List of editors and the percentage of the current content that they authored. */
137
    protected $textshares;
138
139
    /** @var array Number of categories, templates and files on the page. */
140
    protected $transclusionData;
141
142
    /**
143
     * ArticleInfo constructor.
144
     * @param Page $page The page to process.
145
     * @param ContainerInterface $container The DI container.
146
     * @param false|int $start From what date to obtain records.
147
     * @param false|int $end To what date to obtain records.
148
     */
149 13
    public function __construct(Page $page, ContainerInterface $container, $start = false, $end = false)
150
    {
151 13
        $this->page = $page;
152 13
        $this->container = $container;
153 13
        $this->start = $start;
154 13
        $this->end = $end;
155 13
    }
156
157
    /**
158
     * Make the I18nHelper accessible to ArticleInfo.
159
     * @param I18nHelper $i18n
160
     * @codeCoverageIgnore
161
     */
162
    public function setI18nHelper(I18nHelper $i18n): void
163
    {
164
        $this->i18n = $i18n;
165
    }
166
167
    /**
168
     * Get the opening date of the date range, formatted as used in the views.
169
     * @return string Blank if no value exists.
170
     */
171 1
    public function getStartDate(): string
    {
        // No lower bound: false is the constructor default; null and an empty
        // string are treated the same way. Strict checks keep the result
        // independent of the PHP version's loose-comparison rules.
        if (false === $this->start || null === $this->start || '' === $this->start) {
            return '';
        }

        // date() requires an integer timestamp under strict_types, so a numeric
        // string is cast explicitly.
        return date('Y-m-d', (int) $this->start);
    }
175
176
    /**
177
     * Get the closing date of the date range, formatted as used in the views.
178
     * @return string Blank if no value exists.
179
     */
180 1
    public function getEndDate(): string
    {
        // No upper bound: false is the constructor default; null and an empty
        // string are treated the same way. Strict checks keep the result
        // independent of the PHP version's loose-comparison rules.
        if (false === $this->end || null === $this->end || '' === $this->end) {
            return '';
        }

        // date() requires an integer timestamp under strict_types, so a numeric
        // string is cast explicitly.
        return date('Y-m-d', (int) $this->end);
    }
184
185
    /**
186
     * Get the day of last date we should show in the month/year sections,
187
     * based on $this->end or the current date.
188
     * @return int As Unix timestamp.
189
     */
190 4
    private function getLastDay(): int
    {
        // Without an explicit end date, fall back to the current month.
        if (false === $this->end) {
            return strtotime('last day of this month');
        }

        // Otherwise move to the last day of the month that contains $this->end.
        $endDate = new DateTime('@'.$this->end);
        return $endDate->modify('last day of this month')->getTimestamp();
    }
200
201
    /**
202
     * Return the start/end date values as associative array, with YYYY-MM-DD as the date format.
203
     * This is used mainly as a helper to pass to the pageviews Twig macros.
204
     * @return array
205
     */
206 1
    public function getDateParams(): array
    {
        if (!$this->hasDateRange()) {
            return [];
        }

        // Default to the dates of the first and last edits that were processed.
        $params = [
            'start' => $this->firstEdit->getTimestamp()->format('Y-m-d'),
            'end' => $this->lastEdit->getTimestamp()->format('Y-m-d'),
        ];

        // An explicitly requested bound overrides the edit-derived date. The cast
        // is needed because date() rejects numeric strings under strict_types.
        if (false !== $this->start) {
            $params['start'] = date('Y-m-d', (int) $this->start);
        }
        if (false !== $this->end) {
            $params['end'] = date('Y-m-d', (int) $this->end);
        }

        return $params;
    }
226
227
    /**
228
     * Get the number of revisions belonging to the page.
229
     * @return int
230
     */
231 4
    public function getNumRevisions(): int
232
    {
233 4
        if (!isset($this->numRevisions)) {
234 4
            $this->numRevisions = $this->page->getNumRevisions(null, $this->start, $this->end);
0 ignored issues
show
Bug introduced by
It seems like $this->end can also be of type string; however, parameter $end of AppBundle\Model\Page::getNumRevisions() does only seem to accept false|integer, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

234
            $this->numRevisions = $this->page->getNumRevisions(null, $this->start, /** @scrutinizer ignore-type */ $this->end);
Loading history...
Bug introduced by
It seems like $this->start can also be of type string; however, parameter $start of AppBundle\Model\Page::getNumRevisions() does only seem to accept false|integer, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

234
            $this->numRevisions = $this->page->getNumRevisions(null, /** @scrutinizer ignore-type */ $this->start, $this->end);
Loading history...
235
        }
236 4
        return $this->numRevisions;
237
    }
238
239
    /**
240
     * Get the maximum number of revisions that we should process.
241
     * @return int
242
     */
243 3
    public function getMaxRevisions(): int
244
    {
245 3
        if (!isset($this->maxRevisions)) {
246 3
            $this->maxRevisions = (int) $this->container->getParameter('app.max_page_revisions');
247
        }
248 3
        return $this->maxRevisions;
249
    }
250
251
    /**
252
     * Get the number of revisions that are actually getting processed. This goes by the app.max_page_revisions
253
     * parameter, or the actual number of revisions, whichever is smaller.
254
     * @return int
255
     */
256 3
    public function getNumRevisionsProcessed(): int
    {
        // Computed once and then cached on the instance.
        if (!isset($this->numRevisionsProcessed)) {
            // Capped at the configured maximum when the page has more revisions than that.
            $this->numRevisionsProcessed = $this->tooManyRevisions()
                ? $this->getMaxRevisions()
                : $this->getNumRevisions();
        }

        return $this->numRevisionsProcessed;
    }
270
271
    /**
272
     * Are there more revisions than we should process, based on the config?
273
     * @return bool
274
     */
275 3
    public function tooManyRevisions(): bool
276
    {
277 3
        return $this->getMaxRevisions() > 0 && $this->getNumRevisions() > $this->getMaxRevisions();
278
    }
279
280
    /**
281
     * Fetch and store all the data we need to show the ArticleInfo view.
282
     * @codeCoverageIgnore
283
     */
284
    public function prepareData(): void
285
    {
286
        $this->parseHistory();
287
        $this->setLogsEvents();
288
289
        // Bots need to be set before setting top 10 counts.
290
        $this->setBots();
291
292
        $this->doPostPrecessing();
293
    }
294
295
    /**
296
     * Get the number of editors that edited the page.
297
     * @return int
298
     */
299 1
    public function getNumEditors(): int
    {
        // $this->editors has no default value, so it is still null until edits
        // have been processed; count(null) warns on PHP 7.2+ and throws on PHP 8.
        return count($this->editors ?? []);
    }
303
304
    /**
305
     * Get the number of bots that edited the page.
306
     * @return int
307
     */
308
    public function getNumBots(): int
309
    {
310
        return count($this->getBots());
311
    }
312
313
    /**
314
     * Get the number of days between the first and last edit.
315
     * @return int
316
     */
317 1
    public function getTotalDays(): int
    {
        // Computed once and then cached on the instance.
        if (!isset($this->totalDays)) {
            $firstTimestamp = $this->firstEdit->getTimestamp();
            $lastTimestamp = $this->lastEdit->getTimestamp();

            // Absolute difference, expressed as a whole number of days.
            $this->totalDays = (int)date_diff($lastTimestamp, $firstTimestamp, true)->format('%a');
        }

        return $this->totalDays;
    }
328
329
    /**
330
     * Returns length of the page.
331
     * @return int
332
     */
333 1
    public function getLength(): int
    {
        // With a date range, report the length as of the last edit in that range;
        // otherwise use the length reported by the page itself.
        return $this->hasDateRange()
            ? $this->lastEdit->getLength()
            : $this->page->getLength();
    }
341
342
    /**
343
     * Get the average number of days between edits to the page.
344
     * @return float
345
     */
346 1
    public function averageDaysPerEdit(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Nothing was processed: avoid a division by zero and report no average.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round($this->getTotalDays() / $numRevisions, 1);
    }
350
351
    /**
352
     * Get the average number of edits per day to the page.
353
     * @return float
354
     */
355 1
    public function editsPerDay(): float
    {
        $totalDays = $this->getTotalDays();
        $average = 0;

        if ($totalDays) {
            // NOTE(review): the day count is divided by (365 / 12 / 24) rather than
            // being used directly as the divisor — confirm this scaling is intended.
            $average = $this->getNumRevisionsProcessed() / ($totalDays / (365 / 12 / 24));
        }

        return round($average, 1);
    }
362
363
    /**
364
     * Get the average number of edits per month to the page.
365
     * @return float
366
     */
367 1
    public function editsPerMonth(): float
368
    {
369 1
        $editsPerMonth = $this->getTotalDays()
370 1
            ? $this->getNumRevisionsProcessed() / ($this->getTotalDays() / (365 / 12))
371 1
            : 0;
372 1
        return min($this->getNumRevisionsProcessed(), round($editsPerMonth, 1));
373
    }
374
375
    /**
376
     * Get the average number of edits per year to the page.
377
     * @return float
378
     */
379 1
    public function editsPerYear(): float
380
    {
381 1
        $editsPerYear = $this->getTotalDays()
382 1
            ? $this->getNumRevisionsProcessed() / ($this->getTotalDays() / 365)
383 1
            : 0;
384 1
        return min($this->getNumRevisionsProcessed(), round($editsPerYear, 1));
385
    }
386
387
    /**
388
     * Get the average number of edits per editor.
389
     * @return float
390
     */
391 1
    public function editsPerEditor(): float
    {
        // $this->editors is null until edits have been processed.
        $numEditors = count($this->editors ?? []);

        // No editors recorded: avoid a division by zero and report no average.
        if (0 === $numEditors) {
            return 0.0;
        }

        return round($this->getNumRevisionsProcessed() / $numEditors, 1);
    }
395
396
    /**
397
     * Get the percentage of minor edits to the page.
398
     * @return float
399
     */
400 1
    public function minorPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Nothing was processed: avoid a division by zero.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->minorCount / $numRevisions) * 100, 1);
    }
407
408
    /**
409
     * Get the percentage of anonymous edits to the page.
410
     * @return float
411
     */
412 1
    public function anonPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Nothing was processed: avoid a division by zero.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->anonCount / $numRevisions) * 100, 1);
    }
419
420
    /**
421
     * Get the percentage of edits made by the top 10 editors.
422
     * @return float
423
     */
424 1
    public function topTenPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Nothing was processed: avoid a division by zero.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->topTenCount / $numRevisions) * 100, 1);
    }
428
429
    /**
430
     * Get the number of times the page has been viewed in the given timeframe. If the ArticleInfo instance has a
431
     * date range, it is used instead of the value of the $latest parameter.
432
     * @param  int $latest Last N days.
433
     * @return int
434
     */
435
    public function getPageviews(int $latest): int
    {
        // A date range on the instance takes precedence over $latest.
        if ($this->hasDateRange()) {
            $range = $this->getDateParams();
            return $this->page->getPageviews($range['start'], $range['end']);
        }

        return $this->page->getLastPageviews($latest);
    }
444
445
    /**
446
     * Get the page assessments of the page.
447
     * @see https://www.mediawiki.org/wiki/Extension:PageAssessments
448
     * @return string[]|false False if unsupported.
449
     * @codeCoverageIgnore
450
     */
451
    public function getAssessments()
452
    {
453
        if (!is_array($this->assessments)) {
0 ignored issues
show
introduced by
The condition is_array($this->assessments) is always true.
Loading history...
454
            $this->assessments = $this->page
455
                ->getProject()
456
                ->getPageAssessments()
457
                ->getAssessments($this->page);
458
        }
459
        return $this->assessments;
460
    }
461
462
    /**
463
     * Get the number of automated edits made to the page.
464
     * @return int
465
     */
466 1
    public function getAutomatedCount(): int
467
    {
468 1
        return $this->automatedCount;
469
    }
470
471
    /**
472
     * Get the number of edits to the page that were reverted with the subsequent edit.
473
     * @return int
474
     */
475 1
    public function getRevertCount(): int
476
    {
477 1
        return $this->revertCount;
478
    }
479
480
    /**
481
     * Get the number of edits to the page made by logged out users.
482
     * @return int
483
     */
484 1
    public function getAnonCount(): int
485
    {
486 1
        return $this->anonCount;
487
    }
488
489
    /**
490
     * Get the number of minor edits to the page.
491
     * @return int
492
     */
493 1
    public function getMinorCount(): int
494
    {
495 1
        return $this->minorCount;
496
    }
497
498
    /**
499
     * Get the number of edits to the page made in the past day, week, month and year.
500
     * @return int[] With keys 'day', 'week', 'month' and 'year'.
501
     */
502
    public function getCountHistory(): array
503
    {
504
        return $this->countHistory;
505
    }
506
507
    /**
508
     * Get the number of edits to the page made by the top 10 editors.
509
     * @return int
510
     */
511 1
    public function getTopTenCount(): int
512
    {
513 1
        return $this->topTenCount;
514
    }
515
516
    /**
517
     * Get the top editors to the page by edit count.
518
     * @param int $limit Default 20, maximum 1,000.
519
     * @param bool $noBots Set to true to exclude bots from the result.
520
     * @return array
521
     */
522
    public function getTopEditorsByEditCount(int $limit = 20, bool $noBots = false): array
    {
        // Never ask the repository for more than 1,000 rows.
        $limit = min($limit, 1000);

        // Quick cache, valid only for the same request. A function-static variable
        // is shared by every instance of the class, so the key includes the object
        // identity and both arguments; otherwise a later call with a different
        // limit, bot filter or ArticleInfo would get the first call's result.
        // NOTE(review): spl_object_hash() values can be reused once an object is
        // destroyed — confirm ArticleInfo instances live for the whole request.
        static $cache = [];
        $cacheKey = implode('|', [spl_object_hash($this), $limit, (int)$noBots]);
        if (isset($cache[$cacheKey])) {
            return $cache[$cacheKey];
        }

        $rows = $this->getRepository()->getTopEditorsByEditCount(
            $this->page,
            $this->start,
            $this->end,
            $limit,
            $noBots
        );

        // Reshape each row, adding a 1-based rank in the order the rows were returned.
        $topEditors = [];
        $rank = 0;
        foreach ($rows as $row) {
            $topEditors[] = [
                'rank' => ++$rank,
                'username' => $row['username'],
                'count' => $row['count'],
                'minor' => $row['minor'],
                'first_edit' => [
                    'id' => $row['first_revid'],
                    'timestamp' => $row['first_timestamp'],
                ],
                'latest_edit' => [
                    'id' => $row['latest_revid'],
                    'timestamp' => $row['latest_timestamp'],
                ],
            ];
        }

        $cache[$cacheKey] = $topEditors;

        return $topEditors;
    }
559
560
    /**
561
     * Get the first edit to the page.
562
     * @return Edit
563
     */
564 1
    public function getFirstEdit(): Edit
565
    {
566 1
        return $this->firstEdit;
567
    }
568
569
    /**
570
     * Get the last edit to the page.
571
     * @return Edit
572
     */
573 1
    public function getLastEdit(): Edit
574
    {
575 1
        return $this->lastEdit;
576
    }
577
578
    /**
579
     * Get the edit that made the largest addition to the page (by number of bytes).
580
     * @return Edit|null
581
     */
582 1
    public function getMaxAddition(): ?Edit
583
    {
584 1
        return $this->maxAddition;
585
    }
586
587
    /**
588
     * Get the edit that made the largest removal to the page (by number of bytes).
589
     * @return Edit|null
590
     */
591 1
    public function getMaxDeletion(): ?Edit
592
    {
593 1
        return $this->maxDeletion;
594
    }
595
596
    /**
597
     * Get the list of editors to the page, including various statistics.
598
     * @return mixed[]
599
     */
600 1
    public function getEditors(): array
601
    {
602 1
        return $this->editors;
603
    }
604
605
    /**
606
     * Get the list of the top editors to the page (by edits), including various statistics.
607
     * @return mixed[]
608
     */
609 1
    public function topTenEditorsByEdits(): array
610
    {
611 1
        return $this->topTenEditorsByEdits;
612
    }
613
614
    /**
615
     * Get the list of the top editors to the page (by added text), including various statistics.
616
     * @return mixed[]
617
     */
618 1
    public function topTenEditorsByAdded(): array
619
    {
620 1
        return $this->topTenEditorsByAdded;
621
    }
622
623
    /**
624
     * Get various counts about each individual year and month of the page's history.
625
     * @return mixed[]
626
     */
627 2
    public function getYearMonthCounts(): array
628
    {
629 2
        return $this->yearMonthCounts;
630
    }
631
632
    /**
633
     * Get the localized labels for the 'Year counts' chart.
634
     * @return string[]
635
     */
636
    public function getYearLabels(): array
637
    {
638
        return $this->yearLabels;
639
    }
640
641
    /**
642
     * Get the localized labels for the 'Month counts' chart.
643
     * @return string[]
644
     */
645
    public function getMonthLabels(): array
646
    {
647
        return $this->monthLabels;
648
    }
649
650
    /**
651
     * Get the maximum number of edits that were created across all months. This is used as a
652
     * comparison for the bar charts in the months section.
653
     * @return int
654
     */
655 1
    public function getMaxEditsPerMonth(): int
656
    {
657 1
        return $this->maxEditsPerMonth;
658
    }
659
660
    /**
661
     * Get a list of (semi-)automated tools that were used to edit the page, including
662
     * the number of times they were used, and a link to the tool's homepage.
663
     * @return string[]
664
     */
665 1
    public function getTools(): array
666
    {
667 1
        return $this->tools;
668
    }
669
670
    /**
671
     * Get the list of page's wikidata and Checkwiki errors.
672
     * @see Page::getErrors()
673
     * @return string[]
674
     */
675
    public function getBugs(): array
676
    {
677
        if (!is_array($this->bugs)) {
0 ignored issues
show
introduced by
The condition is_array($this->bugs) is always true.
Loading history...
678
            $this->bugs = $this->page->getErrors();
679
        }
680
        return $this->bugs;
681
    }
682
683
    /**
684
     * Get the number of wikidata and CheckWiki errors.
685
     * @return int
686
     */
687
    public function numBugs(): int
688
    {
689
        return count($this->getBugs());
690
    }
691
692
    /**
693
     * Get the number of external links on the page.
694
     * @return int
695
     */
696 1
    public function linksExtCount(): int
697
    {
698 1
        return $this->getLinksAndRedirects()['links_ext_count'];
699
    }
700
701
    /**
702
     * Get the number of incoming links to the page.
703
     * @return int
704
     */
705 1
    public function linksInCount(): int
706
    {
707 1
        return $this->getLinksAndRedirects()['links_in_count'];
708
    }
709
710
    /**
711
     * Get the number of outgoing links from the page.
712
     * @return int
713
     */
714 1
    public function linksOutCount(): int
715
    {
716 1
        return $this->getLinksAndRedirects()['links_out_count'];
717
    }
718
719
    /**
720
     * Get the number of redirects to the page.
721
     * @return int
722
     */
723 1
    public function redirectsCount(): int
724
    {
725 1
        return $this->getLinksAndRedirects()['redirects_count'];
726
    }
727
728
    /**
729
     * Get the number of external, incoming and outgoing links, along with the number of redirects to the page.
730
     * @return int[]
731
     * @codeCoverageIgnore
732
     */
733
    private function getLinksAndRedirects(): array
734
    {
735
        if (!is_array($this->linksAndRedirects)) {
0 ignored issues
show
introduced by
The condition is_array($this->linksAndRedirects) is always true.
Loading history...
736
            $this->linksAndRedirects = $this->page->countLinksAndRedirects();
737
        }
738
        return $this->linksAndRedirects;
739
    }
740
741
    /**
742
     * Parse the revision history, collecting our core statistics.
743
     *
744
     * Untestable because it relies on getting a PDO statement. All the important
745
     * logic lives in other methods which are tested.
746
     * @codeCoverageIgnore
747
     */
748
    private function parseHistory(): void
    {
        // Only cap the query when the page has more revisions than we are configured to process.
        $limit = $this->tooManyRevisions() ? $this->getMaxRevisions() : null;

        // Third parameter is ignored if $limit is null.
        $revStmt = $this->page->getRevisionsStmt(
            null,
            $limit,
            $this->getNumRevisions(),
            $this->start,
            $this->end
        );
        $revCount = 0;

        /**
         * Data about previous edits so that we can use them as a basis for comparison.
         * @var Edit[]
         */
        $prevEdits = [
            // The previous Edit, used to discount content that was reverted.
            'prev' => null,

            // The SHA-1 of the edit *before* the previous edit. Used for more
            // accurate revert detection.
            'prevSha' => null,

            // The last edit deemed to be the max addition of content. This is kept track of
            // in case we find out the next edit was reverted (and was also a max edit),
            // in which case we'll want to discount it and use this one instead.
            'maxAddition' => null,

            // Same as with maxAddition, except the maximum amount of content deleted.
            // This is used to discount content that was reverted.
            'maxDeletion' => null,
        ];

        while ($rev = $revStmt->fetch()) {
            $edit = new Edit($this->page, $rev);

            if (0 === $revCount) {
                $this->firstEdit = $edit;
            }

            // Sometimes, with old revisions (2001 era), the revisions from 2002 come before 2001
            if ($edit->getTimestamp() < $this->firstEdit->getTimestamp()) {
                $this->firstEdit = $edit;
            }

            $prevEdits = $this->updateCounts($edit, $prevEdits);

            $revCount++;
        }

        $this->numRevisionsProcessed = $revCount;

        // Various sorts. These properties have no default value, so they are still
        // null when the statement yielded no rows; sorting null would emit a
        // warning (PHP 7) or throw a TypeError (PHP 8).
        if (is_array($this->editors)) {
            arsort($this->editors);
        }
        if (is_array($this->yearMonthCounts)) {
            ksort($this->yearMonthCounts);
        }
        if (!empty($this->tools)) {
            arsort($this->tools);
        }
    }
814
815
    /**
     * Run a single edit through every per-edit counter, then advance the tracking state.
     *
     * The year/month and per-user entries are updated first because the later
     * counters (anon/minor, tools, content sizes) increment those same entries.
     *
     * @param Edit $edit The edit currently being processed.
     * @param mixed[] $prevEdits Tracking data with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits, to be passed in along with the next edit.
     */
    private function updateCounts(Edit $edit, array $prevEdits): array
    {
        // Totals for the year and month the edit was made in.
        $this->updateYearMonthCounts($edit);

        // Totals for the user who made the edit.
        $this->updateUserCounts($edit);

        // Anonymous and minor edit totals, overall and per year/month.
        $this->updateAnonMinorCounts($edit);

        // Automated tool usage, if applicable.
        $this->updateToolCounts($edit);

        // "Edits per <time>" counts.
        $this->updateCountHistory($edit);

        // Content addition/removal figures, and the revert count.
        $prevEdits = $this->updateContentSizes($edit, $prevEdits);

        // $edit is about to become 'prev'. Before that, keep the SHA of the edit it
        // replaces: if the next edit has that same SHA, we know the content added by
        // $prevEdits['prev'] was reverted (see self::isRevert()).
        $previous = $prevEdits['prev'];
        if (null !== $previous) {
            $prevEdits['prevSha'] = $previous->getSha();
        }

        $this->lastEdit = $edit;
        $prevEdits['prev'] = $edit;

        return $prevEdits;
    }
854
855
    /**
     * Update the content size figures for the given edit, dispatching on whether it is a revert.
     * @param Edit $edit
     * @param mixed[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits.
     */
    private function updateContentSizes(Edit $edit, array $prevEdits): array
    {
        return $this->isRevert($edit, $prevEdits)
            ? $this->updateContentSizesRevert($prevEdits)
            : $this->updateContentSizesNonRevert($edit, $prevEdits);
    }
870
871
    /**
     * Whether the given Edit is a revert: either its SHA equals the tracked 'prevSha',
     * or the Edit itself reports being a revert.
     * @param Edit $edit
     * @param mixed[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return bool
     */
    private function isRevert(Edit $edit, array $prevEdits): bool
    {
        // Same content as the revision before the previous one.
        $restoresEarlierContent = $edit->getSha() === $prevEdits['prevSha'];

        // Edit::isRevert() is only consulted when the SHAs do not match.
        return $restoresEarlierContent || $edit->isRevert($this->container);
    }
881
882
    /**
     * Update the content size figures given that the current edit reverted the previous one.
     * The previous edit is then no longer treated as a legitimate addition or removal of content.
     * @param mixed[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesRevert(array $prevEdits): array
    {
        $this->revertCount++;

        // The edit that was just undone.
        $reverted = $prevEdits['prev'];

        // Take the reverted addition back out of the page-wide and per-user added byte counts.
        if ($reverted && $reverted->getSize() > 0) {
            $revertedSize = $reverted->getSize();
            $this->addedBytes -= $revertedSize;
            $this->editors[$reverted->getUser()->getUsername()]['added'] -= $revertedSize;
        }

        // @TODO: Test this against an edit war (use your sandbox).
        // If the reverted edit held the record for largest addition, restore the
        // record holder it displaced.
        if ($this->maxAddition && $reverted->getId() === $this->maxAddition->getId()) {
            $this->maxAddition = $prevEdits['maxAddition'];
            $prevEdits['maxAddition'] = $reverted; // In the event of edit wars.

            return $prevEdits;
        }

        // Same for the largest deletion.
        if ($this->maxDeletion && $reverted->getId() === $this->maxDeletion->getId()) {
            $this->maxDeletion = $prevEdits['maxDeletion'];
            $prevEdits['maxDeletion'] = $reverted; // In the event of edit wars.
        }

        return $prevEdits;
    }
913
914
    /**
     * Update the content size figures given that the current edit did NOT revert the previous one.
     * @param Edit $edit
     * @param mixed[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesNonRevert(Edit $edit, array $prevEdits): array
    {
        $size = $this->getEditSize($edit, $prevEdits);

        if ($size > 0) {
            // Not a revert, so a positive size is content added, both overall and for the editor.
            $this->addedBytes += $size;
            $this->editors[$edit->getUser()->getUsername()]['added'] += $size;

            $isLargestAddition = !$this->maxAddition || $size > $this->maxAddition->getSize();
            if ($isLargestAddition) {
                // Remember the record being displaced: should $edit turn out to be reverted
                // by the next edit, the old record is put back.
                $prevEdits['maxAddition'] = $this->maxAddition;
                $this->maxAddition = $edit;
            }

            return $prevEdits;
        }

        if ($size < 0 && (!$this->maxDeletion || $size < $this->maxDeletion->getSize())) {
            // Likewise for deletions: keep the displaced record in case $edit gets reverted.
            $prevEdits['maxDeletion'] = $this->maxDeletion;
            $this->maxDeletion = $edit;
        }

        return $prevEdits;
    }
947
948
    /**
     * Get the size of the given edit, taking the previous edit (if any) into account.
     * When the previous revision has a length of null, 0 is returned instead of the actual size.
     * This happens when the edit follows other edits that were revision-deleted.
     * @see T148857 for more information.
     * @todo Remove once T101631 is resolved.
     * @param Edit $edit
     * @param mixed[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return int
     */
    private function getEditSize(Edit $edit, array $prevEdits): int
    {
        $previous = $prevEdits['prev'];
        $previousLengthUnknown = $previous && null === $previous->getLength();

        return $previousLengthUnknown ? 0 : $edit->getSize();
    }
966
967
    /**
     * Update the automated tool usage counts (overall, per year/month and per tool) for the given edit.
     * @param Edit $edit
     */
    private function updateToolCounts(Edit $edit): void
    {
        $tool = $edit->getTool($this->container);

        // A strict false means there is no tool to count for this edit.
        if (false === $tool) {
            return;
        }

        $year = $edit->getYear();
        $month = $edit->getMonth();

        $this->automatedCount++;
        $this->yearMonthCounts[$year]['automated']++;
        $this->yearMonthCounts[$year]['months'][$month]['automated']++;

        $toolName = $tool['name'];

        // Already seen this tool: just bump its counter.
        if (isset($this->tools[$toolName])) {
            $this->tools[$toolName]['count']++;
            return;
        }

        // First use of this tool on the page.
        $this->tools[$toolName] = [
            'count' => 1,
            'link' => $tool['link'],
        ];
    }
996
997
    /**
     * Update the totals for the year and month of the given edit, creating the
     * entry for the year first if this is the first edit seen for it.
     * @param Edit $edit
     */
    private function updateYearMonthCounts(Edit $edit): void
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Make sure there is an entry (with its blank month entries) for this year.
        if (!isset($this->yearMonthCounts[$year])) {
            $this->addYearMonthCountEntry($edit);
        }

        // Overwritten with every edit, so it ends up as the page size at the end of the year.
        $this->yearMonthCounts[$year]['size'] = (int) $edit->getLength();

        // Every edit counts towards its year and its month.
        $this->yearMonthCounts[$year]['all']++;
        $this->yearMonthCounts[$year]['months'][$month]['all']++;

        // Track the highest number of edits made in any single month.
        $monthTotal = $this->yearMonthCounts[$year]['months'][$month]['all'];
        if ($this->maxEditsPerMonth < $monthTotal) {
            $this->maxEditsPerMonth = $monthTotal;
        }
    }
1023
1024
    /**
     * Add a new entry to $this->yearMonthCounts for the year of the given edit, with
     * zeroed counts for the year and for each of its months that falls between the month
     * of the first edit and the cutoff returned by self::getLastDay(). The matching
     * year and month labels are recorded as well.
     * @param Edit $edit
     */
    private function addYearMonthCountEntry(Edit $edit): void
    {
        $year = $edit->getYear();
        $this->yearLabels[] = $this->i18n->dateFormat($edit->getTimestamp(), 'yyyy');

        // Counters shared by the year entry and each month entry.
        $blankCounts = [
            'all' => 0,
            'minor' => 0,
            'anon' => 0,
            'automated' => 0,
        ];

        // First day of the month of the first edit, at 00:00:00.
        $firstMonthStart = mktime(
            0,
            0,
            0,
            (int)$this->firstEdit->getMonth(),
            1,
            (int)$this->firstEdit->getYear()
        );

        $this->yearMonthCounts[$year] = $blankCounts + [
            'size' => 0, // Keep track of the size by the end of the year.
            'events' => [],
            'months' => [],
        ];

        foreach (range(1, 12) as $monthNum) {
            $monthStart = mktime(0, 0, 0, $monthNum, 1, (int)$year);

            // Only months from the first edit up to the last day get a (zeroed) entry;
            // months outside of that range are left out entirely.
            if ($monthStart >= $firstMonthStart && $monthStart <= $this->getLastDay()) {
                $this->monthLabels[] = $this->i18n->dateFormat($monthStart, 'yyyy-MM');
                $this->yearMonthCounts[$year]['months'][sprintf('%02d', $monthNum)] = $blankCounts;
            }
        }
    }
1064
1065
    /**
     * Update the anonymous and minor edit counts (overall, and for the year and month
     * of the given edit).
     * @param Edit $edit
     */
    private function updateAnonMinorCounts(Edit $edit): void
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Keys of the year/month counters that need to go up for this edit.
        $flags = [];

        if ($edit->isAnon()) {
            $this->anonCount++;
            $flags[] = 'anon';
        }

        if ($edit->isMinor()) {
            $this->minorCount++;
            $flags[] = 'minor';
        }

        foreach ($flags as $flag) {
            $this->yearMonthCounts[$year][$flag]++;
            $this->yearMonthCounts[$year]['months'][$month][$flag]++;
        }
    }
1088
1089
    /**
     * Update the statistics kept in $this->editors for the user who made the given edit.
     * @param Edit $edit
     */
    private function updateUserCounts(Edit $edit): void
    {
        $name = $edit->getUser()->getUsername();

        // First edit we see from this user: start them off with blank stats.
        if (!isset($this->editors[$name])) {
            $this->editors[$name] = [
                'all' => 0,
                'minor' => 0,
                'minorPercentage' => 0,
                'first' => $edit->getTimestamp(),
                'firstId' => $edit->getId(),
                'last' => null,
                'atbe' => null,
                'added' => 0,
            ];
        }

        $stats = $this->editors[$name];

        // This edit is, for now, the user's most recent one.
        $stats['all']++;
        $stats['last'] = $edit->getTimestamp();
        $stats['lastId'] = $edit->getId();

        if ($edit->isMinor()) {
            $stats['minor']++;
        }

        $this->editors[$name] = $stats;
    }
1121
1122
    /**
     * Increment the "edits in the past day/week/month/year" counts that the given edit falls within.
     * @param Edit $edit
     */
    private function updateCountHistory(Edit $edit): void
    {
        $timestamp = $edit->getTimestamp();

        // Counter key => how far back (relative to now) the counter reaches.
        $windows = [
            'day' => '-1 day',
            'week' => '-1 week',
            'month' => '-1 month',
            'year' => '-1 year',
        ];

        foreach ($windows as $period => $offset) {
            if ($timestamp > new DateTime($offset)) {
                $this->countHistory[$period]++;
            }
        }
    }
1143
1144
    /**
     * Get info about the bots that edited the page, as stored by self::setBots().
     * @return mixed[] Keyed by the bot's username, each with the bot's edit 'count' on the page
     *   and a 'current' flag telling whether they are currently a bot.
     */
    public function getBots(): array
    {
        return $this->bots;
    }
1152
1153
    /**
     * Fetch and store info about the bots that edited the page, sorted by edit count (highest first).
     * This is a private setter because the information is needed when computing the
     * top 10 editors, where we don't want to include bots.
     */
    private function setBots(): void
    {
        $rows = $this->getRepository()->getBotData($this->page, $this->start, $this->end);

        // Index the rows by username.
        $bots = [];
        while ($row = $rows->fetch()) {
            $bots[$row['username']] = [
                'count' => (int)$row['count'],
                // True only when the row says the account is a bot right now.
                'current' => 'bot' === $row['current'],
            ];
        }

        // Most edits first.
        uasort($bots, static function ($a, $b) {
            return $b['count'] <=> $a['count'];
        });

        $this->bots = $bots;
    }
1176
1177
    /**
     * Number of edits made to the page by current or former bots.
     * The result is computed once and then served from $this->botRevisionCount.
     * @param mixed[]|null $bots Used only in unit tests, where we supply mock data for the bots that will
     *   get processed. Same structure as the return value of self::getBots().
     * @return int
     */
    public function getBotRevisionCount(?array $bots = null): int
    {
        if (isset($this->botRevisionCount)) {
            return $this->botRevisionCount;
        }

        $botData = $bots ?? $this->getBots();

        // Add up the edit counts of all the bots.
        $total = array_reduce($botData, static function ($sum, $bot) {
            return $sum + $bot['count'];
        }, 0);

        $this->botRevisionCount = $total;

        return $total;
    }
1201
1202
    /**
     * Query for log events during each year of the article's history, and tally them
     * under the 'events' key of the matching year in $this->yearMonthCounts.
     *
     * Only protections (including pending-changes protections), deletions and moves are
     * counted. Events of any other log type are skipped.
     */
    private function setLogsEvents(): void
    {
        // Log type => key the event is counted under (these double as i18n keys).
        // Pending-changes ('stable') protections are counted along with normal protections.
        $actionsByLogType = [
            'protect' => 'protections',
            'stable' => 'protections',
            'delete' => 'deletions',
            'move' => 'moves',
        ];

        $logData = $this->getRepository()->getLogEvents(
            $this->page,
            $this->start,
            $this->end
        );

        foreach ($logData as $event) {
            $time = strtotime($event['timestamp']);
            $year = date('Y', $time);

            // NOTE(review): this stops at the first event whose year has no edits, rather than
            // skipping just that event. Confirm the ordering of the events makes this intended.
            if (!isset($this->yearMonthCounts[$year])) {
                break;
            }

            // Skip log types we don't report on. Without this, the event would be counted
            // under whichever action the previous iteration happened to resolve to.
            if (!isset($actionsByLogType[$event['log_type']])) {
                continue;
            }
            $action = $actionsByLogType[$event['log_type']];

            $yearEvents = $this->yearMonthCounts[$year]['events'];

            if (empty($yearEvents[$action])) {
                $yearEvents[$action] = 1;
            } else {
                $yearEvents[$action]++;
            }

            $this->yearMonthCounts[$year]['events'] = $yearEvents;
        }
    }
1249
1250
    /**
     * Set statistics about the top 10 editors by added text and number of edits.
     * This is ran *after* parseHistory() since we need the grand totals first.
     * Various stats are also set for each editor in $this->editors to be used in the charts:
     * the percentage of minor edits, and the average time between edits ('atbe', in days).
     *
     * The top 10 by number of edits are simply the first ten non-bot entries of
     * $this->editors, so this relies on that array already being sorted by edit count.
     */
    private function doPostPrecessing(): void
    {
        $topTenCount = 0;
        $topTenEditorsByEdits = [];

        foreach ($this->editors as $editor => $info) {
            // Collect the top 10 editors by number of edits, excluding bots. The bot check is a
            // key lookup: a loose in_array() against the bot names would, on PHP 7, treat an
            // editor whose name became the integer key 0 as matching any bot name.
            if (count($topTenEditorsByEdits) < 10 && !array_key_exists($editor, $this->bots)) {
                $topTenCount += $info['all'];

                // To be used in the Top Ten charts.
                $topTenEditorsByEdits[] = [
                    'label' => $editor,
                    'value' => $info['all'],
                ];
            }

            // Compute the percentage of minor edits the user made.
            $this->editors[$editor]['minorPercentage'] = $info['all']
                ? ($info['minor'] / $info['all']) * 100
                : 0;

            if ($info['all'] > 1) {
                // Number of seconds/days between first and last edit.
                $secs = $info['last']->getTimestamp() - $info['first']->getTimestamp();
                $days = $secs / (60 * 60 * 24);

                // Average time between edits (in days).
                $this->editors[$editor]['atbe'] = $days / $info['all'];
            }
        }

        // Add each editor's share of the edits made by the top 10 as a whole.
        $this->topTenEditorsByEdits = array_map(static function ($entry) use ($topTenCount) {
            $entry['percentage'] = 100 * ($entry['value'] / $topTenCount);
            return $entry;
        }, $topTenEditorsByEdits);

        $this->topTenEditorsByAdded = $this->getTopTenByAdded();

        $this->topTenCount = $topTenCount;
    }
1298
1299
    /**
     * Get the top ten editors by added text.
     * The percentage of each editor is relative to $this->addedBytes (0 when nothing was added).
     * @return array With keys 'label', 'value' and 'percentage', ready to be used by the pieChart Twig helper.
     */
    private function getTopTenByAdded(): array
    {
        // Work on a copy of the editors, ordered by the amount of text they added (most first).
        $editorsByAdded = $this->editors;
        uasort($editorsByAdded, static function ($a, $b) {
            return $b['added'] <=> $a['added'];
        });

        // Usernames of the first ten.
        $topTenNames = array_keys(array_slice($editorsByAdded, 0, 10, true));

        // Build the data structure needed for the chart.
        $chartData = [];
        foreach ($topTenNames as $editor) {
            $added = $this->editors[$editor]['added'];

            $chartData[] = [
                'label' => $editor,
                'value' => $added,
                'percentage' => 0 === $this->addedBytes
                    ? 0
                    : 100 * ($added / $this->addedBytes),
            ];
        }

        return $chartData;
    }
1334
1335
    /**
     * Get authorship attribution from the WikiWho API.
     * The result is computed once and then served from $this->textshares, regardless of $limit.
     * @see https://f-squared.org/wikiwho/
     * @param int|null $limit Max number of results, or null for all of them.
     * @return array On failure, only an 'error' key. Otherwise:
     *   'list' (username or IP => 'count' and 'percentage'), 'totalAuthors', 'totalCount' and,
     *   when there are editors beyond the (at most ten) that were summed up for the chart,
     *   'others' with their combined 'count', 'percentage' and 'numEditors'.
     */
    public function getTextshares(?int $limit = null): array
    {
        if (isset($this->textshares)) {
            return $this->textshares;
        }

        // TODO: check for failures. Should have a success:true
        $ret = $this->getRepository()->getTextshares($this->page);

        // If revision can't be found, return error message.
        if (!isset($ret['revisions'][0])) {
            return [
                'error' => $ret['Error'] ?? 'Unknown',
            ];
        }

        // The first revision entry is keyed by its revision ID.
        $revId = array_keys($ret['revisions'][0])[0];
        $tokens = $ret['revisions'][0][$revId]['tokens'];

        [$counts, $totalCount, $userIds] = $this->countTokens($tokens);
        $usernameMap = $this->getUsernameMap($userIds);

        $countsToProcess = null === $limit
            ? $counts
            : array_slice($counts, 0, $limit, true);

        $textshares = [];

        // Used to get the character count and percentage of the remaining N editors, after the top $limit.
        $percentageSum = 0;
        $countSum = 0;
        $numEditors = 0;

        // Loop through once more, creating an array with the user names (or IP addresses)
        // as the key, and the count and percentage as the value.
        foreach ($countsToProcess as $editor => $count) {
            // Accounts are looked up by user ID; anything not in the map (IPs) is used as-is.
            $index = $usernameMap[$editor] ?? $editor;

            // $totalCount is 0 when every token is an empty string; don't divide by it then.
            $percentage = $totalCount > 0
                ? round(100 * ($count / $totalCount), 1)
                : 0.0;

            // If we are showing > 10 editors in the table, we still only want the top 10 for the chart.
            if ($numEditors < 10) {
                $percentageSum += $percentage;
                $countSum += $count;
                $numEditors++;
            }

            $textshares[$index] = [
                'count' => $count,
                'percentage' => $percentage,
            ];
        }

        $this->textshares = [
            'list' => $textshares,
            'totalAuthors' => count($counts),
            'totalCount' => $totalCount,
        ];

        // Record character count and percentage for the remaining editors. Whether there are any
        // is decided by counting them, not by the sum of the rounded percentages: that sum can
        // end up just below or above 100 even though every editor was accounted for.
        $remainingEditors = count($counts) - $numEditors;
        if ($remainingEditors > 0) {
            $this->textshares['others'] = [
                'count' => $totalCount - $countSum,
                'percentage' => max(0.0, round(100 - $percentageSum, 1)),
                'numEditors' => $remainingEditors,
            ];
        }

        return $this->textshares;
    }
1417
1418
    /**
     * Get a map of user IDs to usernames, given the IDs.
     * No lookup is made when there are no IDs.
     * @param int[] $userIds
     * @return array IDs as keys, usernames as values.
     */
    private function getUsernameMap(array $userIds): array
    {
        if ([] === $userIds) {
            return [];
        }

        $rows = $this->getRepository()->getUsernamesFromIds(
            $this->page->getProject(),
            $userIds
        );

        $map = [];
        foreach ($rows as $row) {
            $map[$row['user_id']] = $row['user_name'];
        }

        return $map;
    }
1441
1442
    /**
     * Get counts of token lengths for each author. Used in self::getTextshares()
     * @param array $tokens Each with an 'editor' (user ID, or IP prefixed with '0|') and the token's 'str'.
     * @return array [counts by user (highest first), total count, IDs of accounts (each listed once)]
     */
    private function countTokens(array $tokens): array
    {
        $counts = [];
        $userIds = [];
        $totalCount = 0;

        // Loop through the tokens, keeping totals (token length) for each author.
        foreach ($tokens as $token) {
            $editor = $token['editor'];

            // IPs are prefixed with '0|', otherwise it's the user ID.
            if ('0|' === substr($editor, 0, 2)) {
                $editor = substr($editor, 2);
            } else {
                // Keyed by ID so that an account is listed once, no matter how many tokens
                // it authored. The list is what the usernames get looked up with.
                $userIds[$editor] = $editor;
            }

            if (!isset($counts[$editor])) {
                $counts[$editor] = 0;
            }

            // NOTE(review): strlen() counts bytes, so a multi-byte character weighs more
            // than one. Confirm whether characters were intended.
            $length = strlen($token['str']);
            $counts[$editor] += $length;
            $totalCount += $length;
        }

        // Sort authors by count.
        arsort($counts);

        return [$counts, $totalCount, array_values($userIds)];
    }
1477
1478
    /**
     * Get the domain names of the wikis supported by WikiWho (self::TEXTSHARE_WIKIS).
     * @return string[]
     * @codeCoverageIgnore
     */
    public function getTextshareWikis(): array
    {
        return self::TEXTSHARE_WIKIS;
    }
1487
1488
    /**
     * Get prose and reference information, based on the HTML content of the page
     * (as of $this->end, if that is set).
     * @return array With keys 'characters', 'words', 'references', 'unique_references' and 'sections'.
     */
    public function getProseStats(): array
    {
        $datetime = null;
        if (false !== $this->end) {
            $datetime = new DateTime('@'.$this->end);
        }

        $crawler = new Crawler($this->page->getHTMLContent($datetime));

        // Prose is whatever is inside of paragraphs.
        [$chars, $words] = $this->countCharsAndWords($crawler, '#mw-content-text p');

        // References with the same text are counted only once as unique references.
        $refs = $crawler->filter('#mw-content-text .reference');
        $refTexts = $refs->each(static function ($ref) {
            return $ref->text();
        });

        return [
            'characters' => $chars,
            'words' => $words,
            'references' => $refs->count(),
            'unique_references' => count(array_unique($refTexts)),
            'sections' => $crawler->filter('#mw-content-text .mw-headline')->count(),
        ];
    }
1518
1519
    /**
     * Count the number of characters and words of the plain text within the DOM elements matched
     * by the given selector. Reference markers such as "[1]" are left out of the counts, and
     * elements without any text contribute nothing.
     * @param Crawler $crawler
     * @param string $selector HTML selector.
     * @return array [num chars, num words]
     */
    private function countCharsAndWords(Crawler $crawler, string $selector): array
    {
        $totalChars = 0;
        $totalWords = 0;

        $crawler->filter($selector)->each(function ($node) use (&$totalChars, &$totalWords): void {
            // Strip the reference markers first and trim afterwards, so that the whitespace that
            // preceded a trailing marker does not count as a character (or as an extra word).
            $text = trim(preg_replace('/\[\d+\]/', '', $node->text()));

            // NOTE(review): strlen() counts bytes, not characters. Confirm this is intended
            // for text that contains multi-byte characters.
            $totalChars += strlen($text);

            // Split on runs of whitespace and ignore empty pieces: an empty paragraph has
            // zero words, and the gap left behind by a stripped marker is not a word.
            $totalWords += count(preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY));
        });

        return [$totalChars, $totalWords];
    }
1538
1539
    /**
     * Get transclusion data (categories, templates and files) for the page. It is fetched
     * from the repository on first use and kept in $this->transclusionData.
     * @return array With keys 'categories', 'templates' and 'files'.
     */
    private function getTransclusionData(): array
    {
        // Already fetched.
        if (is_array($this->transclusionData)) {
            return $this->transclusionData;
        }

        $this->transclusionData = $this->getRepository()
            ->getTransclusionData($this->page);

        return $this->transclusionData;
    }
1551
1552
    /**
     * Get the number of categories that are on the page, as per the transclusion data.
     * @return int
     */
    public function getNumCategories(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['categories'];
    }
1560
1561
    /**
     * Get the number of templates that are on the page, as per the transclusion data.
     * @return int
     */
    public function getNumTemplates(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['templates'];
    }
1569
1570
    /**
     * Get the number of files that are on the page, as per the transclusion data.
     * @return int
     */
    public function getNumFiles(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['files'];
    }
1578
}
1579