Passed
Push — master ( 77644d...1d6156 )
by MusikAnimal
06:06
created

ArticleInfo::topTenEditorsByEdits()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the ArticleInfo class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Model;
9
10
use AppBundle\Helper\I18nHelper;
11
use DateTime;
12
use Doctrine\DBAL\Statement;
13
use Symfony\Component\DependencyInjection\ContainerInterface;
14
use Symfony\Component\DomCrawler\Crawler;
15
16
/**
17
 * An ArticleInfo provides statistics about a page on a project.
18
 */
19
class ArticleInfo extends Model
20
{
21
    /** @const string[] Domain names of wikis supported by WikiWho. */
22
    public const TEXTSHARE_WIKIS = [
23
        'en.wikipedia.org',
24
        'de.wikipedia.org',
25
        'eu.wikipedia.org',
26
        'tr.wikipedia.org',
27
        'es.wikipedia.org',
28
    ];
29
30
    /** @var ContainerInterface The application's DI container. */
31
    protected $container;
32
33
    /** @var I18nHelper For i18n and l10n. */
34
    protected $i18n;
35
36
    /** @var int Number of revisions that belong to the page. */
37
    protected $numRevisions;
38
39
    /** @var int Maximum number of revisions to process, as configured. */
40
    protected $maxRevisions;
41
42
    /** @var int Number of revisions that were actually processed. */
43
    protected $numRevisionsProcessed;
44
45
    /**
46
     * Various statistics about editors to the page. These are not User objects
47
     * so as to preserve memory.
48
     * @var mixed[]
49
     */
50
    protected $editors = [];
51
52
    /** @var mixed[] The top 10 editors to the page by number of edits. */
53
    protected $topTenEditorsByEdits;
54
55
    /** @var mixed[] The top 10 editors to the page by added text. */
56
    protected $topTenEditorsByAdded;
57
58
    /** @var int Number of edits made by the top 10 editors. */
59
    protected $topTenCount;
60
61
    /** @var mixed[] Various statistics about bots that edited the page. */
62
    protected $bots;
63
64
    /** @var int Number of edits made to the page by bots. */
65
    protected $botRevisionCount;
66
67
    /** @var mixed[] Various counts about each individual year and month of the page's history. */
68
    protected $yearMonthCounts;
69
70
    /** @var string[] Localized labels for the years, to be used in the 'Year counts' chart. */
71
    protected $yearLabels = [];
72
73
    /** @var string[] Localized labels for the months, to be used in the 'Month counts' chart. */
74
    protected $monthLabels = [];
75
76
    /** @var Edit The first edit to the page. */
77
    protected $firstEdit;
78
79
    /** @var Edit The last edit to the page. */
80
    protected $lastEdit;
81
82
    /** @var Edit Edit that made the largest addition by number of bytes. */
83
    protected $maxAddition;
84
85
    /** @var Edit Edit that made the largest deletion by number of bytes. */
86
    protected $maxDeletion;
87
88
    /** @var int[] Number of in and outgoing links and redirects to the page. */
89
    protected $linksAndRedirects;
90
91
    /** @var string[] Assessments of the page (see Page::getAssessments). */
92
    protected $assessments;
93
94
    /**
95
     * Maximum number of edits that were created across all months. This is used as a comparison
96
     * for the bar charts in the months section.
97
     * @var int
98
     */
99
    protected $maxEditsPerMonth;
100
101
    /** @var string[] List of (semi-)automated tools that were used to edit the page. */
102
    protected $tools;
103
104
    /**
105
     * Total number of bytes added throughout the page's history. This is used as a comparison
106
     * when computing the top 10 editors by added text.
107
     * @var int
108
     */
109
    protected $addedBytes = 0;
110
111
    /** @var int Number of days between first and last edit. */
112
    protected $totalDays;
113
114
    /** @var int Number of minor edits to the page. */
115
    protected $minorCount = 0;
116
117
    /** @var int Number of anonymous edits to the page. */
118
    protected $anonCount = 0;
119
120
    /** @var int Number of automated edits to the page. */
121
    protected $automatedCount = 0;
122
123
    /** @var int Number of edits to the page that were reverted with the subsequent edit. */
124
    protected $revertCount = 0;
125
126
    /** @var int[] The "edits per <time>" counts. */
127
    protected $countHistory = [
128
        'day' => 0,
129
        'week' => 0,
130
        'month' => 0,
131
        'year' => 0,
132
    ];
133
134
    /** @var string[] List of wikidata and Checkwiki errors. */
135
    protected $bugs;
136
137
    /** @var array List of editors and the percentage of the current content that they authored. */
138
    protected $textshares;
139
140
    /** @var array Number of categories, templates and files on the page. */
141
    protected $transclusionData;
142
143
    /**
144
     * ArticleInfo constructor.
145
     * @param Page $page The page to process.
146
     * @param ContainerInterface $container The DI container.
147
     * @param false|int $start From what date to obtain records.
148
     * @param false|int $end To what date to obtain records.
149
     */
150 13
    public function __construct(Page $page, ContainerInterface $container, $start = false, $end = false)
151
    {
152 13
        $this->page = $page;
153 13
        $this->container = $container;
154 13
        $this->start = $start;
155 13
        $this->end = $end;
156 13
    }
157
158
    /**
159
     * Make the I18nHelper accessible to ArticleInfo.
160
     * @param I18nHelper $i18n
161
     * @codeCoverageIgnore
162
     */
163
    public function setI18nHelper(I18nHelper $i18n): void
164
    {
165
        $this->i18n = $i18n;
166
    }
167
168
    /**
169
     * Get the opening date of the date range, formatted as it is used in the views.
170
     * @return string Blank if no value exists.
171
     */
172 1
    public function getStartDate(): string
    {
        // No start date was supplied. Compare strictly: a loose `'' == $value` check
        // gives different answers for the integer 0 on PHP 7 and PHP 8.
        if (false === $this->start || null === $this->start || '' === $this->start) {
            return '';
        }

        // Static analysis reports the property may hold a string, and date() only
        // accepts an int under strict_types, so cast explicitly.
        return date('Y-m-d', (int)$this->start);
    }
176
177
    /**
178
     * Get the closing date of the date range, formatted as it is used in the views.
179
     * @return string Blank if no value exists.
180
     */
181 1
    public function getEndDate(): string
    {
        // No end date was supplied. Compare strictly: a loose `'' == $value` check
        // gives different answers for the integer 0 on PHP 7 and PHP 8.
        if (false === $this->end || null === $this->end || '' === $this->end) {
            return '';
        }

        // Static analysis reports the property may hold a string, and date() only
        // accepts an int under strict_types, so cast explicitly.
        return date('Y-m-d', (int)$this->end);
    }
185
186
    /**
187
     * Get the last day of the final month we should show in the month/year sections,
188
     * based on $this->end or the current date.
189
     * @return int As Unix timestamp.
190
     */
191 4
    private function getLastDay(): int
    {
        // Without an explicit end date, fall back to the current month.
        if (false === $this->end) {
            return strtotime('last day of this month');
        }

        // Otherwise move the given end timestamp to the last day of its month.
        $endDate = new DateTime('@'.$this->end);

        return $endDate
            ->modify('last day of this month')
            ->getTimestamp();
    }
201
202
    /**
203
     * Return the start/end date values as associative array, with YYYY-MM-DD as the date format.
204
     * This is used mainly as a helper to pass to the pageviews Twig macros.
205
     * @return array
206
     */
207 1
    public function getDateParams(): array
208
    {
209 1
        if (!$this->hasDateRange()) {
210
            return [];
211
        }
212
213
        $ret = [
214 1
            'start' => $this->firstEdit->getTimestamp()->format('Y-m-d'),
215 1
            'end' => $this->lastEdit->getTimestamp()->format('Y-m-d'),
216
        ];
217
218 1
        if (false !== $this->start) {
219 1
            $ret['start'] = date('Y-m-d', $this->start);
0 ignored issues
show
Bug introduced by
It seems like $this->start can also be of type string and true; however, parameter $timestamp of date() does only seem to accept integer, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

219
            $ret['start'] = date('Y-m-d', /** @scrutinizer ignore-type */ $this->start);
Loading history...
220
        }
221 1
        if (false !== $this->end) {
222 1
            $ret['end'] = date('Y-m-d', $this->end);
223
        }
224
225 1
        return $ret;
226
    }
227
228
    /**
229
     * Get the number of revisions belonging to the page.
230
     * @return int
231
     */
232 4
    public function getNumRevisions(): int
233
    {
234 4
        if (!isset($this->numRevisions)) {
235 4
            $this->numRevisions = $this->page->getNumRevisions(null, $this->start, $this->end);
0 ignored issues
show
Bug introduced by
It seems like $this->end can also be of type string; however, parameter $end of AppBundle\Model\Page::getNumRevisions() does only seem to accept false|integer, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

235
            $this->numRevisions = $this->page->getNumRevisions(null, $this->start, /** @scrutinizer ignore-type */ $this->end);
Loading history...
Bug introduced by
It seems like $this->start can also be of type string; however, parameter $start of AppBundle\Model\Page::getNumRevisions() does only seem to accept false|integer, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

235
            $this->numRevisions = $this->page->getNumRevisions(null, /** @scrutinizer ignore-type */ $this->start, $this->end);
Loading history...
236
        }
237 4
        return $this->numRevisions;
238
    }
239
240
    /**
241
     * Get the maximum number of revisions that we should process.
242
     * @return int
243
     */
244 3
    public function getMaxRevisions(): int
245
    {
246 3
        if (!isset($this->maxRevisions)) {
247 3
            $this->maxRevisions = (int) $this->container->getParameter('app.max_page_revisions');
248
        }
249 3
        return $this->maxRevisions;
250
    }
251
252
    /**
253
     * Get the number of revisions that are actually getting processed. This goes by the app.max_page_revisions
254
     * parameter, or the actual number of revisions, whichever is smaller.
255
     * @return int
256
     */
257 3
    public function getNumRevisionsProcessed(): int
    {
        // Compute lazily and remember the answer for subsequent calls.
        if (!isset($this->numRevisionsProcessed)) {
            // Use the configured cap when the page exceeds it, else the real revision count.
            $this->numRevisionsProcessed = $this->tooManyRevisions()
                ? $this->getMaxRevisions()
                : $this->getNumRevisions();
        }

        return $this->numRevisionsProcessed;
    }
271
272
    /**
273
     * Are there more revisions than we should process, based on the config?
274
     * @return bool
275
     */
276 3
    public function tooManyRevisions(): bool
277
    {
278 3
        return $this->getMaxRevisions() > 0 && $this->getNumRevisions() > $this->getMaxRevisions();
279
    }
280
281
    /**
282
     * Fetch and store all the data we need to show the ArticleInfo view.
283
     * @codeCoverageIgnore
284
     */
285
    public function prepareData(): void
286
    {
287
        $this->parseHistory();
288
        $this->setLogsEvents();
289
290
        // Bots need to be set before setting top 10 counts.
291
        $this->setBots();
292
293
        $this->doPostPrecessing();
294
    }
295
296
    /**
297
     * Get the number of editors that edited the page.
298
     * @return int
299
     */
300 1
    public function getNumEditors(): int
301
    {
302 1
        return count($this->editors);
303
    }
304
305
    /**
306
     * Get the number of bots that edited the page.
307
     * @return int
308
     */
309
    public function getNumBots(): int
310
    {
311
        return count($this->getBots());
312
    }
313
314
    /**
315
     * Get the number of days between the first and last edit.
316
     * @return int
317
     */
318 1
    public function getTotalDays(): int
    {
        // Compute once, then serve the stored value.
        if (!isset($this->totalDays)) {
            $earliest = $this->firstEdit->getTimestamp();
            $latest = $this->lastEdit->getTimestamp();

            // '%a' is the total number of days in the (absolute) interval.
            $this->totalDays = (int)date_diff($latest, $earliest, true)->format('%a');
        }

        return $this->totalDays;
    }
329
330
    /**
331
     * Returns length of the page.
332
     * @return int
333
     */
334 1
    public function getLength(): int
335
    {
336 1
        if ($this->hasDateRange()) {
337 1
            return $this->lastEdit->getLength();
338
        }
339
340
        return $this->page->getLength();
341
    }
342
343
    /**
344
     * Get the average number of days between edits to the page.
345
     * @return float
346
     */
347 1
    public function averageDaysPerEdit(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // With no processed revisions there is nothing to average. Return 0.0 instead of
        // dividing by zero, which throws DivisionByZeroError on PHP 8.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round($this->getTotalDays() / $numRevisions, 1);
    }
351
352
    /**
353
     * Get the average number of edits per day to the page.
354
     * @return float
355
     */
356 1
    public function editsPerDay(): float
357
    {
358 1
        $editsPerDay = $this->getTotalDays()
359 1
            ? $this->getNumRevisionsProcessed() / ($this->getTotalDays() / (365 / 12 / 24))
360 1
            : 0;
361 1
        return round($editsPerDay, 1);
362
    }
363
364
    /**
365
     * Get the average number of edits per month to the page.
366
     * @return float
367
     */
368 1
    public function editsPerMonth(): float
369
    {
370 1
        $editsPerMonth = $this->getTotalDays()
371 1
            ? $this->getNumRevisionsProcessed() / ($this->getTotalDays() / (365 / 12))
372 1
            : 0;
373 1
        return min($this->getNumRevisionsProcessed(), round($editsPerMonth, 1));
374
    }
375
376
    /**
377
     * Get the average number of edits per year to the page.
378
     * @return float
379
     */
380 1
    public function editsPerYear(): float
381
    {
382 1
        $editsPerYear = $this->getTotalDays()
383 1
            ? $this->getNumRevisionsProcessed() / ($this->getTotalDays() / 365)
384 1
            : 0;
385 1
        return min($this->getNumRevisionsProcessed(), round($editsPerYear, 1));
386
    }
387
388
    /**
389
     * Get the average number of edits per editor.
390
     * @return float
391
     */
392 1
    public function editsPerEditor(): float
    {
        $numEditors = count($this->editors);

        // $this->editors defaults to an empty array. Return 0.0 rather than dividing
        // by zero, which throws DivisionByZeroError on PHP 8.
        if (0 === $numEditors) {
            return 0.0;
        }

        return round($this->getNumRevisionsProcessed() / $numEditors, 1);
    }
396
397
    /**
398
     * Get the percentage of minor edits to the page.
399
     * @return float
400
     */
401 1
    public function minorPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // No processed revisions means no minor edits either. Return 0.0 rather than
        // dividing by zero, which throws DivisionByZeroError on PHP 8.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(
            ($this->minorCount / $numRevisions) * 100,
            1
        );
    }
408
409
    /**
410
     * Get the percentage of anonymous edits to the page.
411
     * @return float
412
     */
413 1
    public function anonPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // No processed revisions means no anonymous edits either. Return 0.0 rather than
        // dividing by zero, which throws DivisionByZeroError on PHP 8.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(
            ($this->anonCount / $numRevisions) * 100,
            1
        );
    }
420
421
    /**
422
     * Get the percentage of edits made by the top 10 editors.
423
     * @return float
424
     */
425 1
    public function topTenPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Return 0.0 when nothing was processed rather than dividing by zero,
        // which throws DivisionByZeroError on PHP 8.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->topTenCount / $numRevisions) * 100, 1);
    }
429
430
    /**
431
     * Get the number of times the page has been viewed in the given timeframe. If the ArticleInfo instance has a
432
     * date range, it is used instead of the value of the $latest parameter.
433
     * @param  int $latest Last N days.
434
     * @return int
435
     */
436
    public function getPageviews(int $latest): int
437
    {
438
        if (!$this->hasDateRange()) {
439
            return $this->page->getLastPageviews($latest);
440
        }
441
442
        $daterange = $this->getDateParams();
443
        return $this->page->getPageviews($daterange['start'], $daterange['end']);
444
    }
445
446
    /**
447
     * Get the page assessments of the page.
448
     * @see https://www.mediawiki.org/wiki/Extension:PageAssessments
449
     * @return string[]|false False if unsupported.
450
     * @codeCoverageIgnore
451
     */
452
    public function getAssessments()
453
    {
454
        if (!is_array($this->assessments)) {
0 ignored issues
show
introduced by
The condition is_array($this->assessments) is always true.
Loading history...
455
            $this->assessments = $this->page
456
                ->getProject()
457
                ->getPageAssessments()
458
                ->getAssessments($this->page);
459
        }
460
        return $this->assessments;
461
    }
462
463
    /**
464
     * Get the number of automated edits made to the page.
465
     * @return int
466
     */
467 1
    public function getAutomatedCount(): int
468
    {
469 1
        return $this->automatedCount;
470
    }
471
472
    /**
473
     * Get the number of edits to the page that were reverted with the subsequent edit.
474
     * @return int
475
     */
476 1
    public function getRevertCount(): int
477
    {
478 1
        return $this->revertCount;
479
    }
480
481
    /**
482
     * Get the number of edits to the page made by logged out users.
483
     * @return int
484
     */
485 1
    public function getAnonCount(): int
486
    {
487 1
        return $this->anonCount;
488
    }
489
490
    /**
491
     * Get the number of minor edits to the page.
492
     * @return int
493
     */
494 1
    public function getMinorCount(): int
495
    {
496 1
        return $this->minorCount;
497
    }
498
499
    /**
500
     * Get the number of edits to the page made in the past day, week, month and year.
501
     * @return int[] With keys 'day', 'week', 'month' and 'year'.
502
     */
503
    public function getCountHistory(): array
504
    {
505
        return $this->countHistory;
506
    }
507
508
    /**
509
     * Get the number of edits to the page made by the top 10 editors.
510
     * @return int
511
     */
512 1
    public function getTopTenCount(): int
513
    {
514 1
        return $this->topTenCount;
515
    }
516
517
    /**
518
     * Get the top editors to the page by edit count.
519
     * @param int $limit Default 20, maximum 1,000.
520
     * @param bool $noBots Set to non-false to exclude bots from the result.
521
     * @return array
522
     */
523
    public function getTopEditorsByEditCount(int $limit = 20, bool $noBots = false): array
    {
        // Enforce the documented maximum before the value is used for caching or querying.
        $limit = min($limit, 1000);

        // Quick cache, valid only for the same request. A function-level static is shared by
        // every ArticleInfo instance and every argument combination, so results are keyed by
        // the instance and the effective arguments. A single shared slot would hand back the
        // first call's result to every later call.
        // NOTE: spl_object_hash() values can be reused once an object is destroyed; this is
        // acceptable for a per-request cache but should be revisited if instances are recycled.
        static $cache = [];
        $cacheKey = spl_object_hash($this).'|'.$limit.'|'.(int)$noBots;
        if (isset($cache[$cacheKey])) {
            return $cache[$cacheKey];
        }

        $rows = $this->getRepository()->getTopEditorsByEditCount(
            $this->page,
            $this->start,
            $this->end,
            $limit,
            $noBots
        );

        // Reshape each database row, adding a 1-based rank in the order the rows were returned.
        $topEditors = [];
        $rank = 0;
        foreach ($rows as $row) {
            $topEditors[] = [
                'rank' => ++$rank,
                'username' => $row['username'],
                'count' => $row['count'],
                'minor' => $row['minor'],
                'first_edit' => [
                    'id' => $row['first_revid'],
                    'timestamp' => $row['first_timestamp'],
                ],
                'latest_edit' => [
                    'id' => $row['latest_revid'],
                    'timestamp' => $row['latest_timestamp'],
                ],
            ];
        }

        $cache[$cacheKey] = $topEditors;

        return $topEditors;
    }
560
561
    /**
562
     * Get the first edit to the page.
563
     * @return Edit
564
     */
565 1
    public function getFirstEdit(): Edit
566
    {
567 1
        return $this->firstEdit;
568
    }
569
570
    /**
571
     * Get the last edit to the page.
572
     * @return Edit
573
     */
574 1
    public function getLastEdit(): Edit
575
    {
576 1
        return $this->lastEdit;
577
    }
578
579
    /**
580
     * Get the edit that made the largest addition to the page (by number of bytes).
581
     * @return Edit|null
582
     */
583 1
    public function getMaxAddition(): ?Edit
584
    {
585 1
        return $this->maxAddition;
586
    }
587
588
    /**
589
     * Get the edit that made the largest removal to the page (by number of bytes).
590
     * @return Edit|null
591
     */
592 1
    public function getMaxDeletion(): ?Edit
593
    {
594 1
        return $this->maxDeletion;
595
    }
596
597
    /**
598
     * Get the list of editors to the page, including various statistics.
599
     * @return mixed[]
600
     */
601 1
    public function getEditors(): array
602
    {
603 1
        return $this->editors;
604
    }
605
606
    /**
607
     * Get the list of the top editors to the page (by edits), including various statistics.
608
     * @return mixed[]
609
     */
610 1
    public function topTenEditorsByEdits(): array
611
    {
612 1
        return $this->topTenEditorsByEdits;
613
    }
614
615
    /**
616
     * Get the list of the top editors to the page (by added text), including various statistics.
617
     * @return mixed[]
618
     */
619 1
    public function topTenEditorsByAdded(): array
620
    {
621 1
        return $this->topTenEditorsByAdded;
622
    }
623
624
    /**
625
     * Get various counts about each individual year and month of the page's history.
626
     * @return mixed[]
627
     */
628 2
    public function getYearMonthCounts(): array
629
    {
630 2
        return $this->yearMonthCounts;
631
    }
632
633
    /**
634
     * Get the localized labels for the 'Year counts' chart.
635
     * @return string[]
636
     */
637
    public function getYearLabels(): array
638
    {
639
        return $this->yearLabels;
640
    }
641
642
    /**
643
     * Get the localized labels for the 'Month counts' chart.
644
     * @return string[]
645
     */
646
    public function getMonthLabels(): array
647
    {
648
        return $this->monthLabels;
649
    }
650
651
    /**
652
     * Get the maximum number of edits that were created across all months. This is used as a
653
     * comparison for the bar charts in the months section.
654
     * @return int
655
     */
656 1
    public function getMaxEditsPerMonth(): int
657
    {
658 1
        return $this->maxEditsPerMonth;
659
    }
660
661
    /**
662
     * Get a list of (semi-)automated tools that were used to edit the page, including
663
     * the number of times they were used, and a link to the tool's homepage.
664
     * @return string[]
665
     */
666 1
    public function getTools(): array
667
    {
668 1
        return $this->tools;
669
    }
670
671
    /**
672
     * Get the list of page's wikidata and Checkwiki errors.
673
     * @see Page::getErrors()
674
     * @return string[]
675
     */
676
    public function getBugs(): array
677
    {
678
        if (!is_array($this->bugs)) {
0 ignored issues
show
introduced by
The condition is_array($this->bugs) is always true.
Loading history...
679
            $this->bugs = $this->page->getErrors();
680
        }
681
        return $this->bugs;
682
    }
683
684
    /**
685
     * Get the number of Wikidata and CheckWiki errors.
686
     * @return int
687
     */
688
    public function numBugs(): int
689
    {
690
        return count($this->getBugs());
691
    }
692
693
    /**
694
     * Get the number of external links on the page.
695
     * @return int
696
     */
697 1
    public function linksExtCount(): int
698
    {
699 1
        return $this->getLinksAndRedirects()['links_ext_count'];
700
    }
701
702
    /**
703
     * Get the number of incoming links to the page.
704
     * @return int
705
     */
706 1
    public function linksInCount(): int
707
    {
708 1
        return $this->getLinksAndRedirects()['links_in_count'];
709
    }
710
711
    /**
712
     * Get the number of outgoing links from the page.
713
     * @return int
714
     */
715 1
    public function linksOutCount(): int
716
    {
717 1
        return $this->getLinksAndRedirects()['links_out_count'];
718
    }
719
720
    /**
721
     * Get the number of redirects to the page.
722
     * @return int
723
     */
724 1
    public function redirectsCount(): int
725
    {
726 1
        return $this->getLinksAndRedirects()['redirects_count'];
727
    }
728
729
    /**
730
     * Get the number of external, incoming and outgoing links, along with the number of redirects to the page.
731
     * @return int[]
732
     * @codeCoverageIgnore
733
     */
734
    private function getLinksAndRedirects(): array
735
    {
736
        if (!is_array($this->linksAndRedirects)) {
0 ignored issues
show
introduced by
The condition is_array($this->linksAndRedirects) is always true.
Loading history...
737
            $this->linksAndRedirects = $this->page->countLinksAndRedirects();
738
        }
739
        return $this->linksAndRedirects;
740
    }
741
742
    /**
743
     * Parse the revision history, collecting our core statistics.
744
     *
745
     * Untestable because it relies on getting a PDO statement. All the important
746
     * logic lives in other methods which are tested.
747
     * @codeCoverageIgnore
748
     */
749
    private function parseHistory(): void
    {
        // Only cap the number of revisions fetched when the page exceeds the configured maximum.
        $limit = $this->tooManyRevisions() ? $this->getMaxRevisions() : null;

        // Third parameter is ignored if $limit is null.
        $revStmt = $this->page->getRevisionsStmt(
            null,
            $limit,
            $this->getNumRevisions(),
            $this->start,
            $this->end
        );
        $revCount = 0;

        /**
         * Data about previous edits so that we can use them as a basis for comparison.
         * @var Edit[]
         */
        $prevEdits = [
            // The previous Edit, used to discount content that was reverted.
            'prev' => null,

            // The SHA-1 of the edit *before* the previous edit. Used for more
            // accurate revert detection.
            'prevSha' => null,

            // The last edit deemed to be the max addition of content. This is kept track of
            // in case we find out the next edit was reverted (and was also a max edit),
            // in which case we'll want to discount it and use this one instead.
            'maxAddition' => null,

            // Same as with maxAddition, except the maximum amount of content deleted.
            // This is used to discount content that was reverted.
            'maxDeletion' => null,
        ];

        while ($rev = $revStmt->fetch()) {
            $edit = new Edit($this->page, $rev);

            if (0 === $revCount) {
                $this->firstEdit = $edit;
            }

            // Sometimes, with old revisions (2001 era), the revisions from 2002 come before 2001
            if ($edit->getTimestamp() < $this->firstEdit->getTimestamp()) {
                $this->firstEdit = $edit;
            }

            $prevEdits = $this->updateCounts($edit, $prevEdits);

            $revCount++;
        }

        $this->numRevisionsProcessed = $revCount;

        // Various sorts
        arsort($this->editors);

        // $yearMonthCounts has no default value, so it is still null if the loop above never
        // populated it (e.g. no revisions in the date range). Under strict_types, passing
        // null to ksort() throws a TypeError, so only sort when there is an array.
        if (is_array($this->yearMonthCounts)) {
            ksort($this->yearMonthCounts);
        }

        // Explicit emptiness check; $tools likewise has no default value.
        if (!empty($this->tools)) {
            arsort($this->tools);
        }
    }
815
816
    /**
     * Feed a single edit into every running tally kept for the page.
     * @param Edit $edit The revision currently being processed.
     * @param Edit[] $prevEdits Tracking data with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] The tracking data, updated so that $edit is now the 'prev' entry.
     */
    private function updateCounts(Edit $edit, array $prevEdits): array
    {
        // The year/month tally runs first: it creates the yearMonthCounts entry
        // for the edit's year that the anon/minor and tool tallies increment.
        $this->updateYearMonthCounts($edit);
        $this->updateUserCounts($edit);
        $this->updateAnonMinorCounts($edit);
        $this->updateToolCounts($edit);
        $this->updateCountHistory($edit);

        // Content added/removed, plus the revert count.
        $prevEdits = $this->updateContentSizes($edit, $prevEdits);

        // Before $edit takes over as 'prev', remember the SHA of the edit that came
        // before it. A later edit carrying that same SHA restores that content,
        // which means whatever was added in between was reverted.
        $previousEdit = $prevEdits['prev'];
        if (null !== $previousEdit) {
            $prevEdits['prevSha'] = $previousEdit->getSha();
        }

        $this->lastEdit = $edit;
        $prevEdits['prev'] = $edit;

        return $prevEdits;
    }
855
856
    /**
     * Update the content-size figures for the given edit, dispatching on whether it is a revert.
     * @param Edit $edit
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateContentSizes(Edit $edit, array $prevEdits): array
    {
        return $this->isRevert($edit, $prevEdits)
            ? $this->updateContentSizesRevert($prevEdits)
            : $this->updateContentSizesNonRevert($edit, $prevEdits);
    }
871
872
    /**
     * Decide whether the given Edit is a revert. It is treated as one when its SHA equals
     * the tracked 'prevSha', or when the Edit itself reports being a revert.
     * @param Edit $edit
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return bool
     */
    private function isRevert(Edit $edit, array $prevEdits): bool
    {
        if ($edit->getSha() === $prevEdits['prevSha']) {
            return true;
        }

        return (bool)$edit->isRevert($this->container);
    }
882
883
    /**
     * Updates the figures on content sizes assuming the given edit was a revert of the previous one.
     * In such a case, we don't want to treat the previous edit as legit content addition or removal.
     *
     * The revert is always counted. If there is no previous edit on record, nothing else is adjusted.
     *
     * @todo Test this against an edit war (use your sandbox).
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesRevert(array $prevEdits): array
    {
        $this->revertCount++;

        // Without a previous edit there is nothing to undo. Bail out here so that
        // the max addition/deletion checks below never dereference null.
        $revertedEdit = $prevEdits['prev'] ?? null;
        if (null === $revertedEdit) {
            return $prevEdits;
        }

        // Adjust addedBytes given this edit was a revert of the previous one.
        $revertedSize = $revertedEdit->getSize();
        if ($revertedSize > 0) {
            $this->addedBytes -= $revertedSize;

            // Also deduct from the user's individual added byte count.
            $revertedUser = $revertedEdit->getUser();
            if ($revertedUser) {
                $username = $revertedUser->getUsername();
                $this->editors[$username]['added'] -= $revertedSize;
            }
        }

        // Also remove as max added or deleted, if applicable. The reverted edit is stashed in
        // $prevEdits so that it can be reinstated should this revert itself get reverted (edit wars).
        $revertedId = $revertedEdit->getId();
        if ($this->maxAddition && $revertedId === $this->maxAddition->getId()) {
            $this->maxAddition = $prevEdits['maxAddition'];
            $prevEdits['maxAddition'] = $revertedEdit;
        } elseif ($this->maxDeletion && $revertedId === $this->maxDeletion->getId()) {
            $this->maxDeletion = $prevEdits['maxDeletion'];
            $prevEdits['maxDeletion'] = $revertedEdit;
        }

        return $prevEdits;
    }
916
917
    /**
     * Updates the figures on content sizes assuming the given edit was NOT a revert of the previous edit.
     * Positive sizes count as added content; negative sizes only affect the max-deletion tracking.
     * @param Edit $edit
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesNonRevert(Edit $edit, array $prevEdits): array
    {
        $size = $this->getEditSize($edit, $prevEdits);

        if ($size < 0) {
            $isLargestDeletion = !$this->maxDeletion || $size < $this->maxDeletion->getSize();

            if ($isLargestDeletion) {
                // Stash the old record holder: if this edit turns out to be reverted,
                // the previous max deletion needs to be reinstated.
                $prevEdits['maxDeletion'] = $this->maxDeletion;
                $this->maxDeletion = $edit;
            }

            return $prevEdits;
        }

        if (0 === $size) {
            // Nothing was added or removed.
            return $prevEdits;
        }

        // Not a revert and size > 0, so this is content added.
        $this->addedBytes += $size;

        $author = $edit->getUser();
        if ($author) {
            $this->editors[$author->getUsername()]['added'] += $size;
        }

        if (!$this->maxAddition || $size > $this->maxAddition->getSize()) {
            // Stash the old record holder: if this edit turns out to be reverted,
            // the previous max addition needs to be reinstated.
            $prevEdits['maxAddition'] = $this->maxAddition;
            $this->maxAddition = $edit;
        }

        return $prevEdits;
    }
953
954
    /**
     * Get the size of the given edit, based on the previous edit (if present).
     * When the previous edit reports a length of null, 0 is returned instead of the actual
     * size. This happens when the edit follows other edits that were revision-deleted.
     * @see T148857 for more information.
     * @todo Remove once T101631 is resolved.
     * @param Edit $edit
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return int
     */
    private function getEditSize(Edit $edit, array $prevEdits): int
    {
        $previous = $prevEdits['prev'];
        $previousLengthUnknown = $previous && null === $previous->getLength();

        return $previousLengthUnknown ? 0 : $edit->getSize();
    }
972
973
    /**
     * Record the given edit in the automated-tool tallies, if it was made with a tool.
     * @param Edit $edit
     */
    private function updateToolCounts(Edit $edit): void
    {
        $tool = $edit->getTool($this->container);

        if (false === $tool) {
            // Not a (semi-)automated edit, nothing to count.
            return;
        }

        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Overall, per-year and per-month totals.
        $this->automatedCount++;
        $this->yearMonthCounts[$year]['automated']++;
        $this->yearMonthCounts[$year]['months'][$month]['automated']++;

        // Per-tool total.
        $toolName = $tool['name'];
        if (isset($this->tools[$toolName])) {
            $this->tools[$toolName]['count']++;
            return;
        }

        $this->tools[$toolName] = [
            'count' => 1,
            'link' => $tool['link'],
        ];
    }
1002
1003
    /**
     * Add the given edit to the totals for its year and month, and keep the
     * page size and busiest-month figures up to date.
     * @param Edit $edit
     */
    private function updateYearMonthCounts(Edit $edit): void
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // First edit seen for this year: create the blank year and month entries.
        if (!isset($this->yearMonthCounts[$year])) {
            $this->addYearMonthCountEntry($edit);
        }

        $this->yearMonthCounts[$year]['all']++;
        $this->yearMonthCounts[$year]['months'][$month]['all']++;

        // Overwritten by every edit of the year, so this ends up holding the length
        // of the last edit processed for that year.
        $this->yearMonthCounts[$year]['size'] = (int) $edit->getLength();

        // Keep track of which month had the most edits.
        $this->maxEditsPerMonth = max(
            $this->maxEditsPerMonth,
            $this->yearMonthCounts[$year]['months'][$month]['all']
        );
    }
1029
1030
    /**
     * Create the $this->yearMonthCounts entry for the year of the given edit, with zeroed
     * counters for the year and for each month that gets shown. Also appends the matching
     * year and month labels.
     * @param Edit $edit
     */
    private function addYearMonthCountEntry(Edit $edit): void
    {
        $this->yearLabels[] = $this->i18n->dateFormat($edit->getTimestamp(), 'yyyy');
        $year = $edit->getYear();

        // Midnight on the first day of the month in which the first edit was made.
        $firstMonthStart = mktime(
            0,
            0,
            0,
            (int)$this->firstEdit->getMonth(),
            1,
            (int)$this->firstEdit->getYear()
        );

        $this->yearMonthCounts[$year] = [
            'all' => 0,
            'minor' => 0,
            'anon' => 0,
            'automated' => 0,
            'size' => 0, // Keep track of the size by the end of the year.
            'events' => [],
            'months' => [],
        ];

        foreach (range(1, 12) as $monthNumber) {
            $monthStart = mktime(0, 0, 0, $monthNumber, 1, (int)$year);

            // Only months from the first edit's month up to the last day get an entry,
            // so that no zeros are shown outside of that range.
            $isInRange = $monthStart >= $firstMonthStart && $monthStart <= $this->getLastDay();
            if (!$isInRange) {
                continue;
            }

            $this->monthLabels[] = $this->i18n->dateFormat($monthStart, 'yyyy-MM');
            $this->yearMonthCounts[$year]['months'][sprintf('%02d', $monthNumber)] = [
                'all' => 0,
                'minor' => 0,
                'anon' => 0,
                'automated' => 0,
            ];
        }
    }
1070
1071
    /**
     * Bump the overall, yearly and monthly counters for anonymous and for minor edits,
     * depending on which of the two the given edit is.
     * @param Edit $edit
     */
    private function updateAnonMinorCounts(Edit $edit): void
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        if ($edit->isAnon()) {
            // Edit was made while logged out.
            $this->anonCount++;
            $this->yearMonthCounts[$year]['anon']++;
            $this->yearMonthCounts[$year]['months'][$month]['anon']++;
        }

        if ($edit->isMinor()) {
            // Edit was flagged as minor.
            $this->minorCount++;
            $this->yearMonthCounts[$year]['minor']++;
            $this->yearMonthCounts[$year]['months'][$month]['minor']++;
        }
    }
1094
1095
    /**
     * Update the per-editor statistics in $this->editors for the author of the given edit.
     * Does nothing when the edit has no user.
     * @param Edit $edit
     */
    private function updateUserCounts(Edit $edit): void
    {
        $user = $edit->getUser();
        if (!$user) {
            return;
        }

        $username = $user->getUsername();

        // First edit we see from this user: start them off with blank stats.
        if (!isset($this->editors[$username])) {
            $this->editors[$username] = [
                'all' => 0,
                'minor' => 0,
                'minorPercentage' => 0,
                'first' => $edit->getTimestamp(),
                'firstId' => $edit->getId(),
                'last' => null,
                'atbe' => null,
                'added' => 0,
            ];
        }

        // Every edit counts towards the total and becomes the user's latest edit so far.
        $this->editors[$username]['all']++;
        $this->editors[$username]['last'] = $edit->getTimestamp();
        $this->editors[$username]['lastId'] = $edit->getId();

        if ($edit->isMinor()) {
            $this->editors[$username]['minor']++;
        }
    }
1131
1132
    /**
     * Increment the "edits in the past day/week/month/year" counters that the given edit falls into.
     * @param Edit $edit
     */
    private function updateCountHistory(Edit $edit): void
    {
        $timestamp = $edit->getTimestamp();

        // countHistory key => relative date marking the start of that period.
        $periods = [
            'day' => '-1 day',
            'week' => '-1 week',
            'month' => '-1 month',
            'year' => '-1 year',
        ];

        foreach ($periods as $period => $offset) {
            if ($timestamp > new DateTime($offset)) {
                $this->countHistory[$period]++;
            }
        }
    }
1153
1154
    /**
     * Get the stored info about bots that edited the page.
     * @return mixed[] Keyed by the bot's username, with the edit count to the page ('count')
     *   and whether or not they are currently a bot ('current').
     */
    public function getBots(): array
    {
        return $this->bots;
    }
1162
1163
    /**
     * Load info about the bots that edited the page into $this->bots, sorted by edit count (highest first).
     * This is done as a private setter because we need this information when computing
     * the top 10 editors, where we don't want to include bots.
     */
    private function setBots(): void
    {
        /** @var Statement $botData */
        $botData = $this->getRepository()->getBotData($this->page, $this->start, $this->end);

        // Index the rows by username.
        $botsByName = [];
        while ($row = $botData->fetch()) {
            $botsByName[$row['username']] = [
                'count' => (int)$row['count'],
                'current' => '1' === $row['current'],
            ];
        }

        // Highest edit count first, keeping the username keys.
        uasort($botsByName, function ($a, $b) {
            return $b['count'] <=> $a['count'];
        });

        $this->bots = $botsByName;
    }
1188
1189
    /**
     * Number of edits made to the page by current or former bots.
     * The result is computed once and then served from $this->botRevisionCount.
     * @param mixed[]|null $bots Used only in unit tests, where we supply mock data for the bots
     *   that will get processed. Each entry is expected to have a 'count' key.
     * @return int
     */
    public function getBotRevisionCount(?array $bots = null): int
    {
        if (isset($this->botRevisionCount)) {
            return $this->botRevisionCount;
        }

        $botData = $bots ?? $this->getBots();

        // Add up the per-bot edit counts.
        $this->botRevisionCount = array_sum(array_column($botData, 'count'));

        return $this->botRevisionCount;
    }
1213
1214
    /**
     * Query for log events during each year of the article's history, and set the results in
     * $this->yearMonthCounts[<year>]['events'], keyed by the i18n key of the event type.
     *
     * Only protections (including pending-changes protections), deletions and moves are counted.
     * Events of any other log type are skipped.
     */
    private function setLogsEvents(): void
    {
        // Log type => i18n key. Pending-changes ('stable') protections are counted
        // along with normal protections.
        $actionsByLogType = [
            'protect' => 'protections',
            'delete' => 'deletions',
            'move' => 'moves',
            'stable' => 'protections',
        ];

        $logData = $this->getRepository()->getLogEvents(
            $this->page,
            $this->start,
            $this->end
        );

        foreach ($logData as $event) {
            $time = strtotime($event['timestamp']);
            $year = date('Y', $time);

            // NOTE(review): this stops processing at the first event whose year has no entry,
            // rather than skipping just that event. Preserved as-is; confirm it is intended.
            if (!isset($this->yearMonthCounts[$year])) {
                break;
            }

            // Skip log types we don't report on, instead of counting them under
            // whatever action the previous event happened to have.
            if (!isset($actionsByLogType[$event['log_type']])) {
                continue;
            }
            $action = $actionsByLogType[$event['log_type']];

            $yearEvents = $this->yearMonthCounts[$year]['events'];
            $yearEvents[$action] = empty($yearEvents[$action])
                ? 1
                : $yearEvents[$action] + 1;

            $this->yearMonthCounts[$year]['events'] = $yearEvents;
        }
    }
1261
1262
    /**
     * Set statistics about the top 10 editors by added text and number of edits.
     * This is ran *after* parseHistory() since we need the grand totals first.
     * Various stats are also set for each editor in $this->editors to be used in the charts:
     * the percentage of minor edits and the average time between edits ('atbe', in days).
     */
    private function doPostPrecessing(): void
    {
        $topTenCount = $counter = 0;
        $topTenEditorsByEdits = [];

        foreach ($this->editors as $editor => $info) {
            // Collect the first 10 editors (in the current order of $this->editors) that are not bots.
            // $this->bots is keyed by username, so a key lookup replaces a loose in_array() scan.
            if ($counter < 10 && !isset($this->bots[$editor])) {
                $topTenCount += $info['all'];
                $counter++;

                // To be used in the Top Ten charts.
                $topTenEditorsByEdits[] = [
                    'label' => $editor,
                    'value' => $info['all'],
                ];
            }

            // Compute the percentage of minor edits the user made.
            $this->editors[$editor]['minorPercentage'] = $info['all']
                ? ($info['minor'] / $info['all']) * 100
                : 0;

            if ($info['all'] > 1) {
                // Number of seconds/days between first and last edit.
                $secs = $info['last']->getTimestamp() - $info['first']->getTimestamp();
                $days = $secs / (60 * 60 * 24);

                // Average time between edits (in days).
                $this->editors[$editor]['atbe'] = $days / $info['all'];
            }
        }

        // Loop through again and add each editor's share of the top ten total.
        $this->topTenEditorsByEdits = array_map(function ($editor) use ($topTenCount) {
            // Guard against a zero total so that this can never divide by zero.
            $editor['percentage'] = $topTenCount > 0
                ? 100 * ($editor['value'] / $topTenCount)
                : 0;
            return $editor;
        }, $topTenEditorsByEdits);

        $this->topTenEditorsByAdded = $this->getTopTenByAdded();

        $this->topTenCount = $topTenCount;
    }
1310
1311
    /**
     * Get the top ten editors by added text.
     * Percentages are relative to $this->addedBytes, and are 0 when no bytes were added.
     * @return array With keys 'label', 'value' and 'percentage', ready to be used by the pieChart Twig helper.
     */
    private function getTopTenByAdded(): array
    {
        // Work on a copy of the editors, ordered by the amount of text added (most first).
        $editorsByAdded = $this->editors;
        uasort($editorsByAdded, function ($a, $b) {
            return $b['added'] <=> $a['added'];
        });

        // Only the usernames of the first 10 are needed from here on.
        $topUsernames = array_keys(array_slice($editorsByAdded, 0, 10, true));

        // Build the data structure needed for the chart.
        $chartData = [];
        foreach ($topUsernames as $username) {
            $added = $this->editors[$username]['added'];

            if (0 === $this->addedBytes) {
                $percentage = 0;
            } else {
                $percentage = 100 * ($added / $this->addedBytes);
            }

            $chartData[] = [
                'label' => $username,
                'value' => $added,
                'percentage' => $percentage,
            ];
        }

        return $chartData;
    }
1346
1347
    /**
     * Get authorship attribution from the WikiWho API.
     * The result is stored in $this->textshares and returned as-is on later calls,
     * regardless of the $limit passed to those calls.
     * @see https://f-squared.org/wikiwho/
     * @param int|null $limit Max number of results, or null for all of them.
     * @return array With keys 'list', 'totalAuthors', 'totalCount' and possibly 'others';
     *   or with only the key 'error' if no revision was returned.
     */
    public function getTextshares(?int $limit = null): array
    {
        if (isset($this->textshares)) {
            return $this->textshares;
        }

        // TODO: check for failures. Should have a success:true
        $ret = $this->getRepository()->getTextshares($this->page);

        // If revision can't be found, return error message.
        if (!isset($ret['revisions'][0])) {
            return ['error' => $ret['Error'] ?? 'Unknown'];
        }

        $revision = $ret['revisions'][0];
        $revId = array_keys($revision)[0];
        $tokens = $revision[$revId]['tokens'];

        [$counts, $totalCount, $userIds] = $this->countTokens($tokens);
        $usernameMap = $this->getUsernameMap($userIds);

        $countsToProcess = null === $limit
            ? $counts
            : array_slice($counts, 0, $limit, true);

        // Running totals for at most the first 10 editors. These are used to work out
        // the character count and percentage of everyone else.
        $percentageSum = 0;
        $countSum = 0;
        $numEditors = 0;

        // Key the results by username where the ID could be resolved, and otherwise
        // by whatever the count was keyed by (e.g. an IP address).
        $list = [];
        foreach ($countsToProcess as $editor => $count) {
            $index = $usernameMap[$editor] ?? $editor;
            $percentage = round(100 * ($count / $totalCount), 1);

            // If we are showing > 10 editors in the table, we still only want the top 10 for the chart.
            if ($numEditors < 10) {
                $percentageSum += $percentage;
                $countSum += $count;
                $numEditors++;
            }

            $list[$index] = [
                'count' => $count,
                'percentage' => $percentage,
            ];
        }

        $result = [
            'list' => $list,
            'totalAuthors' => count($counts),
            'totalCount' => $totalCount,
        ];

        // Record character count and percentage for the remaining editors.
        if ($percentageSum < 100) {
            $result['others'] = [
                'count' => $totalCount - $countSum,
                'percentage' => round(100 - $percentageSum, 1),
                'numEditors' => count($counts) - $numEditors,
            ];
        }

        $this->textshares = $result;

        return $this->textshares;
    }
1429
1430
    /**
     * Look up the usernames for the given user IDs on the page's project.
     * No lookup is made when no IDs are given.
     * @param int[] $userIds
     * @return array IDs as keys, usernames as values.
     */
    private function getUsernameMap(array $userIds): array
    {
        if ([] === $userIds) {
            return [];
        }

        $project = $this->page->getProject();
        $rows = $this->getRepository()->getUsernamesFromIds($project, $userIds);

        $map = [];
        foreach ($rows as $row) {
            $map[$row['user_id']] = $row['user_name'];
        }

        return $map;
    }
1453
1454
    /**
     * Add up the token lengths for each author. Used in self::getTextshares()
     * Lengths are measured with strlen().
     * @param array $tokens Each with the keys 'editor' and 'str'.
     * @return array [counts by user (highest first), total count, IDs of accounts]
     */
    private function countTokens(array $tokens): array
    {
        $counts = [];
        $userIds = [];
        $totalCount = 0;

        foreach ($tokens as $token) {
            $editor = $token['editor'];

            // IPs are prefixed with '0|', otherwise it's the user ID.
            if (0 === strpos($editor, '0|')) {
                $editor = substr($editor, 2);
            } else {
                // An ID is appended once per token, so the list can contain duplicates.
                $userIds[] = $editor;
            }

            $length = strlen($token['str']);
            $counts[$editor] = ($counts[$editor] ?? 0) + $length;
            $totalCount += $length;
        }

        // Sort authors by count.
        arsort($counts);

        return [$counts, $totalCount, $userIds];
    }
1489
1490
    /**
     * Get the domain names of the wikis supported by WikiWho, as listed in self::TEXTSHARE_WIKIS.
     * @return string[]
     * @codeCoverageIgnore
     */
    public function getTextshareWikis(): array
    {
        return self::TEXTSHARE_WIKIS;
    }
1499
1500
    /**
     * Get prose and reference information, taken from the HTML of the page
     * (as of the end date, if one is set).
     * @return array With keys 'characters', 'words', 'references', 'unique_references' and 'sections'.
     */
    public function getProseStats(): array
    {
        $datetime = null;
        if (false !== $this->end) {
            $datetime = new DateTime('@'.$this->end);
        }

        $crawler = new Crawler($this->page->getHTMLContent($datetime));

        // Prose is whatever sits in paragraphs within the content area.
        [$chars, $words] = $this->countCharsAndWords($crawler, '#mw-content-text p');

        // References with identical text only count once towards the unique total.
        $refs = $crawler->filter('#mw-content-text .reference');
        $refContent = $refs->each(function ($ref) {
            return $ref->text();
        });
        $uniqueRefs = count(array_unique($refContent));

        $sections = $crawler->filter('#mw-content-text .mw-headline')->count();

        return [
            'characters' => $chars,
            'words' => $words,
            'references' => $refs->count(),
            'unique_references' => $uniqueRefs,
            'sections' => $sections,
        ];
    }
1530
1531
    /**
     * Count the number of characters and words of the plain text within the DOM elements
     * matched by the given selector. Reference markers such as "[1]" are not counted.
     *
     * Characters are counted as UTF-8 characters rather than bytes, and words are runs of
     * non-whitespace, so empty elements and repeated spaces don't add phantom words.
     *
     * @param Crawler $crawler
     * @param string $selector HTML selector.
     * @return array [num chars, num words]
     */
    private function countCharsAndWords(Crawler $crawler, string $selector): array
    {
        $totalChars = 0;
        $totalWords = 0;
        $paragraphs = $crawler->filter($selector);
        $paragraphs->each(function ($node) use (&$totalChars, &$totalWords): void {
            // Strip reference markers like "[12]". preg_replace() returns null on failure,
            // which is treated as empty text.
            $text = (string)preg_replace('/\[\d+\]/', '', trim($node->text()));

            // strlen() would count bytes, inflating the total for non-ASCII text.
            $totalChars += mb_strlen($text, 'UTF-8');

            // explode(' ', '') yields [''], which would count an empty paragraph as one word.
            $words = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
            $totalWords += false === $words ? 0 : count($words);
        });

        return [$totalChars, $totalWords];
    }
1550
1551
    /**
     * Get the transclusion data (categories, templates and files) for the page,
     * fetching it from the repository only if it isn't already stored as an array.
     * @return array With keys 'categories', 'templates' and 'files'.
     */
    private function getTransclusionData(): array
    {
        if (is_array($this->transclusionData)) {
            return $this->transclusionData;
        }

        $this->transclusionData = $this->getRepository()->getTransclusionData($this->page);

        return $this->transclusionData;
    }
1563
1564
    /**
     * Get the number of categories that are on the page, as per the 'categories' transclusion data.
     * @return int
     */
    public function getNumCategories(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['categories'];
    }
1572
1573
    /**
     * Get the number of templates that are on the page, as per the 'templates' transclusion data.
     * @return int
     */
    public function getNumTemplates(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['templates'];
    }
1581
1582
    /**
     * Get the number of files that are on the page, as per the 'files' transclusion data.
     * @return int
     */
    public function getNumFiles(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['files'];
    }
1590
}
1591