Passed
Push — master ( 1d6156...a73ad7 )
by MusikAnimal
05:41
created

ArticleInfo::getNumFiles()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the ArticleInfo class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Model;
9
10
use AppBundle\Helper\I18nHelper;
11
use DateTime;
12
use Doctrine\DBAL\Statement;
13
use Symfony\Component\DependencyInjection\ContainerInterface;
14
use Symfony\Component\DomCrawler\Crawler;
15
16
/**
17
 * An ArticleInfo provides statistics about a page on a project.
18
 */
19
class ArticleInfo extends Model
20
{
21
    /** @const string[] Domain names of wikis supported by WikiWho. */
22
    public const TEXTSHARE_WIKIS = [
23
        'en.wikipedia.org',
24
        'de.wikipedia.org',
25
        'eu.wikipedia.org',
26
        'tr.wikipedia.org',
27
        'es.wikipedia.org',
28
    ];
29
30
    /** @var ContainerInterface The application's DI container. */
31
    protected $container;
32
33
    /** @var I18nHelper For i18n and l10n. */
34
    protected $i18n;
35
36
    /** @var int Number of revisions that belong to the page. */
37
    protected $numRevisions;
38
39
    /** @var int Maximum number of revisions to process, as configured. */
40
    protected $maxRevisions;
41
42
    /** @var int Number of revisions that were actually processed. */
43
    protected $numRevisionsProcessed;
44
45
    /**
46
     * Various statistics about editors to the page. These are not User objects
47
     * so as to preserve memory.
48
     * @var mixed[]
49
     */
50
    protected $editors = [];
51
52
    /** @var mixed[] The top 10 editors to the page by number of edits. */
53
    protected $topTenEditorsByEdits;
54
55
    /** @var mixed[] The top 10 editors to the page by added text. */
56
    protected $topTenEditorsByAdded;
57
58
    /** @var int Number of edits made by the top 10 editors. */
59
    protected $topTenCount;
60
61
    /** @var mixed[] Various statistics about bots that edited the page. */
62
    protected $bots;
63
64
    /** @var int Number of edits made to the page by bots. */
65
    protected $botRevisionCount;
66
67
    /** @var mixed[] Various counts about each individual year and month of the page's history. */
68
    protected $yearMonthCounts;
69
70
    /** @var string[] Localized labels for the years, to be used in the 'Year counts' chart. */
71
    protected $yearLabels = [];
72
73
    /** @var string[] Localized labels for the months, to be used in the 'Month counts' chart. */
74
    protected $monthLabels = [];
75
76
    /** @var Edit The first edit to the page. */
77
    protected $firstEdit;
78
79
    /** @var Edit The last edit to the page. */
80
    protected $lastEdit;
81
82
    /** @var Edit Edit that made the largest addition by number of bytes. */
83
    protected $maxAddition;
84
85
    /** @var Edit Edit that made the largest deletion by number of bytes. */
86
    protected $maxDeletion;
87
88
    /** @var int[] Number of in and outgoing links and redirects to the page. */
89
    protected $linksAndRedirects;
90
91
    /** @var string[] Assessments of the page (see Page::getAssessments). */
92
    protected $assessments;
93
94
    /**
95
     * Maximum number of edits that were created across all months. This is used as a comparison
96
     * for the bar charts in the months section.
97
     * @var int
98
     */
99
    protected $maxEditsPerMonth;
100
101
    /** @var string[] List of (semi-)automated tools that were used to edit the page. */
102
    protected $tools;
103
104
    /**
105
     * Total number of bytes added throughout the page's history. This is used as a comparison
106
     * when computing the top 10 editors by added text.
107
     * @var int
108
     */
109
    protected $addedBytes = 0;
110
111
    /** @var int Number of days between first and last edit. */
112
    protected $totalDays;
113
114
    /** @var int Number of minor edits to the page. */
115
    protected $minorCount = 0;
116
117
    /** @var int Number of anonymous edits to the page. */
118
    protected $anonCount = 0;
119
120
    /** @var int Number of automated edits to the page. */
121
    protected $automatedCount = 0;
122
123
    /** @var int Number of edits to the page that were reverted with the subsequent edit. */
124
    protected $revertCount = 0;
125
126
    /** @var int[] The "edits per <time>" counts. */
127
    protected $countHistory = [
128
        'day' => 0,
129
        'week' => 0,
130
        'month' => 0,
131
        'year' => 0,
132
    ];
133
134
    /** @var string[] List of wikidata and Checkwiki errors. */
135
    protected $bugs;
136
137
    /** @var array List of editors and the percentage of the current content that they authored. */
138
    protected $textshares;
139
140
    /** @var array Number of categories, templates and files on the page. */
141
    protected $transclusionData;
142
143
    /**
     * Create an ArticleInfo for a page, optionally restricted to a date range.
     * @param Page $page The page whose history is analysed.
     * @param ContainerInterface $container The application's DI container.
     * @param false|int $start Start of the date range, or false for none.
     * @param false|int $end End of the date range, or false for none.
     */
    public function __construct(Page $page, ContainerInterface $container, $start = false, $end = false)
    {
        $this->container = $container;
        $this->page = $page;
        $this->end = $end;
        $this->start = $start;
    }
157
158
    /**
     * Inject the I18nHelper so that ArticleInfo can use it.
     * @param I18nHelper $i18n The helper to store on this instance.
     * @codeCoverageIgnore
     */
    public function setI18nHelper(I18nHelper $i18n): void
    {
        $this->i18n = $i18n;
    }
167
168
    /**
     * Get the start of the date range formatted as YYYY-MM-DD, as used in the views.
     *
     * "No value" is detected with strict comparisons so that the result does not depend
     * on the PHP version's loose-comparison rules, and the value is cast to int because
     * date() rejects non-integer timestamps under strict_types.
     * @return string Blank if no start date exists.
     */
    public function getStartDate(): string
    {
        if (null === $this->start || false === $this->start || '' === $this->start) {
            return '';
        }

        return date('Y-m-d', (int) $this->start);
    }
176
177
    /**
     * Get the end of the date range formatted as YYYY-MM-DD, as used in the views.
     *
     * "No value" is detected with strict comparisons so that the result does not depend
     * on the PHP version's loose-comparison rules, and the value is cast to int because
     * date() rejects non-integer timestamps under strict_types.
     * @return string Blank if no end date exists.
     */
    public function getEndDate(): string
    {
        if (null === $this->end || false === $this->end || '' === $this->end) {
            return '';
        }

        return date('Y-m-d', (int) $this->end);
    }
185
186
    /**
     * Get the last day of the month that should be shown in the month/year sections.
     * The month is taken from $this->end when it is set, otherwise from the current date.
     * @return int As Unix timestamp.
     */
    private function getLastDay(): int
    {
        // No explicit end date: use the current month.
        if (false === $this->end) {
            return strtotime('last day of this month');
        }

        $endDate = new DateTime('@'.$this->end);

        return $endDate->modify('last day of this month')->getTimestamp();
    }
201
202
    /**
     * Return the start/end date values as an associative array, with YYYY-MM-DD as the date format.
     * This is used mainly as a helper to pass to the pageviews Twig macros.
     *
     * An explicitly given bound takes precedence; the first/last edit is only consulted for a
     * bound that was not given, so a missing edit cannot break a fully specified range.
     * @return string[] With keys 'start' and 'end', or an empty array if there is no date range.
     */
    public function getDateParams(): array
    {
        if (!$this->hasDateRange()) {
            return [];
        }

        // Cast to int: date() does not accept numeric strings under strict_types.
        $start = false !== $this->start
            ? date('Y-m-d', (int) $this->start)
            : $this->firstEdit->getTimestamp()->format('Y-m-d');

        $end = false !== $this->end
            ? date('Y-m-d', (int) $this->end)
            : $this->lastEdit->getTimestamp()->format('Y-m-d');

        return [
            'start' => $start,
            'end' => $end,
        ];
    }
227
228
    /**
     * Get the number of revisions belonging to the page within the date range.
     * The value is fetched from the Page once and then memoized on this instance.
     * @return int
     */
    public function getNumRevisions(): int
    {
        if (isset($this->numRevisions)) {
            return $this->numRevisions;
        }

        $this->numRevisions = $this->page->getNumRevisions(null, $this->start, $this->end);

        return $this->numRevisions;
    }
239
240
    /**
     * Get the maximum number of revisions that we should process, as given by the
     * 'app.max_page_revisions' container parameter. The value is memoized.
     * @return int
     */
    public function getMaxRevisions(): int
    {
        if (isset($this->maxRevisions)) {
            return $this->maxRevisions;
        }

        $configured = $this->container->getParameter('app.max_page_revisions');
        $this->maxRevisions = (int) $configured;

        return $this->maxRevisions;
    }
251
252
    /**
     * Get the number of revisions that are actually getting processed: the configured
     * maximum when the page has too many revisions, otherwise the actual revision count.
     * @return int
     */
    public function getNumRevisionsProcessed(): int
    {
        if (!isset($this->numRevisionsProcessed)) {
            $this->numRevisionsProcessed = $this->tooManyRevisions()
                ? $this->getMaxRevisions()
                : $this->getNumRevisions();
        }

        return $this->numRevisionsProcessed;
    }
271
272
    /**
     * Does the page have more revisions than the configured maximum?
     * A maximum of zero or less disables the limit.
     * @return bool
     */
    public function tooManyRevisions(): bool
    {
        $max = $this->getMaxRevisions();

        if ($max <= 0) {
            return false;
        }

        return $this->getNumRevisions() > $max;
    }
280
281
    /**
     * Fetch and store all the data needed to render the ArticleInfo view.
     * The steps run in a fixed order because later ones depend on earlier results.
     * @codeCoverageIgnore
     */
    public function prepareData(): void
    {
        $this->parseHistory();
        $this->setLogsEvents();

        // The top 10 counts rely on the bots having been set first.
        $this->setBots();

        $this->doPostPrecessing();
    }
295
296
    /**
     * Count the distinct editors collected for the page.
     * @return int
     */
    public function getNumEditors(): int
    {
        $editors = $this->editors;

        return count($editors);
    }
304
305
    /**
     * Count the bots that edited the page, as reported by getBots().
     * @return int
     */
    public function getNumBots(): int
    {
        $bots = $this->getBots();

        return count($bots);
    }
313
314
    /**
     * Get the number of whole days between the first and last edit. The value is memoized.
     * @return int
     */
    public function getTotalDays(): int
    {
        if (!isset($this->totalDays)) {
            $first = $this->firstEdit->getTimestamp();
            $last = $this->lastEdit->getTimestamp();

            // Absolute difference, so the order of the two dates does not matter.
            $this->totalDays = (int)date_diff($last, $first, true)->format('%a');
        }

        return $this->totalDays;
    }
329
330
    /**
     * Get the length of the page. With a date range this is the length as of the last
     * edit in the range, otherwise the length reported by the Page itself.
     * @return int
     */
    public function getLength(): int
    {
        return $this->hasDateRange()
            ? $this->lastEdit->getLength()
            : $this->page->getLength();
    }
342
343
    /**
     * Get the average number of days between edits to the page.
     * @return float Rounded to one decimal; 0.0 if no revisions were processed.
     */
    public function averageDaysPerEdit(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero when there is nothing to average over.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round($this->getTotalDays() / $numRevisions, 1);
    }
351
352
    /**
     * Get the average number of edits per day to the page.
     *
     * This is the number of processed revisions divided by the number of days between
     * the first and last edit. The previous formula additionally scaled the day count by
     * 365 / 12 / 24, which inflated the result by roughly 27%.
     * @return float Rounded to one decimal; 0.0 if first and last edit are on the same day.
     */
    public function editsPerDay(): float
    {
        $totalDays = $this->getTotalDays();

        if (0 === $totalDays) {
            return 0.0;
        }

        return round($this->getNumRevisionsProcessed() / $totalDays, 1);
    }
363
364
    /**
     * Get the average number of edits per month to the page, using 365 / 12 days per month.
     * The result never exceeds the number of processed revisions.
     * @return float
     */
    public function editsPerMonth(): float
    {
        $days = $this->getTotalDays();
        $perMonth = 0;

        if ($days) {
            $perMonth = $this->getNumRevisionsProcessed() / ($days / (365 / 12));
        }

        return min($this->getNumRevisionsProcessed(), round($perMonth, 1));
    }
375
376
    /**
     * Get the average number of edits per year to the page, using 365 days per year.
     * The result never exceeds the number of processed revisions.
     * @return float
     */
    public function editsPerYear(): float
    {
        $days = $this->getTotalDays();
        $perYear = 0;

        if ($days) {
            $perYear = $this->getNumRevisionsProcessed() / ($days / 365);
        }

        return min($this->getNumRevisionsProcessed(), round($perYear, 1));
    }
387
388
    /**
     * Get the average number of edits per editor.
     * @return float Rounded to one decimal; 0.0 if the page has no recorded editors.
     */
    public function editsPerEditor(): float
    {
        $numEditors = count($this->editors);

        // Avoid a division by zero when no editors were collected.
        if (0 === $numEditors) {
            return 0.0;
        }

        return round($this->getNumRevisionsProcessed() / $numEditors, 1);
    }
396
397
    /**
     * Get the percentage of minor edits to the page.
     * @return float Rounded to one decimal; 0.0 if no revisions were processed.
     */
    public function minorPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero when no revisions were processed.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->minorCount / $numRevisions) * 100, 1);
    }
408
409
    /**
     * Get the percentage of anonymous edits to the page.
     * @return float Rounded to one decimal; 0.0 if no revisions were processed.
     */
    public function anonPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero when no revisions were processed.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->anonCount / $numRevisions) * 100, 1);
    }
420
421
    /**
     * Get the percentage of edits made by the top 10 editors.
     * @return float Rounded to one decimal; 0.0 if no revisions were processed.
     */
    public function topTenPercentage(): float
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero when no revisions were processed.
        if (0 === $numRevisions) {
            return 0.0;
        }

        return round(($this->topTenCount / $numRevisions) * 100, 1);
    }
429
430
    /**
     * Get the number of times the page has been viewed. When the ArticleInfo instance has a
     * date range, that range is used and $latest is ignored.
     * @param  int $latest Last N days.
     * @return int
     */
    public function getPageviews(int $latest): int
    {
        if ($this->hasDateRange()) {
            $range = $this->getDateParams();

            return $this->page->getPageviews($range['start'], $range['end']);
        }

        return $this->page->getLastPageviews($latest);
    }
445
446
    /**
     * Get the page assessments of the page, fetching them through the project's
     * PageAssessments on first use. Only an array result is treated as cached.
     * @see https://www.mediawiki.org/wiki/Extension:PageAssessments
     * @return string[]|false False if unsupported.
     * @codeCoverageIgnore
     */
    public function getAssessments()
    {
        if (is_array($this->assessments)) {
            return $this->assessments;
        }

        $pageAssessments = $this->page->getProject()->getPageAssessments();
        $this->assessments = $pageAssessments->getAssessments($this->page);

        return $this->assessments;
    }
462
463
    /**
     * Accessor for the number of automated edits made to the page.
     * @return int
     */
    public function getAutomatedCount(): int
    {
        return $this->automatedCount;
    }
471
472
    /**
     * Accessor for the number of edits that were reverted by the subsequent edit.
     * @return int
     */
    public function getRevertCount(): int
    {
        return $this->revertCount;
    }
480
481
    /**
     * Accessor for the number of edits made by logged out users.
     * @return int
     */
    public function getAnonCount(): int
    {
        return $this->anonCount;
    }
489
490
    /**
     * Accessor for the number of minor edits to the page.
     * @return int
     */
    public function getMinorCount(): int
    {
        return $this->minorCount;
    }
498
499
    /**
     * Accessor for the "edits per <time>" counts: edits made in the past day, week,
     * month and year.
     * @return int[] With keys 'day', 'week', 'month' and 'year'.
     */
    public function getCountHistory(): array
    {
        return $this->countHistory;
    }
507
508
    /**
     * Accessor for the number of edits made by the top 10 editors.
     * @return int
     */
    public function getTopTenCount(): int
    {
        return $this->topTenCount;
    }
516
517
    /**
     * Per-instance cache for getTopEditorsByEditCount(), keyed by "<limit>|<noBots>".
     * @var array[]
     */
    private $topEditorsByEditCountCache = [];

    /**
     * Get the top editors to the page by edit count.
     *
     * Results are cached per instance and per argument combination. The previous
     * function-level `static` cache was shared by every ArticleInfo instance and ignored
     * the arguments, so a later call could receive rows for another page or limit.
     * @param int $limit Default 20, maximum 1,000.
     * @param bool $noBots Set to non-false to exclude bots from the result.
     * @return array[] One entry per editor with keys 'rank', 'username', 'count', 'minor',
     *   'first_edit' and 'latest_edit' (each of the latter two with 'id' and 'timestamp').
     */
    public function getTopEditorsByEditCount(int $limit = 20, bool $noBots = false): array
    {
        $limit = min($limit, 1000);

        // Quick cache, valid only for the same request.
        $cacheKey = $limit.'|'.(int)$noBots;
        if (isset($this->topEditorsByEditCountCache[$cacheKey])) {
            return $this->topEditorsByEditCountCache[$cacheKey];
        }

        $rows = $this->getRepository()->getTopEditorsByEditCount(
            $this->page,
            $this->start,
            $this->end,
            $limit,
            $noBots
        );

        $topEditors = [];
        $rank = 0;
        foreach ($rows as $row) {
            $topEditors[] = [
                'rank' => ++$rank,
                'username' => $row['username'],
                'count' => $row['count'],
                'minor' => $row['minor'],
                'first_edit' => [
                    'id' => $row['first_revid'],
                    'timestamp' => $row['first_timestamp'],
                ],
                'latest_edit' => [
                    'id' => $row['latest_revid'],
                    'timestamp' => $row['latest_timestamp'],
                ],
            ];
        }

        $this->topEditorsByEditCountCache[$cacheKey] = $topEditors;

        return $topEditors;
    }
560
561
    /**
     * Accessor for the first edit to the page.
     * @return Edit
     */
    public function getFirstEdit(): Edit
    {
        return $this->firstEdit;
    }
569
570
    /**
     * Accessor for the last edit to the page.
     * @return Edit
     */
    public function getLastEdit(): Edit
    {
        return $this->lastEdit;
    }
578
579
    /**
     * Accessor for the edit that made the largest addition to the page (by number of bytes).
     * @return Edit|null
     */
    public function getMaxAddition(): ?Edit
    {
        return $this->maxAddition;
    }
587
588
    /**
     * Accessor for the edit that made the largest removal from the page (by number of bytes).
     * @return Edit|null
     */
    public function getMaxDeletion(): ?Edit
    {
        return $this->maxDeletion;
    }
596
597
    /**
     * Accessor for the list of editors to the page, including various statistics.
     * @return mixed[]
     */
    public function getEditors(): array
    {
        return $this->editors;
    }
605
606
    /**
     * Accessor for the top editors to the page by number of edits, including various statistics.
     * @return mixed[]
     */
    public function topTenEditorsByEdits(): array
    {
        return $this->topTenEditorsByEdits;
    }
614
615
    /**
     * Accessor for the top editors to the page by added text, including various statistics.
     * @return mixed[]
     */
    public function topTenEditorsByAdded(): array
    {
        return $this->topTenEditorsByAdded;
    }
623
624
    /**
     * Get various counts about each individual year and month of the page's history.
     *
     * The property has no default value, so an empty array is returned while it is still
     * unset; returning null would violate the declared array return type.
     * @return mixed[]
     */
    public function getYearMonthCounts(): array
    {
        return $this->yearMonthCounts ?? [];
    }
632
633
    /**
     * Accessor for the localized labels of the 'Year counts' chart.
     * @return string[]
     */
    public function getYearLabels(): array
    {
        return $this->yearLabels;
    }
641
642
    /**
     * Accessor for the localized labels of the 'Month counts' chart.
     * @return string[]
     */
    public function getMonthLabels(): array
    {
        return $this->monthLabels;
    }
650
651
    /**
     * Accessor for the maximum number of edits that were created across all months.
     * It serves as the comparison value for the bar charts in the months section.
     * @return int
     */
    public function getMaxEditsPerMonth(): int
    {
        return $this->maxEditsPerMonth;
    }
660
661
    /**
     * Get a list of (semi-)automated tools that were used to edit the page, including
     * the number of times they were used, and a link to the tool's homepage.
     *
     * The property has no default value, so an empty array is returned while it is still
     * unset; returning null would violate the declared array return type.
     * @return string[]
     */
    public function getTools(): array
    {
        return $this->tools ?? [];
    }
670
671
    /**
     * Get the list of the page's wikidata and Checkwiki errors, fetching them from the
     * Page on first use.
     * @see Page::getErrors()
     * @return string[]
     */
    public function getBugs(): array
    {
        if (is_array($this->bugs)) {
            return $this->bugs;
        }

        $this->bugs = $this->page->getErrors();

        return $this->bugs;
    }
683
684
    /**
     * Get the number of wikidata and CheckWiki errors.
     * @return int
     */
    public function numBugs(): int
    {
        $bugs = $this->getBugs();

        return count($bugs);
    }
692
693
    /**
     * Get the number of external links on the page.
     * @return int
     */
    public function linksExtCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_ext_count'];
    }
701
702
    /**
     * Get the number of incoming links to the page.
     * @return int
     */
    public function linksInCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_in_count'];
    }
710
711
    /**
     * Get the number of outgoing links from the page.
     * @return int
     */
    public function linksOutCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_out_count'];
    }
719
720
    /**
     * Get the number of redirects to the page.
     * @return int
     */
    public function redirectsCount(): int
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['redirects_count'];
    }
728
729
    /**
     * Get the number of external, incoming and outgoing links, along with the number of
     * redirects to the page. The counts are fetched from the Page on first use.
     * @return int[]
     * @codeCoverageIgnore
     */
    private function getLinksAndRedirects(): array
    {
        if (is_array($this->linksAndRedirects)) {
            return $this->linksAndRedirects;
        }

        $this->linksAndRedirects = $this->page->countLinksAndRedirects();

        return $this->linksAndRedirects;
    }
741
742
    /**
     * Parse the revision history, collecting our core statistics.
     *
     * Every fetched revision is wrapped in an Edit and passed to updateCounts(). Afterwards
     * the number of processed revisions is stored and the collected lists are sorted.
     *
     * Untestable because it relies on getting a PDO statement. All the important
     * logic lives in other methods which are tested.
     * @codeCoverageIgnore
     */
    private function parseHistory(): void
    {
        // Only cap the query when the page has more revisions than we are allowed to process.
        $limit = $this->tooManyRevisions() ? $this->getMaxRevisions() : null;

        // Third parameter is ignored if $limit is null.
        $revStmt = $this->page->getRevisionsStmt(
            null,
            $limit,
            $this->getNumRevisions(),
            $this->start,
            $this->end
        );
        $revCount = 0;

        /**
         * Data about previous edits so that we can use them as a basis for comparison.
         * @var Edit[]
         */
        $prevEdits = [
            // The previous Edit, used to discount content that was reverted.
            'prev' => null,

            // The SHA-1 of the edit *before* the previous edit. Used for more
            // accurate revert detection.
            'prevSha' => null,

            // The last edit deemed to be the max addition of content. This is kept track of
            // in case we find out the next edit was reverted (and was also a max edit),
            // in which case we'll want to discount it and use this one instead.
            'maxAddition' => null,

            // Same as with maxAddition, except the maximum amount of content deleted.
            // This is used to discount content that was reverted.
            'maxDeletion' => null,
        ];

        while ($rev = $revStmt->fetch()) {
            $edit = new Edit($this->page, $rev);

            if (0 === $revCount) {
                $this->firstEdit = $edit;
            }

            // Sometimes, with old revisions (2001 era), the revisions from 2002 come before 2001
            if ($edit->getTimestamp() < $this->firstEdit->getTimestamp()) {
                $this->firstEdit = $edit;
            }

            $prevEdits = $this->updateCounts($edit, $prevEdits);

            $revCount++;
        }

        $this->numRevisionsProcessed = $revCount;

        // Various sorts
        arsort($this->editors);

        // $yearMonthCounts and $tools have no default value and stay unset when nothing
        // was collected, so only sort them once they hold data.
        if (is_array($this->yearMonthCounts)) {
            ksort($this->yearMonthCounts);
        }
        if (!empty($this->tools)) {
            arsort($this->tools);
        }
    }
815
816
    /**
     * Feed a single edit into all of the running statistics.
     * @param Edit $edit The edit currently being processed.
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateCounts(Edit $edit, array $prevEdits): array
    {
        // Counts for the year and month in which the edit was made.
        $this->updateYearMonthCounts($edit);

        // Counts for the user who made the edit.
        $this->updateUserCounts($edit);

        // Year/month/user counts of anon and minor edits.
        $this->updateAnonMinorCounts($edit);

        // Counts for automated tool usage, if applicable.
        $this->updateToolCounts($edit);

        // "Edits per <time>" counts.
        $this->updateCountHistory($edit);

        // Figures regarding content addition/removal, and the revert count.
        $prevEdits = $this->updateContentSizes($edit, $prevEdits);

        // All counts are updated, so the tracking data can now move forward by one edit.
        // Before 'prev' is overwritten, remember the SHA of the edit it currently holds,
        // so that we'll know that content added after it was reverted.
        $previous = $prevEdits['prev'];
        if (null !== $previous) {
            $prevEdits['prevSha'] = $previous->getSha();
        }
        $prevEdits['prev'] = $edit;
        $this->lastEdit = $edit;

        return $prevEdits;
    }
855
856
    /**
     * Update the content size figures for the given edit, dispatching to the
     * revert or non-revert handling. An edit found to be a revert is flagged as such.
     * @param Edit $edit
     * @param mixed[] $prevEdits Tracking state with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits.
     */
    private function updateContentSizes(Edit &$edit, array $prevEdits): array
    {
        // The common case: an ordinary edit.
        if (!$this->isRevert($edit, $prevEdits)) {
            return $this->updateContentSizesNonRevert($edit, $prevEdits);
        }

        $edit->setReverted(true);

        return $this->updateContentSizesRevert($prevEdits);
    }
872
873
    /**
     * Determine whether the given Edit is a revert: either its SHA equals the
     * tracked 'prevSha', or the Edit itself reports being a revert.
     * @param Edit $edit
     * @param mixed[] $prevEdits Tracking state with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return bool
     */
    private function isRevert(Edit $edit, array $prevEdits): bool
    {
        $restoresEarlierContent = $edit->getSha() === $prevEdits['prevSha'];

        // Edit::isRevert() is only consulted when the SHA comparison fails.
        return $restoresEarlierContent || $edit->isRevert($this->container);
    }
883
884
    /**
     * Updates the figures on content sizes assuming the current edit was a revert of the previous one.
     * In such a case, we don't want to treat the previous edit as legit content addition or removal.
     *
     * The revert is always counted. When no previous edit is being tracked there is
     * nothing to undo, so the tracking state is returned unchanged.
     * @param mixed[] $prevEdits Tracking state with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesRevert(array $prevEdits): array
    {
        $this->revertCount++;

        $reverted = $prevEdits['prev'] ?? null;
        if (!$reverted) {
            // Nothing precedes this edit; guard explicitly rather than relying on
            // maxAddition/maxDeletion happening to be unset at this point.
            return $prevEdits;
        }

        // Adjust addedBytes given this edit was a revert of the previous one.
        // We don't do this if the previous edit was itself reverted, since that would make the net bytes zero.
        if (!$reverted->isReverted() && $reverted->getSize() > 0) {
            $revertedSize = $reverted->getSize();
            $this->addedBytes -= $revertedSize;

            // Also deduct from the user's individual added byte count.
            $user = $reverted->getUser();
            if ($user) {
                $this->editors[$user->getUsername()]['added'] -= $revertedSize;
            }
        }

        // @TODO: Test this against an edit war (use your sandbox).
        // If the reverted edit held the record for max addition or max deletion, restore the
        // record that preceded it, and keep the reverted edit as the fallback in the event of edit wars.
        $revertedId = $reverted->getId();
        if ($this->maxAddition && $revertedId === $this->maxAddition->getId()) {
            $this->maxAddition = $prevEdits['maxAddition'];
            $prevEdits['maxAddition'] = $reverted;
        } elseif ($this->maxDeletion && $revertedId === $this->maxDeletion->getId()) {
            $this->maxDeletion = $prevEdits['maxDeletion'];
            $prevEdits['maxDeletion'] = $reverted;
        }

        return $prevEdits;
    }
918
919
    /**
     * Updates the figures on content sizes assuming the given edit was NOT a revert of the previous edit.
     * A positive size counts as added content (overall and for the editor); the edits with the
     * largest addition and the largest deletion are tracked as well.
     * @param Edit $edit
     * @param mixed[] $prevEdits Tracking state with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return mixed[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesNonRevert(Edit $edit, array $prevEdits): array
    {
        $size = $this->getEditSize($edit, $prevEdits);

        if ($size > 0) {
            // Not a revert, so a positive size is content added.
            $this->addedBytes += $size;

            $user = $edit->getUser();
            if ($user) {
                $this->editors[$user->getUsername()]['added'] += $size;
            }

            if (!$this->maxAddition || $size > $this->maxAddition->getSize()) {
                // Hold on to the outgoing record: should this edit turn out to be
                // reverted, the revert handling restores it from the tracking state.
                $prevEdits['maxAddition'] = $this->maxAddition;
                $this->maxAddition = $edit;
            }

            return $prevEdits;
        }

        $isNewMaxDeletion = $size < 0
            && (!$this->maxDeletion || $size < $this->maxDeletion->getSize());

        if ($isNewMaxDeletion) {
            // Same bookkeeping as above, for the largest deletion.
            $prevEdits['maxDeletion'] = $this->maxDeletion;
            $this->maxDeletion = $edit;
        }

        return $prevEdits;
    }
955
956
    /**
     * Get the size of the given edit, based on the previous edit (if present).
     * Zero is returned instead of the actual size when the previous revision has a length
     * of null, which happens when the edit follows other edits that were revision-deleted.
     * @see T148857 for more information.
     * @todo Remove once T101631 is resolved.
     * @param Edit $edit
     * @param mixed[] $prevEdits Tracking state with keys 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return int
     */
    private function getEditSize(Edit $edit, array $prevEdits): int
    {
        $previous = $prevEdits['prev'];

        if ($previous && null === $previous->getLength()) {
            return 0;
        }

        return $edit->getSize();
    }
974
975
    /**
     * Update counts of automated tool usage for the given edit. Does nothing
     * when the edit was not made with a recognized tool.
     * @param Edit $edit
     */
    private function updateToolCounts(Edit $edit): void
    {
        $tool = $edit->getTool($this->container);
        if (false === $tool) {
            // Not an automated edit.
            return;
        }

        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Overall, yearly and monthly totals of automated edits.
        $this->automatedCount++;
        $this->yearMonthCounts[$year]['automated']++;
        $this->yearMonthCounts[$year]['months'][$month]['automated']++;

        // Per-tool totals, keyed by tool name.
        $toolName = $tool['name'];
        if (isset($this->tools[$toolName])) {
            $this->tools[$toolName]['count']++;
            return;
        }

        $this->tools[$toolName] = [
            'count' => 1,
            'link' => $tool['link'],
        ];
    }
1004
1005
    /**
     * Update the per-year and per-month edit totals for the given edit, along with
     * the year's page size and the record for the busiest month.
     * @param Edit $edit
     */
    private function updateYearMonthCounts(Edit $edit): void
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // First edit seen for this year: create the blank year and month entries.
        if (!isset($this->yearMonthCounts[$year])) {
            $this->addYearMonthCountEntry($edit);
        }

        $this->yearMonthCounts[$year]['all']++;

        // Overwritten by each edit of the year, so this will ultimately be
        // the size of the page by the end of the year.
        $this->yearMonthCounts[$year]['size'] = (int) $edit->getLength();

        // Bump the month total, and keep track of which month had the most edits.
        $monthTotal = ++$this->yearMonthCounts[$year]['months'][$month]['all'];
        if ($monthTotal > $this->maxEditsPerMonth) {
            $this->maxEditsPerMonth = $monthTotal;
        }
    }
1031
1032
    /**
     * Add a new entry to $this->yearMonthCounts for the year of the given edit, with
     * zeroed counts for the year and for each applicable month. The year and month
     * chart labels are recorded at the same time. This is called during self::parseHistory().
     * @param Edit $edit
     */
    private function addYearMonthCountEntry(Edit $edit): void
    {
        $this->yearLabels[] = $this->i18n->dateFormat($edit->getTimestamp(), 'yyyy');
        $year = $edit->getYear();

        // Start (00:00:00 on the 1st) of the month in which the first edit was made.
        $firstEditMonthStart = mktime(
            0,
            0,
            0,
            (int)$this->firstEdit->getMonth(),
            1,
            (int)$this->firstEdit->getYear()
        );

        // Counters shared by the year entry and every month entry.
        $blankCounts = [
            'all' => 0,
            'minor' => 0,
            'anon' => 0,
            'automated' => 0,
        ];

        $this->yearMonthCounts[$year] = $blankCounts + [
            'size' => 0, // Keep track of the size by the end of the year.
            'events' => [],
            'months' => [],
        ];

        foreach (range(1, 12) as $month) {
            $monthStart = mktime(0, 0, 0, $month, 1, (int)$year);

            // Don't show zeros for months before the first edit, or past what getLastDay() reports.
            if ($monthStart < $firstEditMonthStart || $monthStart > $this->getLastDay()) {
                continue;
            }

            $this->monthLabels[] = $this->i18n->dateFormat($monthStart, 'yyyy-MM');
            $this->yearMonthCounts[$year]['months'][sprintf('%02d', $month)] = $blankCounts;
        }
    }
1072
1073
    /**
     * Update the overall, yearly and monthly counts of anonymous and minor edits
     * according to the flags of the given edit.
     * @param Edit $edit
     */
    private function updateAnonMinorCounts(Edit $edit): void
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Collect which of the two counters apply to this edit.
        $flags = [];

        if ($edit->isAnon()) {
            $this->anonCount++;
            $flags[] = 'anon';
        }

        if ($edit->isMinor()) {
            $this->minorCount++;
            $flags[] = 'minor';
        }

        foreach ($flags as $flag) {
            $this->yearMonthCounts[$year][$flag]++;
            $this->yearMonthCounts[$year]['months'][$month][$flag]++;
        }
    }
1096
1097
    /**
     * Update the statistics kept in $this->editors for the user who made the given edit.
     * Edits without a user are skipped.
     * @param Edit $edit
     */
    private function updateUserCounts(Edit $edit): void
    {
        $user = $edit->getUser();
        if (!$user) {
            return;
        }

        $username = $user->getUsername();

        // First time we encounter this editor: set up their stats, recording
        // this edit as their first.
        if (!isset($this->editors[$username])) {
            $this->editors[$username] = [
                'all' => 0,
                'minor' => 0,
                'minorPercentage' => 0,
                'first' => $edit->getTimestamp(),
                'firstId' => $edit->getId(),
                'last' => null,
                'atbe' => null,
                'added' => 0,
            ];
        }

        // Every edit counts towards the total and becomes the editor's latest edit so far.
        $this->editors[$username]['all']++;
        $this->editors[$username]['last'] = $edit->getTimestamp();
        $this->editors[$username]['lastId'] = $edit->getId();

        if ($edit->isMinor()) {
            $this->editors[$username]['minor']++;
        }
    }
1133
1134
    /**
     * Increment the "edits per <time>" counts (past day, week, month and year)
     * for each window the given edit falls within.
     * @param Edit $edit
     */
    private function updateCountHistory(Edit $edit): void
    {
        $editTimestamp = $edit->getTimestamp();

        // Counter key => relative date marking the start of that window.
        $windows = [
            'day' => '-1 day',
            'week' => '-1 week',
            'month' => '-1 month',
            'year' => '-1 year',
        ];

        foreach ($windows as $period => $offset) {
            if ($editTimestamp > new DateTime($offset)) {
                $this->countHistory[$period]++;
            }
        }
    }
1155
1156
    /**
     * Get info about the bots that edited the page.
     * @return mixed[] Keyed by bot username; each value holds the bot's edit count to the page ('count')
     *   and whether or not they are currently a bot ('current').
     */
    public function getBots(): array
    {
        return $this->bots;
    }
1164
1165
    /**
     * Fetch and store info about the bots that edited the page, sorted by edit count (highest first).
     * This is done as a private setter because we need this information when computing
     * the top 10 editors, where we don't want to include bots.
     */
    private function setBots(): void
    {
        /** @var Statement $botData */
        $botData = $this->getRepository()->getBotData($this->page, $this->start, $this->end);

        // Index the rows by username, normalizing the count and 'current' flag.
        $bots = [];
        while ($row = $botData->fetch()) {
            $bots[$row['username']] = [
                'count' => (int)$row['count'],
                'current' => '1' === $row['current'],
            ];
        }

        // Most active bots first.
        uasort($bots, static function (array $a, array $b): int {
            return $b['count'] <=> $a['count'];
        });

        $this->bots = $bots;
    }
1190
1191
    /**
     * Number of edits made to the page by current or former bots.
     * The result is computed once and cached; later calls return the cached
     * value regardless of the argument.
     * @param mixed[]|null $bots Used only in unit tests, where we supply mock data for the bots that will
     *   get processed. Same structure as self::getBots(), which is used when null.
     * @return int
     */
    public function getBotRevisionCount(?array $bots = null): int
    {
        if (isset($this->botRevisionCount)) {
            return $this->botRevisionCount;
        }

        $botData = $bots ?? $this->getBots();

        // Add up the 'count' of every bot.
        $this->botRevisionCount = array_sum(array_column($botData, 'count'));

        return $this->botRevisionCount;
    }
1215
1216
    /**
     * Query for log events during each year of the article's history, and tally them
     * under the 'events' key of the matching year in $this->yearMonthCounts.
     *
     * Only protections (including pending-changes protections), deletions and moves are
     * tallied. Events of any other type, and events from a year that has no entry in
     * $this->yearMonthCounts, are skipped.
     */
    private function setLogsEvents(): void
    {
        // Log type => i18n key. Pending-changes ('stable') protections are
        // counted along with normal protections.
        $actionKeys = [
            'protect' => 'protections',
            'delete' => 'deletions',
            'move' => 'moves',
            'stable' => 'protections',
        ];

        $logData = $this->getRepository()->getLogEvents(
            $this->page,
            $this->start,
            $this->end
        );

        foreach ($logData as $event) {
            $time = strtotime($event['timestamp']);
            $year = date('Y', $time);

            // Skip just this event; a year without edits must not discard
            // the events of the years that follow it.
            if (!isset($this->yearMonthCounts[$year])) {
                continue;
            }

            // Skip unhandled log types rather than counting them under an undefined
            // (or left-over from the previous iteration) action.
            if (!isset($actionKeys[$event['log_type']])) {
                continue;
            }
            $action = $actionKeys[$event['log_type']];

            $yearEvents = $this->yearMonthCounts[$year]['events'];
            $yearEvents[$action] = empty($yearEvents[$action]) ? 1 : $yearEvents[$action] + 1;
            $this->yearMonthCounts[$year]['events'] = $yearEvents;
        }
    }
1263
1264
    /**
     * Set statistics about the top 10 editors by added text and number of edits.
     * This is ran *after* parseHistory() since we need the grand totals first.
     * Various stats are also set for each editor in $this->editors to be used in the charts:
     * the percentage of minor edits, and the average time between edits.
     */
    private function doPostPrecessing(): void
    {
        $topTenCount = 0;
        $topTenEditorsByEdits = [];

        foreach ($this->editors as $editor => $info) {
            // Collect the first 10 editors (in the order of $this->editors) that are not bots.
            // Bots are matched by exact array key, so that numeric-looking usernames
            // can't be mistaken for one another through loose comparison.
            if (count($topTenEditorsByEdits) < 10 && !isset($this->bots[$editor])) {
                $topTenCount += $info['all'];

                // To be used in the Top Ten charts.
                $topTenEditorsByEdits[] = [
                    'label' => $editor,
                    'value' => $info['all'],
                ];
            }

            // Compute the percentage of minor edits the user made.
            $this->editors[$editor]['minorPercentage'] = $info['all']
                ? ($info['minor'] / $info['all']) * 100
                : 0;

            if ($info['all'] > 1) {
                // Number of seconds/days between first and last edit.
                $secs = $info['last']->getTimestamp() - $info['first']->getTimestamp();
                $days = $secs / (60 * 60 * 24);

                // Average time between edits (in days).
                $this->editors[$editor]['atbe'] = $days / $info['all'];
            }
        }

        // Loop through again and add each editor's share of the top ten's combined edits.
        // $topTenCount can't be zero here: the callback only runs for collected
        // editors, each of whom contributed to the total.
        $this->topTenEditorsByEdits = array_map(static function (array $editor) use ($topTenCount): array {
            $editor['percentage'] = 100 * ($editor['value'] / $topTenCount);
            return $editor;
        }, $topTenEditorsByEdits);

        $this->topTenEditorsByAdded = $this->getTopTenByAdded();

        $this->topTenCount = $topTenCount;
    }
1312
1313
    /**
     * Get the top ten editors by added text.
     * Percentages are relative to the total bytes added to the page ($this->addedBytes),
     * and are zero when nothing was added.
     * @return array With keys 'label', 'value' and 'percentage', ready to be used by the pieChart Twig helper.
     */
    private function getTopTenByAdded(): array
    {
        // Work on a copy, ordered by the amount of text added (largest first).
        $byAdded = $this->editors;
        uasort($byAdded, static function (array $a, array $b): int {
            return $b['added'] <=> $a['added'];
        });

        // Usernames of the top 10.
        $usernames = array_keys(array_slice($byAdded, 0, 10, true));

        // Build the data structure needed for the chart.
        $chartData = [];
        foreach ($usernames as $username) {
            $added = $this->editors[$username]['added'];

            $chartData[] = [
                'label' => $username,
                'value' => $added,
                'percentage' => 0 === $this->addedBytes
                    ? 0
                    : 100 * ($added / $this->addedBytes),
            ];
        }

        return $chartData;
    }
1348
1349
    /**
     * Get authorship attribution from the WikiWho API.
     *
     * The result is cached on the first successful call; later calls return the cached
     * value even when given a different $limit.
     * @see https://f-squared.org/wikiwho/
     * @param int|null $limit Max number of editors in the 'list'. Null for no limit.
     * @return array With keys 'list' (count and percentage for each editor, keyed by username or IP),
     *   'totalAuthors', 'totalCount', and 'others' when the editors accounted for don't add up to 100%.
     *   If no revision data is available, an array with only the key 'error' is returned.
     */
    public function getTextshares(?int $limit = null): array
    {
        if (isset($this->textshares)) {
            return $this->textshares;
        }

        // TODO: check for failures. Should have a success:true
        $ret = $this->getRepository()->getTextshares($this->page);

        // If revision can't be found, return error message.
        if (!isset($ret['revisions'][0])) {
            return [
                'error' => $ret['Error'] ?? 'Unknown',
            ];
        }

        $revId = array_keys($ret['revisions'][0])[0];
        $tokens = $ret['revisions'][0][$revId]['tokens'];

        [$counts, $totalCount, $userIds] = $this->countTokens($tokens);
        $usernameMap = $this->getUsernameMap($userIds);

        $countsToProcess = null !== $limit
            ? array_slice($counts, 0, $limit, true)
            : $counts;

        $textshares = [];

        // Used to get the character count and percentage of the remaining N editors, after the top 10.
        $percentageSum = 0;
        $countSum = 0;
        $numEditors = 0;

        // Loop through once more, creating an array with the user names (or IP addresses)
        // as the key, and the count and percentage as the value.
        foreach ($countsToProcess as $editor => $count) {
            // Entries without a known username (such as IPs) are listed as-is.
            $index = $usernameMap[$editor] ?? $editor;

            // The total is zero when every token is an empty string; avoid dividing by it.
            $percentage = $totalCount > 0
                ? round(100 * ($count / $totalCount), 1)
                : 0.0;

            // If we are showing > 10 editors in the table, we still only want the top 10 for the chart.
            if ($numEditors < 10) {
                $percentageSum += $percentage;
                $countSum += $count;
                $numEditors++;
            }

            $textshares[$index] = [
                'count' => $count,
                'percentage' => $percentage,
            ];
        }

        $this->textshares = [
            'list' => $textshares,
            'totalAuthors' => count($counts),
            'totalCount' => $totalCount,
        ];

        // Record character count and percentage for the remaining editors.
        if ($percentageSum < 100) {
            $this->textshares['others'] = [
                'count' => $totalCount - $countSum,
                'percentage' => round(100 - $percentageSum, 1),
                'numEditors' => count($counts) - $numEditors,
            ];
        }

        return $this->textshares;
    }
1431
1432
    /**
     * Get a map of user IDs to usernames, given the IDs.
     * No lookup is made when there are no IDs.
     * @param int[] $userIds
     * @return array IDs as keys, usernames as values.
     */
    private function getUsernameMap(array $userIds): array
    {
        if (!$userIds) {
            return [];
        }

        $rows = $this->getRepository()->getUsernamesFromIds(
            $this->page->getProject(),
            $userIds
        );

        // Re-key the rows by user ID.
        $map = [];
        foreach ($rows as $row) {
            $map[$row['user_id']] = $row['user_name'];
        }

        return $map;
    }
1455
1456
    /**
     * Get counts of token lengths for each author. Used in self::getTextshares()
     * @param array $tokens Each with the keys 'editor' (user ID, or an IP address prefixed with '0|')
     *   and 'str' (the text of the token).
     * @return array [counts by user (IP address or user ID as the key, largest count first),
     *   total count, unique IDs of accounts in order of first appearance]
     */
    private function countTokens(array $tokens): array
    {
        $counts = [];
        $userIds = [];
        $totalCount = 0;

        // Loop through the tokens, keeping totals (token length) for each author.
        foreach ($tokens as $token) {
            $editor = $token['editor'];

            // IPs are prefixed with '0|', otherwise it's the user ID.
            if ('0|' === substr($editor, 0, 2)) {
                $editor = substr($editor, 2);
            } else {
                // Keyed by ID so that each account is listed once, no matter
                // how many tokens are attributed to it.
                $userIds[$editor] = $editor;
            }

            // Note this is the length in bytes.
            $length = strlen($token['str']);

            $counts[$editor] = ($counts[$editor] ?? 0) + $length;
            $totalCount += $length;
        }

        // Sort authors by count.
        arsort($counts);

        return [$counts, $totalCount, array_values($userIds)];
    }
1491
1492
    /**
     * Get the domain names of the wikis supported by WikiWho.
     * Instance-level accessor for self::TEXTSHARE_WIKIS.
     * @return string[]
     * @codeCoverageIgnore
     */
    public function getTextshareWikis(): array
    {
        return self::TEXTSHARE_WIKIS;
    }
1501
1502
    /**
     * Get prose and reference information, computed from the HTML content of the page
     * (as of the end date, if one was given).
     * @return array With keys 'characters', 'words', 'references', 'unique_references' and 'sections'.
     */
    public function getProseStats(): array
    {
        $datetime = null;
        if (false !== $this->end) {
            $datetime = new DateTime('@'.$this->end);
        }

        $crawler = new Crawler($this->page->getHTMLContent($datetime));

        [$chars, $words] = $this->countCharsAndWords($crawler, '#mw-content-text p');

        // References with the same text are considered to be the same reference.
        $refs = $crawler->filter('#mw-content-text .reference');
        $refTexts = $refs->each(function ($ref) {
            return $ref->text();
        });

        $headlines = $crawler->filter('#mw-content-text .mw-headline');

        return [
            'characters' => $chars,
            'words' => $words,
            'references' => $refs->count(),
            'unique_references' => count(array_unique($refTexts)),
            'sections' => count($headlines),
        ];
    }
1532
1533
    /**
     * Count the number of characters and words of the plain text within the DOM elements
     * matched by the given selector. Numeric citation markers such as "[12]" are not counted.
     *
     * Characters are counted as UTF-8 characters rather than bytes. Words are runs of
     * non-whitespace, so an empty element contributes no words.
     * @param Crawler $crawler
     * @param string $selector HTML selector.
     * @return array [num chars, num words]
     */
    private function countCharsAndWords(Crawler $crawler, string $selector): array
    {
        $totalChars = 0;
        $totalWords = 0;

        $crawler->filter($selector)->each(function ($node) use (&$totalChars, &$totalWords): void {
            // Strip the citation markers before measuring.
            $text = (string)preg_replace('/\[\d+\]/', '', trim($node->text()));

            $totalChars += mb_strlen($text, 'UTF-8');

            // preg_split() gives false on failure (e.g. malformed UTF-8); count no words then.
            $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
            $totalWords += false === $words ? 0 : count($words);
        });

        return [$totalChars, $totalWords];
    }
1552
1553
    /**
     * Fetch transclusion data (categories, templates and files) that are on the page.
     * The data is requested from the repository only if it isn't already loaded.
     * @return array With keys 'categories', 'templates' and 'files'.
     */
    private function getTransclusionData(): array
    {
        if (is_array($this->transclusionData)) {
            return $this->transclusionData;
        }

        $this->transclusionData = $this->getRepository()->getTransclusionData($this->page);

        return $this->transclusionData;
    }
1565
1566
    /**
     * Get the number of categories that are on the page, as given by the transclusion data.
     * @return int
     */
    public function getNumCategories(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['categories'];
    }
1574
1575
    /**
     * Get the number of templates that are on the page, as given by the transclusion data.
     * @return int
     */
    public function getNumTemplates(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['templates'];
    }
1583
1584
    /**
     * Get the number of files that are on the page, as given by the transclusion data.
     * @return int
     */
    public function getNumFiles(): int
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['files'];
    }
1592
}
1593