Passed
Push — master ( c88656...e5f428 )
by MusikAnimal
08:08
created

ArticleInfo::getTopTenByAdded()   A

Complexity

Conditions 3
Paths 1

Size

Total Lines 30
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
eloc 17
nc 1
nop 0
dl 0
loc 30
ccs 17
cts 17
cp 1
crap 3
rs 9.7
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the ArticleInfo class.
4
 */
5
6
namespace Xtools;
7
8
use AppBundle\Helper\I18nHelper;
9
use DateTime;
10
use Symfony\Component\DependencyInjection\Container;
11
use Symfony\Component\DomCrawler\Crawler;
12
13
/**
14
 * An ArticleInfo provides statistics about a page on a project.
15
 */
16
class ArticleInfo extends Model
17
{
18
    /** @const string[] Domain names of wikis supported by WikiWho. */
19
    const TEXTSHARE_WIKIS = [
20
        'en.wikipedia.org',
21
        'de.wikipedia.org',
22
        'eu.wikipedia.org',
23
        'tr.wikipedia.org',
24
        'es.wikipedia.org',
25
    ];
26
27
    /** @var Container The application's DI container. */
28
    protected $container;
29
30
    /** @var I18nHelper For i18n and l10n. */
31
    protected $i18n;
32
33
    /** @var int Number of revisions that belong to the page. */
34
    protected $numRevisions;
35
36
    /** @var int Maximum number of revisions to process, as configured. */
37
    protected $maxRevisions;
38
39
    /** @var int Number of revisions that were actually processed. */
40
    protected $numRevisionsProcessed;
41
42
    /**
43
     * Various statistics about editors to the page. These are not User objects
44
     * so as to preserve memory.
45
     * @var mixed[]
46
     */
47
    protected $editors;
48
49
    /** @var mixed[] The top 10 editors to the page by number of edits. */
50
    protected $topTenEditorsByEdits;
51
52
    /** @var mixed[] The top 10 editors to the page by added text. */
53
    protected $topTenEditorsByAdded;
54
55
    /** @var int Number of edits made by the top 10 editors. */
56
    protected $topTenCount;
57
58
    /** @var mixed[] Various statistics about bots that edited the page. */
59
    protected $bots;
60
61
    /** @var int Number of edits made to the page by bots. */
62
    protected $botRevisionCount;
63
64
    /** @var mixed[] Various counts about each individual year and month of the page's history. */
65
    protected $yearMonthCounts;
66
67
    /** @var string[] Localized labels for the years, to be used in the 'Year counts' chart. */
68
    protected $yearLabels = [];
69
70
    /** @var string[] Localized labels for the months, to be used in the 'Month counts' chart. */
71
    protected $monthLabels = [];
72
73
    /** @var Edit The first edit to the page. */
74
    protected $firstEdit;
75
76
    /** @var Edit The last edit to the page. */
77
    protected $lastEdit;
78
79
    /** @var Edit Edit that made the largest addition by number of bytes. */
80
    protected $maxAddition;
81
82
    /** @var Edit Edit that made the largest deletion by number of bytes. */
83
    protected $maxDeletion;
84
85
    /** @var int[] Number of in and outgoing links and redirects to the page. */
86
    protected $linksAndRedirects;
87
88
    /** @var string[] Assessments of the page (see Page::getAssessments). */
89
    protected $assessments;
90
91
    /**
92
     * Maximum number of edits that were created across all months. This is used as a comparison
93
     * for the bar charts in the months section.
94
     * @var int
95
     */
96
    protected $maxEditsPerMonth;
97
98
    /** @var string[] List of (semi-)automated tools that were used to edit the page. */
99
    protected $tools;
100
101
    /**
102
     * Total number of bytes added throughout the page's history. This is used as a comparison
103
     * when computing the top 10 editors by added text.
104
     * @var int
105
     */
106
    protected $addedBytes = 0;
107
108
    /** @var int Number of days between first and last edit. */
109
    protected $totalDays;
110
111
    /** @var int Number of minor edits to the page. */
112
    protected $minorCount = 0;
113
114
    /** @var int Number of anonymous edits to the page. */
115
    protected $anonCount = 0;
116
117
    /** @var int Number of automated edits to the page. */
118
    protected $automatedCount = 0;
119
120
    /** @var int Number of edits to the page that were reverted with the subsequent edit. */
121
    protected $revertCount = 0;
122
123
    /** @var int[] The "edits per <time>" counts. */
124
    protected $countHistory = [
125
        'day' => 0,
126
        'week' => 0,
127
        'month' => 0,
128
        'year' => 0
129
    ];
130
131
    /** @var string[] List of wikidata and Checkwiki errors. */
132
    protected $bugs;
133
134
    /** @var array List of editors and the percentage of the current content that they authored. */
135
    protected $textshares;
136
137
    /** @var array Number of categories, templates and files on the page. */
138
    protected $transclusionData;
139
140
    /**
     * Create an ArticleInfo for a page, optionally restricted to a date range.
     * @param Page $page The page to process.
     * @param Container $container The DI container.
     * @param false|int $start From what date to obtain records (false for no lower bound).
     * @param false|int $end To what date to obtain records (false for no upper bound).
     */
    public function __construct(Page $page, Container $container, $start = false, $end = false)
    {
        // The four assignments are independent of one another.
        $this->start = $start;
        $this->end = $end;
        $this->container = $container;
        $this->page = $page;
    }
154
155
    /**
     * Inject the I18nHelper so that ArticleInfo can use it.
     * @param I18nHelper $i18n
     * @codeCoverageIgnore
     */
    public function setI18nHelper(I18nHelper $i18n)
    {
        $this->i18n = $i18n;
    }
164
165
    /**
     * Get the opening date of the date range, formatted as YYYY-MM-DD for use in the views.
     * @return string Blank if no value exists.
     */
    public function getStartDate()
    {
        // false is the constructor default; null and '' were also treated as "no value"
        // by the previous loose comparison. Checking them explicitly avoids `0 == ''`,
        // whose result differs between PHP 7 and PHP 8.
        if ($this->start === false || $this->start === null || $this->start === '') {
            return '';
        }

        // date() expects an integer timestamp; cast in case the value arrived as a numeric string.
        return date('Y-m-d', (int) $this->start);
    }
173
174
    /**
     * Get the closing date of the date range, formatted as YYYY-MM-DD for use in the views.
     * @return string Blank if no value exists.
     */
    public function getEndDate()
    {
        // false is the constructor default; null and '' were also treated as "no value"
        // by the previous loose comparison. Checking them explicitly avoids `0 == ''`,
        // whose result differs between PHP 7 and PHP 8.
        if ($this->end === false || $this->end === null || $this->end === '') {
            return '';
        }

        // date() expects an integer timestamp; cast in case the value arrived as a numeric string.
        return date('Y-m-d', (int) $this->end);
    }
182
183
    /**
     * Get the last day that should be shown in the month/year sections,
     * based on $this->end or, when no end date is set, the current date.
     * @return int As Unix timestamp.
     */
    private function getLastDay()
    {
        // No end date: use the last day of the current month.
        if ($this->end === false) {
            return strtotime('last day of this month');
        }

        // Otherwise, the last day of the month that contains the end date.
        $endOfRange = new DateTime('@'.$this->end);
        $endOfRange->modify('last day of this month');

        return $endOfRange->getTimestamp();
    }
198
199
    /**
     * Return the start/end dates as an associative array, using YYYY-MM-DD as the date format.
     * This is used mainly as a helper to pass to the pageviews Twig macros.
     * @return array Empty if there is no date range, otherwise with keys 'start' and 'end'.
     */
    public function getDateParams()
    {
        if (!$this->hasDateRange()) {
            return [];
        }

        $format = 'Y-m-d';

        // Default to the dates of the first and last edit...
        $params = [
            'start' => $this->firstEdit->getTimestamp()->format($format),
            'end' => $this->lastEdit->getTimestamp()->format($format),
        ];

        // ...and override with whichever bounds were explicitly given.
        // NOTE(review): static analysis flagged that $this->start may not be an integer
        // when handed to date() — confirm it is normalised upstream.
        if (false !== $this->start) {
            $params['start'] = date($format, $this->start);
        }
        if (false !== $this->end) {
            $params['end'] = date($format, $this->end);
        }

        return $params;
    }
224
225
    /**
     * Get the number of revisions belonging to the page, within the date range if one is set.
     * The value is fetched from the Page once and then reused.
     * @return int
     */
    public function getNumRevisions()
    {
        if (isset($this->numRevisions)) {
            return $this->numRevisions;
        }

        // NOTE(review): static analysis flagged that start/end may arrive as strings while
        // Page::getNumRevisions() is documented as int|false — confirm upstream normalisation.
        $this->numRevisions = $this->page->getNumRevisions(null, $this->start, $this->end);

        return $this->numRevisions;
    }
236
237
    /**
     * Get the maximum number of revisions that we should process, as configured
     * by the 'app.max_page_revisions' container parameter.
     * @return int
     */
    public function getMaxRevisions()
    {
        if (isset($this->maxRevisions)) {
            return $this->maxRevisions;
        }

        $configured = $this->container->getParameter('app.max_page_revisions');
        $this->maxRevisions = (int) $configured;

        return $this->maxRevisions;
    }
248
249
    /**
     * Get the number of revisions that are actually getting processed. This is the
     * app.max_page_revisions parameter when the page exceeds it, otherwise the actual
     * number of revisions.
     * @return int
     */
    public function getNumRevisionsProcessed()
    {
        if (!isset($this->numRevisionsProcessed)) {
            $this->numRevisionsProcessed = $this->tooManyRevisions()
                ? $this->getMaxRevisions()
                : $this->getNumRevisions();
        }

        return $this->numRevisionsProcessed;
    }
268
269
    /**
     * Are there more revisions than we should process, based on the config?
     * A configured maximum of zero or less means there is no limit.
     * @return bool
     */
    public function tooManyRevisions()
    {
        $max = $this->getMaxRevisions();

        if ($max <= 0) {
            return false;
        }

        return $this->getNumRevisions() > $max;
    }
277
278
    /**
     * Fetch and store all the data we need to show the ArticleInfo view.
     * The steps run in a fixed order.
     * @codeCoverageIgnore
     */
    public function prepareData()
    {
        // Walk the revision history, then collect the log events.
        $this->parseHistory();
        $this->setLogsEvents();

        // Bots need to be set before setting top 10 counts.
        $this->setBots();

        $this->doPostPrecessing();
    }
292
293
    /**
     * Get the number of editors that edited the page.
     * @return int Zero if the editor statistics have not been populated.
     */
    public function getNumEditors()
    {
        // $editors has no default value, so it is null until set; count(null)
        // warns on PHP 7.2+ and throws a TypeError on PHP 8.
        return is_array($this->editors) ? count($this->editors) : 0;
    }
301
302
    /**
     * Get the number of bots that edited the page.
     * @return int
     */
    public function getNumBots()
    {
        $bots = $this->getBots();

        return count($bots);
    }
310
311
    /**
     * Get the number of days between the first and last edit.
     * The value is computed once and then reused.
     * @return int
     */
    public function getTotalDays()
    {
        if (isset($this->totalDays)) {
            return $this->totalDays;
        }

        $dateFirst = $this->firstEdit->getTimestamp();
        $dateLast = $this->lastEdit->getTimestamp();

        // Third argument makes the interval absolute, so the order of the dates is irrelevant.
        $interval = date_diff($dateLast, $dateFirst, true);

        // DateInterval::format() returns a string; cast so the property and the
        // return value are really the documented int.
        $this->totalDays = (int) $interval->format('%a');

        return $this->totalDays;
    }
326
327
    /**
     * Returns the length of the page. With a date range, this is the length
     * reported by the last edit; otherwise it is the length reported by the Page.
     * @return int
     */
    public function getLength()
    {
        if (!$this->hasDateRange()) {
            return $this->page->getLength();
        }

        return $this->lastEdit->getLength();
    }
339
340
    /**
     * Get the average number of days between edits to the page.
     * @return double Rounded to one decimal place; 0.0 if no revisions were processed.
     */
    public function averageDaysPerEdit()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Guard against division by zero (a DivisionByZeroError on PHP 8).
        if (!$numRevisions) {
            return 0.0;
        }

        return round($this->getTotalDays() / $numRevisions, 1);
    }
348
349
    /**
     * Get the average number of edits per day to the page.
     * @return double Rounded to one decimal place; zero when the total number of days is zero.
     */
    public function editsPerDay()
    {
        $rate = 0;
        $totalDays = $this->getTotalDays();

        if ($totalDays) {
            // NOTE(review): the divisor (365 / 12 / 24), roughly 1.27, does not look like a
            // day-based unit. Kept unchanged to preserve existing output — confirm the intent.
            $rate = $this->getNumRevisionsProcessed() / ($totalDays / (365 / 12 / 24));
        }

        return round($rate, 1);
    }
360
361
    /**
     * Get the average number of edits per month to the page, capped at the
     * number of revisions processed.
     * @return double
     */
    public function editsPerMonth()
    {
        $rate = 0;
        $totalDays = $this->getTotalDays();

        if ($totalDays) {
            // 365 / 12 is the average number of days in a month.
            $rate = $this->getNumRevisionsProcessed() / ($totalDays / (365 / 12));
        }

        return min($this->getNumRevisionsProcessed(), round($rate, 1));
    }
372
373
    /**
     * Get the average number of edits per year to the page, capped at the
     * number of revisions processed.
     * @return double
     */
    public function editsPerYear()
    {
        $rate = 0;
        $totalDays = $this->getTotalDays();

        if ($totalDays) {
            $rate = $this->getNumRevisionsProcessed() / ($totalDays / 365);
        }

        return min($this->getNumRevisionsProcessed(), round($rate, 1));
    }
384
385
    /**
     * Get the average number of edits per editor.
     * @return double Rounded to one decimal place; 0.0 if there are no editors.
     */
    public function editsPerEditor()
    {
        // $editors is null until populated; treat that the same as an empty list.
        $numEditors = is_array($this->editors) ? count($this->editors) : 0;

        // Guard against division by zero (a DivisionByZeroError on PHP 8).
        if ($numEditors === 0) {
            return 0.0;
        }

        return round($this->getNumRevisionsProcessed() / $numEditors, 1);
    }
393
394
    /**
     * Get the percentage of minor edits to the page.
     * @return double Rounded to one decimal place; 0.0 if no revisions were processed.
     */
    public function minorPercentage()
    {
        $total = $this->getNumRevisionsProcessed();

        // Guard against division by zero (a DivisionByZeroError on PHP 8).
        if (!$total) {
            return 0.0;
        }

        return round(($this->minorCount / $total) * 100, 1);
    }
405
406
    /**
     * Get the percentage of anonymous edits to the page.
     * @return double Rounded to one decimal place; 0.0 if no revisions were processed.
     */
    public function anonPercentage()
    {
        $total = $this->getNumRevisionsProcessed();

        // Guard against division by zero (a DivisionByZeroError on PHP 8).
        if (!$total) {
            return 0.0;
        }

        return round(($this->anonCount / $total) * 100, 1);
    }
417
418
    /**
     * Get the percentage of edits made by the top 10 editors.
     * @return double Rounded to one decimal place; 0.0 if no revisions were processed.
     */
    public function topTenPercentage()
    {
        $total = $this->getNumRevisionsProcessed();

        // Guard against division by zero (a DivisionByZeroError on PHP 8).
        if (!$total) {
            return 0.0;
        }

        return round(($this->topTenCount / $total) * 100, 1);
    }
426
427
    /**
     * Get the number of times the page has been viewed in the given timeframe. If the
     * ArticleInfo instance has a date range, it is used instead of the $latest parameter.
     * @param  int $latest Last N days.
     * @return int
     */
    public function getPageviews($latest)
    {
        if ($this->hasDateRange()) {
            $range = $this->getDateParams();

            return $this->page->getPageviews($range['start'], $range['end']);
        }

        return $this->page->getLastPageviews($latest);
    }
442
443
    /**
     * Get the page assessments of the page, fetching them on first use.
     * @see https://www.mediawiki.org/wiki/Extension:PageAssessments
     * @return string[]|false False if unsupported.
     * @codeCoverageIgnore
     */
    public function getAssessments()
    {
        if (is_array($this->assessments)) {
            return $this->assessments;
        }

        $pageAssessments = $this->page->getProject()->getPageAssessments();
        $this->assessments = $pageAssessments->getAssessments($this->page);

        return $this->assessments;
    }
459
460
    /**
     * Get the number of automated edits made to the page.
     * @return int
     */
    public function getAutomatedCount()
    {
        return $this->automatedCount;
    }
468
469
    /**
     * Get the number of edits to the page that were reverted with the subsequent edit.
     * @return int
     */
    public function getRevertCount()
    {
        return $this->revertCount;
    }
477
478
    /**
     * Get the number of edits to the page made by logged-out users.
     * @return int
     */
    public function getAnonCount()
    {
        return $this->anonCount;
    }
486
487
    /**
     * Get the number of minor edits to the page.
     * @return int
     */
    public function getMinorCount()
    {
        return $this->minorCount;
    }
495
496
    /**
     * Get the number of edits to the page made in the past day, week, month and year.
     * @return int[] With keys 'day', 'week', 'month' and 'year'.
     */
    public function getCountHistory()
    {
        return $this->countHistory;
    }
504
505
    /**
     * Get the number of edits to the page made by the top 10 editors.
     * @return int
     */
    public function getTopTenCount()
    {
        return $this->topTenCount;
    }
513
514
    /**
     * Get the top editors to the page by edit count.
     * @param int $limit Default 20, maximum 1,000.
     * @param bool $noBots Set to non-false to exclude bots from the result.
     * @return array One entry per editor with keys 'rank', 'username', 'count', 'minor',
     *   'first_edit' and 'latest_edit' (the latter two each holding 'id' and 'timestamp').
     */
    public function getTopEditorsByEditCount($limit = 20, $noBots = false)
    {
        // Quick cache, valid only for the same request. A function-level static is shared by
        // every ArticleInfo instance and every call, so results are keyed by instance and by
        // the effective arguments; otherwise the first result would be returned for any later
        // page, limit or bot setting.
        // NOTE(review): spl_object_hash() values can be reused once an object is destroyed;
        // moving this cache to an instance property would remove that caveat.
        static $cache = [];

        $limit = min($limit, 1000);
        $cacheKey = spl_object_hash($this).'|'.$limit.'|'.($noBots ? '1' : '0');

        if (isset($cache[$cacheKey])) {
            return $cache[$cacheKey];
        }

        $rows = $this->getRepository()->getTopEditorsByEditCount(
            $this->page,
            $this->start,
            $this->end,
            $limit,
            $noBots
        );

        $topEditors = [];
        $rank = 0;
        foreach ($rows as $row) {
            $topEditors[] = [
                // Ranks are 1-based and follow the order of the returned rows.
                'rank' => ++$rank,
                'username' => $row['username'],
                'count' => $row['count'],
                'minor' => $row['minor'],
                'first_edit' => [
                    'id' => $row['first_revid'],
                    'timestamp' => $row['first_timestamp'],
                ],
                'latest_edit' => [
                    'id' => $row['latest_revid'],
                    'timestamp' => $row['latest_timestamp'],
                ],
            ];
        }

        $cache[$cacheKey] = $topEditors;

        return $topEditors;
    }
557
558
    /**
     * Get the first edit to the page.
     * @return Edit
     */
    public function getFirstEdit()
    {
        return $this->firstEdit;
    }
566
567
    /**
     * Get the last edit to the page.
     * @return Edit
     */
    public function getLastEdit()
    {
        return $this->lastEdit;
    }
575
576
    /**
     * Get the edit that made the largest addition to the page (by number of bytes).
     * @return Edit
     */
    public function getMaxAddition()
    {
        return $this->maxAddition;
    }
584
585
    /**
     * Get the edit that made the largest removal from the page (by number of bytes).
     * @return Edit
     */
    public function getMaxDeletion()
    {
        return $this->maxDeletion;
    }
593
594
    /**
     * Get the list of editors to the page, including various statistics.
     * @return mixed[]
     */
    public function getEditors()
    {
        return $this->editors;
    }
602
603
    /**
     * Get the list of the top editors to the page (by edits), including various statistics.
     * @return mixed[]
     */
    public function topTenEditorsByEdits()
    {
        return $this->topTenEditorsByEdits;
    }
611
612
    /**
     * Get the list of the top editors to the page (by added text), including various statistics.
     * @return mixed[]
     */
    public function topTenEditorsByAdded()
    {
        return $this->topTenEditorsByAdded;
    }
620
621
    /**
     * Get various counts about each individual year and month of the page's history.
     * @return mixed[]
     */
    public function getYearMonthCounts()
    {
        return $this->yearMonthCounts;
    }
629
630
    /**
     * Get the localized labels for the 'Year counts' chart.
     * @return string[]
     */
    public function getYearLabels()
    {
        return $this->yearLabels;
    }
638
639
    /**
     * Get the localized labels for the 'Month counts' chart.
     * @return string[]
     */
    public function getMonthLabels()
    {
        return $this->monthLabels;
    }
647
648
    /**
     * Get the maximum number of edits that were created across all months. This is used
     * as a comparison for the bar charts in the months section.
     * @return int
     */
    public function getMaxEditsPerMonth()
    {
        return $this->maxEditsPerMonth;
    }
657
658
    /**
     * Get a list of (semi-)automated tools that were used to edit the page, including
     * the number of times they were used, and a link to the tool's homepage.
     * @return string[]
     */
    public function getTools()
    {
        return $this->tools;
    }
667
668
    /**
     * Get the list of the page's wikidata and Checkwiki errors, fetching them on first use.
     * @see Page::getErrors()
     * @return string[]
     */
    public function getBugs()
    {
        if (is_array($this->bugs)) {
            return $this->bugs;
        }

        $this->bugs = $this->page->getErrors();

        return $this->bugs;
    }
680
681
    /**
     * Get the number of wikidata and CheckWiki errors.
     * @return int
     */
    public function numBugs()
    {
        $bugs = $this->getBugs();

        return count($bugs);
    }
689
690
    /**
     * Get the number of external links on the page.
     * @return int
     */
    public function linksExtCount()
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_ext_count'];
    }
698
699
    /**
     * Get the number of incoming links to the page.
     * @return int
     */
    public function linksInCount()
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_in_count'];
    }
707
708
    /**
     * Get the number of outgoing links from the page.
     * @return int
     */
    public function linksOutCount()
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_out_count'];
    }
716
717
    /**
     * Get the number of redirects to the page.
     * @return int
     */
    public function redirectsCount()
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['redirects_count'];
    }
725
726
    /**
     * Get the number of external, incoming and outgoing links, along with the number
     * of redirects to the page. The counts are fetched from the Page on first use.
     * @return int[]
     * @codeCoverageIgnore
     */
    private function getLinksAndRedirects()
    {
        if (is_array($this->linksAndRedirects)) {
            return $this->linksAndRedirects;
        }

        $this->linksAndRedirects = $this->page->countLinksAndRedirects();

        return $this->linksAndRedirects;
    }
738
739
    /**
     * Parse the revision history, collecting our core statistics.
     *
     * Untestable because it relies on getting a PDO statement. All the important
     * logic lives in other methods which are tested.
     * @codeCoverageIgnore
     */
    private function parseHistory()
    {
        // Only cap the query when the page has more revisions than we are configured to process.
        $limit = $this->tooManyRevisions() ? $this->getMaxRevisions() : null;

        // Third parameter is ignored if $limit is null.
        // NOTE(review): static analysis flagged that start/end may arrive as strings while
        // Page::getRevisionsStmt() is documented as int|false — confirm upstream normalisation.
        $revStmt = $this->page->getRevisionsStmt(
            null,
            $limit,
            $this->getNumRevisions(),
            $this->start,
            $this->end
        );
        $revCount = 0;

        /**
         * Data about previous edits so that we can use them as a basis for comparison.
         * Holds Edit objects, except 'prevSha' which holds a SHA-1 value.
         * @var mixed[]
         */
        $prevEdits = [
            // The previous Edit, used to discount content that was reverted.
            'prev' => null,

            // The SHA-1 of the edit *before* the previous edit. Used for more
            // accurate revert detection.
            'prevSha' => null,

            // The last edit deemed to be the max addition of content. This is kept track of
            // in case we find out the next edit was reverted (and was also a max edit),
            // in which case we'll want to discount it and use this one instead.
            'maxAddition' => null,

            // Same as with maxAddition, except the maximum amount of content deleted.
            // This is used to discount content that was reverted.
            'maxDeletion' => null,
        ];

        while ($rev = $revStmt->fetch()) {
            $edit = new Edit($this->page, $rev);

            if ($revCount === 0) {
                $this->firstEdit = $edit;
            }

            // Sometimes, with old revisions (2001 era), the revisions from 2002 come before 2001
            if ($edit->getTimestamp() < $this->firstEdit->getTimestamp()) {
                $this->firstEdit = $edit;
            }

            $prevEdits = $this->updateCounts($edit, $prevEdits);

            $revCount++;
        }

        $this->numRevisionsProcessed = $revCount;

        // Various sorts. These properties have no default value, so they are still null
        // when no revision was fetched; sorting null warns on PHP 7 and throws on PHP 8.
        if (is_array($this->editors)) {
            arsort($this->editors);
        }
        if (is_array($this->yearMonthCounts)) {
            ksort($this->yearMonthCounts);
        }
        if (!empty($this->tools)) {
            arsort($this->tools);
        }
    }
812
813
    /**
     * Update various counts based on the current edit.
     * @param Edit $edit
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateCounts(Edit $edit, $prevEdits)
    {
        // Per-edit tallies: year/month, the editing user, anon/minor flags,
        // automated tool usage and the "edits per <time>" counters.
        $this->updateYearMonthCounts($edit);
        $this->updateUserCounts($edit);
        $this->updateAnonMinorCounts($edit);
        $this->updateToolCounts($edit);
        $this->updateCountHistory($edit);

        // Update figures regarding content addition/removal, and the revert count.
        $tracking = $this->updateContentSizes($edit, $prevEdits);

        // With all counts updated, advance the tracking data. The SHA of the edit that was
        // 'prev' until now is copied to 'prevSha' before the current edit takes its place.
        $previous = $tracking['prev'];
        if (null !== $previous) {
            $tracking['prevSha'] = $previous->getSha();
        }
        $tracking['prev'] = $edit;
        $this->lastEdit = $edit;

        return $tracking;
    }
852
853
    /**
     * Dispatch the content-size bookkeeping for the given edit to the revert
     * or non-revert handler.
     *
     * @param Edit $edit The edit being processed.
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateContentSizes(Edit $edit, array $prevEdits)
    {
        // Ordinary edits contribute to the added/removed figures.
        if (!$this->isRevert($edit, $prevEdits)) {
            return $this->updateContentSizesNonRevert($edit, $prevEdits);
        }

        // Reverts undo the contribution of the previous edit instead.
        return $this->updateContentSizesRevert($prevEdits);
    }
868
869
    /**
     * Decide whether the given Edit should be counted as a revert.
     *
     * @param Edit $edit The edit being examined.
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return bool True when the edit's SHA equals the tracked 'prevSha', or when
     *   the Edit itself reports being a revert.
     */
    private function isRevert(Edit $edit, array $prevEdits)
    {
        // Identical content hash to the tracked one: the page was restored.
        if ($edit->getSha() === $prevEdits['prevSha']) {
            return true;
        }

        // Otherwise defer to the Edit's own revert detection.
        return (bool) $edit->isRevert($this->container);
    }
879
880
    /**
     * Updates the figures on content sizes assuming the current edit was a revert of the previous one.
     * In such a case, we don't want to treat the previous edit as legit content addition or removal.
     *
     * The revert counter is always incremented. When there is no previous edit there is
     * nothing to undo, so the tracking data is returned untouched.
     *
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesRevert(array $prevEdits)
    {
        $this->revertCount++;

        $prev = $prevEdits['prev'];

        // Guard once for both adjustments below; previously only the first one was guarded.
        if (!$prev) {
            return $prevEdits;
        }

        // Adjust addedBytes given this edit was a revert of the previous one.
        $prevSize = $prev->getSize();
        if ($prevSize > 0) {
            $this->addedBytes -= $prevSize;

            // Also deduct from the user's individual added byte count.
            $username = $prev->getUser()->getUsername();
            $this->editors[$username]['added'] -= $prevSize;
        }

        // @TODO: Test this against an edit war (use your sandbox).
        // Also remove as max added or deleted, if applicable: restore the runner-up that was
        // stashed in $prevEdits, and stash the reverted edit in its place in case of edit wars.
        $prevId = $prev->getId();
        if ($this->maxAddition && $prevId === $this->maxAddition->getId()) {
            $this->maxAddition = $prevEdits['maxAddition'];
            $prevEdits['maxAddition'] = $prev;
        } elseif ($this->maxDeletion && $prevId === $this->maxDeletion->getId()) {
            $this->maxDeletion = $prevEdits['maxDeletion'];
            $prevEdits['maxDeletion'] = $prev;
        }

        return $prevEdits;
    }
911
912
    /**
     * Record the content-size effect of an edit that is NOT a revert of the previous edit.
     *
     * Positive sizes are added to the page-wide and per-editor 'added' totals. The largest
     * addition and largest deletion are tracked, and the edit they displace is stashed in
     * $prevEdits so it can be restored if the new record holder is reverted.
     *
     * @param Edit $edit The edit being processed.
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesNonRevert(Edit $edit, $prevEdits)
    {
        $delta = $this->getEditSize($edit, $prevEdits);

        if ($delta > 0) {
            // Content was added: credit the page total and the editor.
            $author = $edit->getUser()->getUsername();
            $this->addedBytes += $delta;
            $this->editors[$author]['added'] += $delta;

            // New record addition? Stash the old record holder, then replace it.
            $isLargestAddition = !$this->maxAddition || $delta > $this->maxAddition->getSize();
            if ($isLargestAddition) {
                $prevEdits['maxAddition'] = $this->maxAddition;
                $this->maxAddition = $edit;
            }

            return $prevEdits;
        }

        // Content was removed: only the record deletion is tracked.
        if ($delta < 0 && (!$this->maxDeletion || $delta < $this->maxDeletion->getSize())) {
            $prevEdits['maxDeletion'] = $this->maxDeletion;
            $this->maxDeletion = $edit;
        }

        return $prevEdits;
    }
945
946
    /**
     * Get the size of the given edit, unless the previous edit has an unknown length.
     *
     * When the previous edit's length is null the size is reported as 0 instead.
     * This happens when the edit follows other edits that were revision-deleted.
     * @see T148857 for more information.
     * @todo Remove once T101631 is resolved.
     *
     * @param Edit $edit The edit whose size is wanted.
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return int
     */
    private function getEditSize(Edit $edit, $prevEdits)
    {
        $previous = $prevEdits['prev'];
        $previousLengthUnknown = $previous && $previous->getLength() === null;

        return $previousLengthUnknown ? 0 : $edit->getSize();
    }
964
965
    /**
     * Tally automated-tool usage for the given edit, if it was made with a tool.
     *
     * Increments the overall, yearly and monthly 'automated' counters, and the
     * per-tool counter in $this->tools (creating the entry on first sight).
     *
     * @param Edit $edit The edit being processed.
     */
    private function updateToolCounts(Edit $edit)
    {
        $tool = $edit->getTool($this->container);

        // false means no tool was detected; nothing to count.
        if ($tool === false) {
            return;
        }

        $year = $edit->getYear();
        $month = $edit->getMonth();

        $this->automatedCount++;
        $this->yearMonthCounts[$year]['automated']++;
        $this->yearMonthCounts[$year]['months'][$month]['automated']++;

        $toolName = $tool['name'];

        if (isset($this->tools[$toolName])) {
            $this->tools[$toolName]['count']++;
            return;
        }

        // First edit seen for this tool: start its counter and keep its link.
        $this->tools[$toolName] = [
            'count' => 1,
            'link' => $tool['link'],
        ];
    }
994
995
    /**
     * Count the given edit towards its year and month, and track the busiest month.
     *
     * @param Edit $edit The edit being processed.
     */
    private function updateYearMonthCounts(Edit $edit)
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // First edit seen in this year: create the year entry and its month slots.
        if (!isset($this->yearMonthCounts[$year])) {
            $this->addYearMonthCountEntry($edit);
        }

        // Every edit counts towards the 'all' totals.
        $this->yearMonthCounts[$year]['all']++;
        $this->yearMonthCounts[$year]['months'][$month]['all']++;

        // Overwritten on each edit, so it ends up holding the length of the
        // last edit processed for that year.
        $this->yearMonthCounts[$year]['size'] = (int) $edit->getLength();

        // Keep track of the highest number of edits seen in any single month.
        $monthTotal = $this->yearMonthCounts[$year]['months'][$month]['all'];
        if ($this->maxEditsPerMonth < $monthTotal) {
            $this->maxEditsPerMonth = $monthTotal;
        }
    }
1021
1022
    /**
     * Add a new entry to $this->yearMonthCounts for the year of the given edit,
     * with zeroed counters for each month that should be shown. Also appends the
     * matching labels to $this->yearLabels and $this->monthLabels.
     *
     * Months earlier than the month of the first edit, or later than
     * self::getLastDay(), get no entry at all.
     *
     * @param Edit $edit An edit made in the year to add.
     */
    private function addYearMonthCountEntry(Edit $edit)
    {
        $this->yearLabels[] = $this->i18n->dateFormat($edit->getTimestamp(), 'yyyy');
        $editYear = $edit->getYear();

        // Beginning of the month of the first edit, at 00:00:00.
        // mktime() expects integers, so the year and month are cast explicitly.
        $firstEditTime = mktime(
            0,
            0,
            0,
            (int) $this->firstEdit->getMonth(),
            1,
            (int) $this->firstEdit->getYear()
        );

        $this->yearMonthCounts[$editYear] = [
            'all' => 0,
            'minor' => 0,
            'anon' => 0,
            'automated' => 0,
            'size' => 0, // Keep track of the size by the end of the year.
            'events' => [],
            'months' => [],
        ];

        for ($i = 1; $i <= 12; $i++) {
            $timeObj = mktime(0, 0, 0, $i, 1, (int) $editYear);

            // Don't show zeros for months before the first edit or after the last day.
            if ($timeObj < $firstEditTime || $timeObj > $this->getLastDay()) {
                continue;
            }

            $this->monthLabels[] = $this->i18n->dateFormat($timeObj, 'yyyy-MM');

            // Month keys are zero-padded two-digit strings ('01'..'12').
            $this->yearMonthCounts[$editYear]['months'][sprintf('%02d', $i)] = [
                'all' => 0,
                'minor' => 0,
                'anon' => 0,
                'automated' => 0,
            ];
        }
    }
1062
1063
    /**
     * Count the given edit towards the anonymous and/or minor totals
     * (overall, per year and per month), as applicable.
     *
     * @param Edit $edit The edit being processed.
     */
    private function updateAnonMinorCounts(Edit $edit)
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Edit by an anonymous user.
        if ($edit->isAnon()) {
            $this->anonCount++;
            $this->yearMonthCounts[$year]['anon']++;
            $this->yearMonthCounts[$year]['months'][$month]['anon']++;
        }

        // Edit flagged as minor.
        if ($edit->isMinor()) {
            $this->minorCount++;
            $this->yearMonthCounts[$year]['minor']++;
            $this->yearMonthCounts[$year]['months'][$month]['minor']++;
        }
    }
1086
1087
    /**
     * Count the given edit towards the statistics of the user who made it.
     *
     * Creates the user's entry in $this->editors on their first edit, then bumps
     * their edit count, last-edit timestamp/ID and (when applicable) minor count.
     *
     * @param Edit $edit The edit being processed.
     */
    private function updateUserCounts(Edit $edit)
    {
        $editor = $edit->getUser()->getUsername();
        $isFirstEditByUser = !isset($this->editors[$editor]);

        if ($isFirstEditByUser) {
            // 'minorPercentage' and 'atbe' start as placeholders here.
            $this->editors[$editor] = [
                'all' => 0,
                'minor' => 0,
                'minorPercentage' => 0,
                'first' => $edit->getTimestamp(),
                'firstId' => $edit->getId(),
                'last' => null,
                'atbe' => null,
                'added' => 0,
            ];
        }

        // Every edit counts, and becomes the user's most recent edit so far.
        $this->editors[$editor]['all']++;
        $this->editors[$editor]['last'] = $edit->getTimestamp();
        $this->editors[$editor]['lastId'] = $edit->getId();

        if ($edit->isMinor()) {
            $this->editors[$editor]['minor']++;
        }
    }
1119
1120
    /**
     * Increment the "edits in the past day/week/month/year" counters for the given edit.
     *
     * The windows are cumulative: an edit newer than one day old is counted in all four.
     *
     * @param Edit $edit The edit being processed.
     */
    private function updateCountHistory(Edit $edit)
    {
        $madeAt = $edit->getTimestamp();

        // Counter key => relative date string marking the start of that window.
        $windows = [
            'day' => '-1 day',
            'week' => '-1 week',
            'month' => '-1 month',
            'year' => '-1 year',
        ];

        foreach ($windows as $period => $offset) {
            if ($madeAt > new DateTime($offset)) {
                $this->countHistory[$period]++;
            }
        }
    }
1141
1142
    /**
     * Get the stored information about bots that edited the page.
     *
     * @return mixed[] Keyed by bot username; each value holds the bot's edit 'count' on the page
     *   and a 'current' flag telling whether the account is currently a bot.
     */
    public function getBots()
    {
        return $this->bots;
    }
1150
1151
    /**
     * Load info about the bots that edited the page and store it in $this->bots,
     * sorted by edit count in descending order.
     *
     * This is a private setter because the information is needed when computing
     * the top 10 editors, where we don't want to include bots.
     */
    private function setBots()
    {
        $rows = $this->getRepository()->getBotData($this->page, $this->start, $this->end);

        // Build a map of username => [edit count, whether the account is currently a bot].
        $botsByName = [];
        while ($row = $rows->fetch()) {
            $botsByName[$row['username']] = [
                'count' => (int)$row['count'],
                'current' => $row['current'] === 'bot',
            ];
        }

        // Highest edit count first; keys (usernames) are preserved.
        $byCountDescending = function ($left, $right) {
            return $right['count'] - $left['count'];
        };
        uasort($botsByName, $byCountDescending);

        $this->bots = $botsByName;
    }
1174
1175
    /**
     * Number of edits made to the page by current or former bots.
     *
     * The total is computed once and cached; later calls return the cached
     * value regardless of the argument.
     *
     * @param mixed[] $bots Used only in unit tests, where we supply mock data for the bots that
     *   will get processed. Same shape as self::getBots(): each value has a 'count' key.
     * @return int
     */
    public function getBotRevisionCount($bots = null)
    {
        if (isset($this->botRevisionCount)) {
            return $this->botRevisionCount;
        }

        // Fall back to the bots recorded for this page.
        if ($bots === null) {
            $bots = $this->getBots();
        }

        $total = 0;
        foreach ($bots as $botInfo) {
            $total += $botInfo['count'];
        }

        $this->botRevisionCount = $total;

        return $total;
    }
1199
1200
    /**
     * Query for log events during the article's history, and tally them per year
     * in $this->yearMonthCounts[<year>]['events'].
     *
     * Events are skipped individually when their year has no entry in
     * $this->yearMonthCounts, or when their log type is not one of those counted here.
     */
    private function setLogsEvents()
    {
        $logData = $this->getRepository()->getLogEvents(
            $this->page,
            $this->start,
            $this->end
        );

        // Log type => i18n key. Pending-changes ('stable') protections are
        // counted along with normal protections.
        $actionsByLogType = [
            'protect' => 'protections',
            'delete' => 'deletions',
            'move' => 'moves',
            'stable' => 'protections',
        ];

        foreach ($logData as $event) {
            $year = date('Y', strtotime($event['timestamp']));

            // A year without an entry only rules out this event, not the ones after it.
            if (!isset($this->yearMonthCounts[$year])) {
                continue;
            }

            // Ignore log types we don't report on, rather than counting them
            // under an undefined (or the previous event's) action.
            $logType = $event['log_type'];
            if (!isset($actionsByLogType[$logType])) {
                continue;
            }
            $action = $actionsByLogType[$logType];

            if (empty($this->yearMonthCounts[$year]['events'][$action])) {
                $this->yearMonthCounts[$year]['events'][$action] = 1;
            } else {
                $this->yearMonthCounts[$year]['events'][$action]++;
            }
        }
    }
1247
1248
    /**
     * Set statistics about the top 10 editors by added text and number of edits.
     * This is ran *after* parseHistory() since we need the grand totals first.
     * Various stats are also set for each editor in $this->editors to be used in the charts.
     *
     * The top 10 by edits are the first 10 non-bot editors in the iteration order of
     * $this->editors (presumably already sorted by edit count at this point -- verify
     * against parseHistory()).
     */
    private function doPostPrecessing()
    {
        $topTenCount = $counter = 0;
        $topTenEditorsByEdits = [];

        foreach ($this->editors as $editor => $info) {
            // Collect the first 10 editors that are not bots, and sum up their edits.
            // $this->bots is keyed by username, so a key lookup is enough.
            if ($counter < 10 && !isset($this->bots[$editor])) {
                $topTenCount += $info['all'];
                $counter++;

                // To be used in the Top Ten charts.
                $topTenEditorsByEdits[] = [
                    'label' => $editor,
                    'value' => $info['all'],
                ];
            }

            // Compute the percentage of minor edits the user made.
            $this->editors[$editor]['minorPercentage'] = $info['all']
                ? ($info['minor'] / $info['all']) * 100
                : 0;

            if ($info['all'] > 1) {
                // Number of seconds/days between first and last edit.
                $secs = $info['last']->getTimestamp() - $info['first']->getTimestamp();
                $days = $secs / (60 * 60 * 24);

                // Average time between edits (in days).
                $this->editors[$editor]['atbe'] = $days / $info['all'];
            }
        }

        // Loop through again and add each editor's share of the top 10's combined edits.
        // The callback only runs when the list is non-empty, in which case $topTenCount is
        // the sum of the collected 'all' values.
        $this->topTenEditorsByEdits = array_map(function ($editor) use ($topTenCount) {
            $editor['percentage'] = 100 * ($editor['value'] / $topTenCount);
            return $editor;
        }, $topTenEditorsByEdits);

        $this->topTenEditorsByAdded = $this->getTopTenByAdded();

        $this->topTenCount = $topTenCount;
    }
1296
1297
    /**
     * Get the top ten editors by added text.
     *
     * Percentages are relative to the page-wide $this->addedBytes, and are
     * reported as 0 when that total is zero so that no division by zero occurs.
     *
     * @return array With keys 'label', 'value' and 'percentage', ready to be used by the pieChart Twig helper.
     */
    private function getTopTenByAdded()
    {
        // First sort editors array by the amount of text they added, largest first.
        $topTenEditorsByAdded = $this->editors;
        uasort($topTenEditorsByAdded, function ($a, $b) {
            if ($a['added'] === $b['added']) {
                return 0;
            }
            return $a['added'] > $b['added'] ? -1 : 1;
        });

        // Slice to the top 10, keeping only the usernames.
        $topTenEditorsByAdded = array_keys(array_slice($topTenEditorsByAdded, 0, 10, true));

        // Then build a new array of top 10 editors by added text in the data structure needed for the chart.
        return array_map(function ($editor) {
            $added = $this->editors[$editor]['added'];
            return [
                'label' => $editor,
                'value' => $added,
                'percentage' => $this->addedBytes
                    ? 100 * ($added / $this->addedBytes)
                    : 0,
            ];
        }, $topTenEditorsByAdded);
    }
1332
1333
    /**
     * Get authorship attribution from the WikiWho API.
     * @see https://f-squared.org/wikiwho/
     *
     * A successful result is cached on the instance and returned as-is on later
     * calls, whatever $limit is passed then. Error results are not cached.
     *
     * @param int $limit Max number of results.
     * @return array With keys 'list' (author => 'count' and 'percentage'), 'totalAuthors'
     *   and 'totalCount'; or only the key 'error' if no revision was returned.
     */
    public function getTextshares($limit = null)
    {
        if (isset($this->textshares)) {
            return $this->textshares;
        }

        // TODO: check for failures. Should have a success:true
        $response = $this->getRepository()->getTextshares($this->page);

        // If revision can't be found, return error message.
        if (!isset($response['revisions'][0])) {
            $message = isset($response['Error']) ? $response['Error'] : 'Unknown';
            return [
                'error' => $message
            ];
        }

        // The first revision is keyed by its revision ID.
        $revision = $response['revisions'][0];
        $revId = array_keys($revision)[0];

        list($counts, $totalCount, $userIds) = $this->countTokens($revision[$revId]['tokens']);
        $usernameMap = $this->getUsernameMap($userIds);

        // $counts is already sorted, so slicing keeps the biggest contributors.
        $countsToProcess = $limit !== null
            ? array_slice($counts, 0, $limit, true)
            : $counts;

        // Re-key by username where the ID could be resolved; otherwise keep the
        // original key (e.g. an IP address).
        $list = [];
        foreach ($countsToProcess as $editor => $count) {
            $author = isset($usernameMap[$editor]) ? $usernameMap[$editor] : $editor;
            $list[$author] = [
                'count' => $count,
                'percentage' => round(100 * ($count / $totalCount), 1)
            ];
        }

        $this->textshares = [
            'list' => $list,
            'totalAuthors' => count($counts),
            'totalCount' => $totalCount,
        ];

        return $this->textshares;
    }
1391
1392
    /**
     * Look up the usernames for the given user IDs on the page's project.
     *
     * @param int[] $userIds
     * @return array IDs as keys, usernames as values.
     */
    private function getUsernameMap($userIds)
    {
        $project = $this->page->getProject();
        $rows = $this->getRepository()->getUsernamesFromIds($project, $userIds);

        // Flatten the rows into a simple ID => name lookup table.
        $namesById = [];
        foreach ($rows as $row) {
            $namesById[$row['user_id']] = $row['user_name'];
        }

        return $namesById;
    }
1411
1412
    /**
     * Get counts of token lengths for each author. Used in self::getTextshares()
     *
     * Lengths are measured with strlen(), i.e. in bytes.
     *
     * @param array $tokens Each token has the keys 'editor' and 'str'.
     * @return array [counts by user (sorted, highest first), total count, unique IDs of accounts]
     */
    private function countTokens($tokens)
    {
        $counts = [];
        $userIds = [];
        $totalCount = 0;

        // Loop through the tokens, keeping totals (token length) for each author.
        foreach ($tokens as $token) {
            $editor = $token['editor'];

            // IPs are prefixed with '0|', otherwise it's the user ID.
            if (substr($editor, 0, 2) === '0|') {
                $editor = substr($editor, 2);
            } else {
                // Keyed by ID so that each account is listed once, not once per token.
                $userIds[$editor] = $editor;
            }

            if (!isset($counts[$editor])) {
                $counts[$editor] = 0;
            }

            $length = strlen($token['str']);
            $counts[$editor] += $length;
            $totalCount += $length;
        }

        // Sort authors by count.
        arsort($counts);

        return [$counts, $totalCount, array_values($userIds)];
    }
1447
1448
    /**
     * Get the domain names of the wikis supported by WikiWho,
     * as listed in self::TEXTSHARE_WIKIS.
     *
     * @return string[]
     * @codeCoverageIgnore
     */
    public function getTextshareWikis()
    {
        return self::TEXTSHARE_WIKIS;
    }
1457
1458
    /**
     * Get prose and reference information, based on the page's HTML content.
     *
     * The HTML is requested as of $this->end, or without a date when $this->end is false.
     *
     * @return array With keys 'characters', 'words', 'references', 'unique_references' and 'sections'.
     */
    public function getProseStats()
    {
        $asOf = null;
        if ($this->end !== false) {
            // '@' makes DateTime interpret the value as a Unix timestamp.
            $asOf = new DateTime('@'.$this->end);
        }

        $crawler = new Crawler($this->page->getHTMLContent($asOf));

        // Prose is what lives in paragraphs of the content area.
        list($chars, $words) = $this->countCharsAndWords($crawler, '#mw-content-text p');

        // Collect the text of every reference, to tell unique ones apart.
        $refNodes = $crawler->filter('#mw-content-text .reference');
        $refTexts = [];
        $refNodes->each(function ($refNode) use (&$refTexts) {
            $refTexts[] = $refNode->text();
        });

        // Each headline element counts as one section.
        $headlines = $crawler->filter('#mw-content-text .mw-headline');

        return [
            'characters' => $chars,
            'words' => $words,
            'references' => $refNodes->count(),
            'unique_references' => count(array_unique($refTexts)),
            'sections' => count($headlines),
        ];
    }
1488
1489
    /**
     * Count the number of characters and words of the plain text within the DOM elements
     * matched by the given selector.
     *
     * Citation markers such as "[12]" are removed before counting. Characters are counted
     * as UTF-8 characters rather than bytes, words are separated by any run of whitespace,
     * and elements without any text contribute nothing.
     *
     * @param Crawler $crawler
     * @param string $selector HTML selector.
     * @return array [num chars, num words]
     */
    private function countCharsAndWords($crawler, $selector)
    {
        $totalChars = 0;
        $totalWords = 0;

        $crawler->filter($selector)->each(function ($node) use (&$totalChars, &$totalWords) {
            // Strip the markers first and trim afterwards, so that a trailing
            // marker doesn't leave whitespace behind.
            $text = trim(preg_replace('/\[\d+\]/', '', $node->text()));

            // Nothing to count; notably an empty string must not count as one word.
            if ($text === '') {
                return;
            }

            $totalChars += mb_strlen($text, 'UTF-8');

            // preg_split() returns false on failure (e.g. malformed UTF-8);
            // fall back to splitting on single spaces in that case.
            $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
            if ($words === false) {
                $words = explode(' ', $text);
            }
            $totalWords += count($words);
        });

        return [$totalChars, $totalWords];
    }
1508
1509
    /**
     * Fetch transclusion data (categories, templates and files) that are on the page.
     * The repository is queried only while no array has been stored yet.
     *
     * @return array With keys 'categories', 'templates' and 'files'.
     */
    private function getTransclusionData()
    {
        // Already loaded: reuse the stored result.
        if (is_array($this->transclusionData)) {
            return $this->transclusionData;
        }

        $this->transclusionData = $this->getRepository()->getTransclusionData($this->page);

        return $this->transclusionData;
    }
1521
1522
    /**
     * Get the number of categories that are on the page,
     * as reported under 'categories' in the transclusion data.
     *
     * @return int
     */
    public function getNumCategories()
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['categories'];
    }
1530
1531
    /**
     * Get the number of templates that are on the page,
     * as reported under 'templates' in the transclusion data.
     *
     * @return int
     */
    public function getNumTemplates()
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['templates'];
    }
1539
1540
    /**
     * Get the number of files that are on the page,
     * as reported under 'files' in the transclusion data.
     *
     * @return int
     */
    public function getNumFiles()
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['files'];
    }
1548
}
1549