Passed
Push — master ( 738dcd...d9f1a8 )
by MusikAnimal
04:23
created

ArticleInfo::getTopEditorsByEditCount()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 36
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
cc 3
eloc 24
nc 3
nop 2
dl 0
loc 36
ccs 0
cts 23
cp 0
crap 12
rs 9.536
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the ArticleInfo class.
4
 */
5
6
namespace Xtools;
7
8
use AppBundle\Helper\I18nHelper;
9
use DateTime;
10
use Symfony\Component\DependencyInjection\Container;
11
use Symfony\Component\DomCrawler\Crawler;
12
13
/**
14
 * An ArticleInfo provides statistics about a page on a project. This model does not
15
 * have a separate Repository because it needs to use individual SQL statements to
16
 * traverse the page's history, saving class instance variables along the way.
17
 */
18
class ArticleInfo extends Model
19
{
20
    /** @const string[] Domains of the wikis that WikiWho supports. */
    const TEXTSHARE_WIKIS = [
        'en.wikipedia.org',
        'de.wikipedia.org',
        'eu.wikipedia.org',
        'tr.wikipedia.org',
        'es.wikipedia.org',
    ];

    /** @var Container DI container of the application. */
    protected $container;

    /** @var Page The page being analysed. */
    protected $page;

    /** @var I18nHelper Helper for i18n and l10n. */
    protected $i18n;

    /** @var false|int Start of the date range to obtain records from (false when unset). */
    protected $startDate;

    /** @var false|int End of the date range to obtain records from (false when unset). */
    protected $endDate;

    /** @var int How many revisions belong to the page. */
    protected $numRevisions;

    /** @var int Configured maximum number of revisions to process. */
    protected $maxRevisions;

    /** @var int How many revisions were actually processed. */
    protected $numRevisionsProcessed;

    /**
     * Statistics about the editors of the page. To preserve memory,
     * these are plain values rather than User objects.
     * @var mixed[]
     */
    protected $editors;

    /** @var mixed[] Top 10 editors of the page, by number of edits. */
    protected $topTenEditorsByEdits;

    /** @var mixed[] Top 10 editors of the page, by added text. */
    protected $topTenEditorsByAdded;

    /** @var int How many edits the top 10 editors made. */
    protected $topTenCount;

    /** @var mixed[] Statistics about the bots that edited the page. */
    protected $bots;

    /** @var int How many edits to the page were made by bots. */
    protected $botRevisionCount;

    /** @var mixed[] Counts for each individual year and month of the page's history. */
    protected $yearMonthCounts;

    /** @var string[] Localized year labels for the 'Year counts' chart. */
    protected $yearLabels = [];

    /** @var string[] Localized month labels for the 'Month counts' chart. */
    protected $monthLabels = [];

    /** @var Edit First edit to the page. */
    protected $firstEdit;

    /** @var Edit Last edit to the page. */
    protected $lastEdit;

    /** @var Edit The edit with the largest addition, in bytes. */
    protected $maxAddition;

    /** @var Edit The edit with the largest deletion, in bytes. */
    protected $maxDeletion;

    /** @var int[] Counts of incoming/outgoing links and redirects of the page. */
    protected $linksAndRedirects;

    /** @var string[] Page assessments (see Page::getAssessments). */
    protected $assessments;

    /**
     * Highest number of edits made in any single month. The bar charts
     * in the months section are scaled against this value.
     * @var int
     */
    protected $maxEditsPerMonth;

    /** @var string[] The (semi-)automated tools that were used to edit the page. */
    protected $tools;

    /**
     * Total bytes added over the page's history. The top 10 editors
     * by added text are computed relative to this value.
     * @var int
     */
    protected $addedBytes = 0;

    /** @var int Days elapsed between the first and the last edit. */
    protected $totalDays;

    /** @var int How many edits to the page were minor. */
    protected $minorCount = 0;

    /** @var int How many edits to the page were anonymous. */
    protected $anonCount = 0;

    /** @var int How many edits to the page were automated. */
    protected $automatedCount = 0;

    /** @var int How many edits to the page were reverted by the subsequent edit. */
    protected $revertCount = 0;

    /** @var int[] The "edits per <time>" counts, keyed by 'day', 'week', 'month' and 'year'. */
    protected $countHistory = [
        'day' => 0,
        'week' => 0,
        'month' => 0,
        'year' => 0,
    ];

    /** @var string[] Wikidata and Checkwiki errors of the page. */
    protected $bugs;

    /** @var array Editors along with the percentage of the current content they authored. */
    protected $textshares;

    /** @var array Counts of the categories, templates and files on the page. */
    protected $transclusionData;
150
151
    /**
     * Create an ArticleInfo for a page, optionally limited to a date range.
     * @param Page $page The page to gather statistics about.
     * @param Container $container The application's DI container.
     * @param false|int $start Start of the date range, or false for no start bound.
     * @param false|int $end End of the date range, or false for no end bound.
     */
    public function __construct(Page $page, Container $container, $start = false, $end = false)
    {
        $this->container = $container;
        $this->page = $page;
        $this->endDate = $end;
        $this->startDate = $start;
    }
165
166
    /**
     * Give this ArticleInfo access to the I18nHelper.
     * @param I18nHelper $i18n Helper to store on the instance.
     * @codeCoverageIgnore
     */
    public function setI18nHelper(I18nHelper $i18n)
    {
        $this->i18n = $i18n;
    }
175
176
    /**
     * Get the start of the date range.
     * @return false|int The value given to the constructor; false when no start was set.
     */
    public function getStartDate()
    {
        return $this->startDate;
    }
184
185
    /**
     * Get the end of the date range.
     * @return false|int The value given to the constructor; false when no end was set.
     */
    public function getEndDate()
    {
        return $this->endDate;
    }
193
194
    /**
     * Get the last day that should be shown in the month/year sections:
     * the last day of the month of $this->endDate when an end date is set,
     * otherwise the last day of the current month.
     * @return int As Unix timestamp.
     */
    private function getLastDay()
    {
        // No end date given, so go by the current date.
        if ($this->endDate === false) {
            return strtotime('last day of this month');
        }

        // The '@' prefix makes DateTime parse the value as a Unix timestamp.
        $end = new DateTime('@'.$this->endDate);
        return $end->modify('last day of this month')->getTimestamp();
    }
209
210
    /**
     * Was a start and/or end date supplied?
     * @return bool True unless both the start and end date are false.
     */
    public function hasDateRange()
    {
        $noStart = $this->startDate === false;
        $noEnd = $this->endDate === false;
        return !($noStart && $noEnd);
    }
218
219
    /**
     * Return the start/end date values as associative array,
     * with YYYY-MM-DD as the date format. This is used mainly as
     * a helper to pass to the pageviews Twig macros.
     *
     * A bound that was not supplied falls back to the date of the first
     * (for 'start') or last (for 'end') edit that was processed.
     * @return array Empty if there is no date range, otherwise with keys 'start' and 'end'.
     */
    public function getDateParams()
    {
        if (!$this->hasDateRange()) {
            return [];
        }

        // Only touch firstEdit/lastEdit for the bound that is actually missing, so that
        // an explicitly given bound never requires those edits to have been loaded.
        return [
            'start' => $this->startDate !== false
                ? date('Y-m-d', $this->startDate)
                : $this->firstEdit->getTimestamp()->format('Y-m-d'),
            'end' => $this->endDate !== false
                ? date('Y-m-d', $this->endDate)
                : $this->lastEdit->getTimestamp()->format('Y-m-d'),
        ];
    }
245
246
    /**
     * Convenience accessor for the project that the page belongs to.
     * @return Project
     * @codeCoverageIgnore
     */
    public function getProject()
    {
        $project = $this->page->getProject();
        return $project;
    }
255
256
    /**
     * Get how many revisions belong to the page within the date range.
     * The value is fetched from the Page once and then kept on the instance.
     * @return int
     */
    public function getNumRevisions()
    {
        if (isset($this->numRevisions)) {
            return $this->numRevisions;
        }

        $this->numRevisions = $this->page->getNumRevisions(null, $this->startDate, $this->endDate);
        return $this->numRevisions;
    }
267
268
    /**
     * Get the upper limit of revisions that should be processed, as set
     * by the 'app.max_page_revisions' container parameter.
     * @return int
     */
    public function getMaxRevisions()
    {
        if (isset($this->maxRevisions)) {
            return $this->maxRevisions;
        }

        $configured = $this->container->getParameter('app.max_page_revisions');
        $this->maxRevisions = (int) $configured;
        return $this->maxRevisions;
    }
279
280
    /**
     * Get how many revisions are actually getting processed: the configured
     * maximum (app.max_page_revisions) when the page exceeds it, otherwise
     * the real number of revisions.
     * @return int
     */
    public function getNumRevisionsProcessed()
    {
        if (!isset($this->numRevisionsProcessed)) {
            $this->numRevisionsProcessed = $this->tooManyRevisions()
                ? $this->getMaxRevisions()
                : $this->getNumRevisions();
        }

        return $this->numRevisionsProcessed;
    }
300
301
    /**
     * Does the page have more revisions than the config allows us to process?
     * A configured maximum of zero or less means there is no limit.
     * @return bool
     */
    public function tooManyRevisions()
    {
        $max = $this->getMaxRevisions();
        return $max > 0 && $this->getNumRevisions() > $max;
    }
309
310
    /**
     * Collect and store everything that the ArticleInfo view needs.
     * The steps below must run in this order.
     * @codeCoverageIgnore
     */
    public function prepareData()
    {
        $this->parseHistory();
        $this->setLogsEvents();

        // The top 10 counts rely on the bots having been set already.
        $this->setBots();
        $this->setTopTenCounts();
    }
324
325
    /**
     * Get the number of editors that edited the page.
     * @return int Zero if the list of editors has not been populated.
     */
    public function getNumEditors()
    {
        // $this->editors has no default value; count() on null raises a warning
        // (PHP 7.2+) or a TypeError (PHP 8), so treat a missing list as empty.
        return is_array($this->editors) ? count($this->editors) : 0;
    }
333
334
    /**
     * Get how many bots edited the page, i.e. the size of the list given by getBots().
     * @return int
     */
    public function getNumBots()
    {
        $bots = $this->getBots();
        return count($bots);
    }
342
343
    /**
     * Get the number of days between the first and last edit.
     * The value is computed once and then kept on the instance.
     * @return int
     */
    public function getTotalDays()
    {
        if (isset($this->totalDays)) {
            return $this->totalDays;
        }
        $dateFirst = $this->firstEdit->getTimestamp();
        $dateLast = $this->lastEdit->getTimestamp();
        $interval = date_diff($dateLast, $dateFirst, true);
        // DateInterval::format() returns a string; cast it so that the property
        // and the return value match their documented int type.
        $this->totalDays = (int) $interval->format('%a');
        return $this->totalDays;
    }
358
359
    /**
     * Get the length of the page. With a date range this is the length as of
     * the last processed edit, otherwise the length reported by the Page.
     * @return int
     */
    public function getLength()
    {
        return $this->hasDateRange()
            ? $this->lastEdit->getLength()
            : $this->page->getLength();
    }
371
372
    /**
     * Get the average number of days between edits to the page.
     * @return double Rounded to one decimal; 0 when no revisions were processed.
     */
    public function averageDaysPerEdit()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero (DivisionByZeroError on PHP 8) for pages
        // that have no revisions within the date range.
        if (!$numRevisions) {
            return 0.0;
        }

        return round($this->getTotalDays() / $numRevisions, 1);
    }
380
381
    /**
     * Get the average number of edits per day to the page.
     * @return double Rounded to one decimal; 0 when the first and last edit are on the same day.
     */
    public function editsPerDay()
    {
        $totalDays = $this->getTotalDays();

        // Edits per day is simply revisions / days. The day count used to be divided
        // by (365 / 12 / 24) first, which inflated the result by roughly 27%.
        $editsPerDay = $totalDays
            ? $this->getNumRevisionsProcessed() / $totalDays
            : 0;

        return round($editsPerDay, 1);
    }
392
393
    /**
     * Get the average number of edits per month to the page, treating a
     * month as 365 / 12 days. The result never exceeds the number of
     * revisions that were processed.
     * @return double
     */
    public function editsPerMonth()
    {
        $totalDays = $this->getTotalDays();
        $numRevisions = $this->getNumRevisionsProcessed();

        $editsPerMonth = 0;
        if ($totalDays) {
            $editsPerMonth = $numRevisions / ($totalDays / (365 / 12));
        }

        return min($numRevisions, round($editsPerMonth, 1));
    }
404
405
    /**
     * Get the average number of edits per year to the page, treating a
     * year as 365 days. The result never exceeds the number of revisions
     * that were processed.
     * @return double
     */
    public function editsPerYear()
    {
        $totalDays = $this->getTotalDays();
        $numRevisions = $this->getNumRevisionsProcessed();

        $editsPerYear = 0;
        if ($totalDays) {
            $editsPerYear = $numRevisions / ($totalDays / 365);
        }

        return min($numRevisions, round($editsPerYear, 1));
    }
416
417
    /**
     * Get the average number of edits per editor.
     * @return double Rounded to one decimal; 0 when there are no editors.
     */
    public function editsPerEditor()
    {
        // $this->editors has no default value, so it may not be an array yet.
        $numEditors = is_array($this->editors) ? count($this->editors) : 0;

        // Avoid a division by zero (DivisionByZeroError on PHP 8).
        if ($numEditors === 0) {
            return 0.0;
        }

        return round($this->getNumRevisionsProcessed() / $numEditors, 1);
    }
425
426
    /**
     * Get the percentage of minor edits to the page.
     * @return double Rounded to one decimal; 0 when no revisions were processed.
     */
    public function minorPercentage()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero (DivisionByZeroError on PHP 8).
        if (!$numRevisions) {
            return 0.0;
        }

        return round(($this->minorCount / $numRevisions) * 100, 1);
    }
437
438
    /**
     * Get the percentage of anonymous edits to the page.
     * @return double Rounded to one decimal; 0 when no revisions were processed.
     */
    public function anonPercentage()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero (DivisionByZeroError on PHP 8).
        if (!$numRevisions) {
            return 0.0;
        }

        return round(($this->anonCount / $numRevisions) * 100, 1);
    }
449
450
    /**
     * Get the percentage of edits made by the top 10 editors.
     * @return double Rounded to one decimal; 0 when no revisions were processed.
     */
    public function topTenPercentage()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero (DivisionByZeroError on PHP 8).
        if (!$numRevisions) {
            return 0.0;
        }

        return round(($this->topTenCount / $numRevisions) * 100, 1);
    }
458
459
    /**
     * Get how many times the page was viewed. When the ArticleInfo instance
     * has a date range, that range is used and $latest is ignored; otherwise
     * the pageviews of the last $latest days are returned.
     * @param  int $latest Last N days.
     * @return int
     */
    public function getPageviews($latest)
    {
        if ($this->hasDateRange()) {
            $range = $this->getDateParams();
            return $this->page->getPageviews($range['start'], $range['end']);
        }

        return $this->page->getLastPageviews($latest);
    }
475
476
    /**
     * Get the assessments of the page, fetching them through the project's
     * page assessments service unless an array is already stored.
     * @see https://www.mediawiki.org/wiki/Extension:PageAssessments
     * @return string[]|false False if unsupported.
     * @codeCoverageIgnore
     */
    public function getAssessments()
    {
        if (is_array($this->assessments)) {
            return $this->assessments;
        }

        $pageAssessments = $this->page->getProject()->getPageAssessments();
        $this->assessments = $pageAssessments->getAssessments($this->page);

        return $this->assessments;
    }
492
493
    /**
     * Get how many edits to the page were automated.
     * @return int
     */
    public function getAutomatedCount()
    {
        return $this->automatedCount;
    }
501
502
    /**
     * Get how many edits to the page were reverted by the subsequent edit.
     * @return int
     */
    public function getRevertCount()
    {
        return $this->revertCount;
    }
510
511
    /**
     * Get how many edits to the page were made by logged out users.
     * @return int
     */
    public function getAnonCount()
    {
        return $this->anonCount;
    }
519
520
    /**
     * Get how many edits to the page were minor.
     * @return int
     */
    public function getMinorCount()
    {
        return $this->minorCount;
    }
528
529
    /**
     * Get the "edits per <time>" counts: how many edits were made to the
     * page in the past day, week, month and year.
     * @return int[] With keys 'day', 'week', 'month' and 'year'.
     */
    public function getCountHistory()
    {
        return $this->countHistory;
    }
537
538
    /**
     * Get how many edits to the page were made by the top 10 editors.
     * @return int
     */
    public function getTopTenCount()
    {
        return $this->topTenCount;
    }
546
547
    /**
     * Get the top editors to the page by edit count.
     * @param int $limit Maximum 1,000; larger values are capped.
     * @param bool $noBots Set to non-false to exclude bots from the result.
     * @return array One entry per editor with keys 'rank', 'username', 'count', 'minor',
     *   'first_edit' and 'latest_edit' (the latter two each with 'id' and 'timestamp').
     */
    public function getTopEditorsByEditCount($limit = 20, $noBots = false)
    {
        // Quick cache, valid only for the same request. A static local is shared by all
        // instances of the class, so the key includes the instance and both arguments;
        // otherwise a later call with different arguments would get a stale result.
        static $cache = [];
        $cacheKey = spl_object_hash($this).'|'.(int) $limit.'|'.($noBots ? '1' : '0');
        if (isset($cache[$cacheKey])) {
            return $cache[$cacheKey];
        }

        $rows = $this->getRepository()->getTopEditorsByEditCount(
            $this->page,
            $this->startDate,
            $this->endDate,
            // Enforce the documented ceiling of 1,000 rows (this used to be max(),
            // which always requested at least 1,000 rows and allowed more).
            min($limit, 1000),
            $noBots
        );

        $topEditors = [];
        $rank = 0;
        foreach ($rows as $row) {
            $topEditors[] = [
                'rank' => ++$rank,
                'username' => $row['username'],
                'count' => $row['count'],
                'minor' => $row['minor'],
                'first_edit' => [
                    'id' => $row['first_revid'],
                    'timestamp' => $row['first_timestamp'],
                ],
                'latest_edit' => [
                    'id' => $row['latest_revid'],
                    'timestamp' => $row['latest_timestamp'],
                ],
            ];
        }

        $cache[$cacheKey] = $topEditors;

        return $topEditors;
    }
590
591
    /**
     * Get the page's first edit.
     * @return Edit
     */
    public function getFirstEdit()
    {
        return $this->firstEdit;
    }
599
600
    /**
     * Get the page's last edit.
     * @return Edit
     */
    public function getLastEdit()
    {
        return $this->lastEdit;
    }
608
609
    /**
     * Get the edit with the largest addition to the page, in bytes.
     * @return Edit
     */
    public function getMaxAddition()
    {
        return $this->maxAddition;
    }
617
618
    /**
     * Get the edit with the largest removal from the page, in bytes.
     * @return Edit
     */
    public function getMaxDeletion()
    {
        return $this->maxDeletion;
    }
626
627
    /**
     * Get the editors of the page along with their statistics.
     * @return mixed[]
     */
    public function getEditors()
    {
        return $this->editors;
    }
635
636
    /**
     * Get the top editors of the page, ranked by edits, along with their statistics.
     * @return mixed[]
     */
    public function topTenEditorsByEdits()
    {
        return $this->topTenEditorsByEdits;
    }
644
645
    /**
     * Get the top editors of the page, ranked by added text, along with their statistics.
     * @return mixed[]
     */
    public function topTenEditorsByAdded()
    {
        return $this->topTenEditorsByAdded;
    }
653
654
    /**
     * Get the counts for each individual year and month of the page's history.
     * @return mixed[]
     */
    public function getYearMonthCounts()
    {
        return $this->yearMonthCounts;
    }
662
663
    /**
     * Get the localized year labels used by the 'Year counts' chart.
     * @return string[]
     */
    public function getYearLabels()
    {
        return $this->yearLabels;
    }
671
672
    /**
     * Get the localized month labels used by the 'Month counts' chart.
     * @return string[]
     */
    public function getMonthLabels()
    {
        return $this->monthLabels;
    }
680
681
    /**
     * Get the highest number of edits made in any single month. The bar
     * charts in the months section are scaled against this value.
     * @return int
     */
    public function getMaxEditsPerMonth()
    {
        return $this->maxEditsPerMonth;
    }
690
691
    /**
     * Get the (semi-)automated tools that were used to edit the page, with
     * the number of times each was used and a link to the tool's homepage.
     * @return string[]
     */
    public function getTools()
    {
        return $this->tools;
    }
700
701
    /**
     * Get the page's wikidata and Checkwiki errors, fetching them
     * from the Page unless an array is already stored.
     * @see Page::getErrors()
     * @return string[]
     */
    public function getBugs()
    {
        if (is_array($this->bugs)) {
            return $this->bugs;
        }

        $this->bugs = $this->page->getErrors();
        return $this->bugs;
    }
713
714
    /**
     * Get the number of wikidata and CheckWiki errors.
     * @return int
     */
    public function numBugs()
    {
        $bugs = $this->getBugs();
        return count($bugs);
    }
722
723
    /**
     * Get how many external links the page has.
     * @return int
     */
    public function linksExtCount()
    {
        $counts = $this->getLinksAndRedirects();
        return $counts['links_ext_count'];
    }
731
732
    /**
     * Get how many links point to the page.
     * @return int
     */
    public function linksInCount()
    {
        $counts = $this->getLinksAndRedirects();
        return $counts['links_in_count'];
    }
740
741
    /**
     * Get how many links lead out of the page.
     * @return int
     */
    public function linksOutCount()
    {
        $counts = $this->getLinksAndRedirects();
        return $counts['links_out_count'];
    }
749
750
    /**
     * Get how many redirects point to the page.
     * @return int
     */
    public function redirectsCount()
    {
        $counts = $this->getLinksAndRedirects();
        return $counts['redirects_count'];
    }
758
759
    /**
     * Get the counts of external, incoming and outgoing links together with
     * the count of redirects to the page. The counts are fetched from the
     * Page unless an array is already stored.
     * @return int[]
     * @codeCoverageIgnore
     */
    private function getLinksAndRedirects()
    {
        if (is_array($this->linksAndRedirects)) {
            return $this->linksAndRedirects;
        }

        $this->linksAndRedirects = $this->page->countLinksAndRedirects();
        return $this->linksAndRedirects;
    }
772
773
    /**
     * Parse the revision history, collecting our core statistics.
     *
     * Sets the first edit and the number of processed revisions, feeds every
     * revision through updateCounts(), and finally sorts the collected data.
     *
     * Untestable because it relies on getting a PDO statement. All the important
     * logic lives in other methods which are tested.
     * @codeCoverageIgnore
     */
    private function parseHistory()
    {
        // Only cap the query when the page has more revisions than we may process.
        $limit = $this->tooManyRevisions() ? $this->getMaxRevisions() : null;

        // Third parameter is ignored if $limit is null.
        $revStmt = $this->page->getRevisionsStmt(
            null,
            $limit,
            $this->getNumRevisions(),
            $this->startDate,
            $this->endDate
        );
        $revCount = 0;

        /**
         * Data about previous edits so that we can use them as a basis for comparison.
         * @var Edit[]
         */
        $prevEdits = [
            // The previous Edit, used to discount content that was reverted.
            'prev' => null,

            // The SHA-1 of the edit *before* the previous edit. Used for more
            // accurate revert detection.
            'prevSha' => null,

            // The last edit deemed to be the max addition of content. This is kept track of
            // in case we find out the next edit was reverted (and was also a max edit),
            // in which case we'll want to discount it and use this one instead.
            'maxAddition' => null,

            // Same as with maxAddition, except the maximum amount of content deleted.
            // This is used to discount content that was reverted.
            'maxDeletion' => null,
        ];

        while ($rev = $revStmt->fetch()) {
            $edit = new Edit($this->page, $rev);

            if ($revCount === 0) {
                $this->firstEdit = $edit;
            }

            // Sometimes, with old revisions (2001 era), the revisions from 2002 come before 2001
            if ($edit->getTimestamp() < $this->firstEdit->getTimestamp()) {
                $this->firstEdit = $edit;
            }

            $prevEdits = $this->updateCounts($edit, $prevEdits);

            $revCount++;
        }

        $this->numRevisionsProcessed = $revCount;

        // Various sorts. These properties have no default value, so they can still be
        // null if no revision was fetched; arsort()/ksort() on null raises a warning
        // (PHP 7) or a TypeError (PHP 8), hence the guards.
        if (!empty($this->editors)) {
            arsort($this->editors);
        }
        if (!empty($this->yearMonthCounts)) {
            ksort($this->yearMonthCounts);
        }
        if (!empty($this->tools)) {
            arsort($this->tools);
        }
    }
846
847
    /**
     * Feed a single edit into all of the running statistics.
     * @param  Edit   $edit The edit being processed.
     * @param  Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateCounts(Edit $edit, $prevEdits)
    {
        // Counts for the year and month that the edit was made in.
        $this->updateYearMonthCounts($edit);

        // Counts for the user that made the edit.
        $this->updateUserCounts($edit);

        // Year/month/user counts of anon and minor edits.
        $this->updateAnonMinorCounts($edit);

        // Counts for automated tool usage, if applicable.
        $this->updateToolCounts($edit);

        // The "edits per <time>" counts.
        $this->updateCountHistory($edit);

        // Figures regarding content addition/removal, and the revert count.
        $prevEdits = $this->updateContentSizes($edit, $prevEdits);

        // All counts are up to date, so move the tracking data forward. The SHA of
        // the edit that has been 'prev' so far is kept in 'prevSha' before 'prev'
        // is overwritten, so that the next edit can be compared against it.
        $previous = $prevEdits['prev'];
        if (null !== $previous) {
            $prevEdits['prevSha'] = $previous->getSha();
        }
        $prevEdits['prev'] = $edit;
        $this->lastEdit = $edit;

        return $prevEdits;
    }
886
887
    /**
     * Update the content size figures for the given edit, dispatching to the
     * revert or non-revert handling depending on what kind of edit it is.
     * @param Edit $edit
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateContentSizes(Edit $edit, $prevEdits)
    {
        if (!$this->isRevert($prevEdits, $edit)) {
            return $this->updateContentSizesNonRevert($edit, $prevEdits);
        }

        // The edit was a revert.
        return $this->updateContentSizesRevert($prevEdits);
    }
902
903
    /**
     * Is the given Edit a revert? It is when its SHA equals the tracked
     * 'prevSha', or when the Edit itself reports being a revert.
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @param Edit $edit
     * @return bool
     */
    private function isRevert($prevEdits, $edit)
    {
        if ($edit->getSha() === $prevEdits['prevSha']) {
            return true;
        }

        return (bool) $edit->isRevert($this->container);
    }
913
914
    /**
915
     * Updates the figures on content sizes assuming the given edit was a revert of the previous one.
916
     * In such a case, we don't want to treat the previous edit as legit content addition or removal.
917
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
918
     * @return Edit[] Updated version of $prevEdits, for tracking.
919
     */
920 4
    private function updateContentSizesRevert($prevEdits)
    {
        $this->revertCount++;

        $revertedEdit = $prevEdits['prev'];

        // Without a previous edit there is nothing to undo. Bailing out here also
        // keeps the max addition/deletion checks below from calling getId() on null.
        if (!$revertedEdit) {
            return $prevEdits;
        }

        // Adjust addedBytes given this edit was a revert of the previous one.
        $revertedSize = $revertedEdit->getSize();
        if ($revertedSize > 0) {
            $this->addedBytes -= $revertedSize;

            // Also deduct from the user's individual added byte count.
            $username = $revertedEdit->getUser()->getUsername();
            $this->editors[$username]['added'] -= $revertedSize;
        }

        // @TODO: Test this against an edit war (use your sandbox).
        // Also remove as max added or deleted, if applicable. The reverted edit is
        // swapped into $prevEdits so it can be restored should the revert itself
        // get reverted (i.e. in the event of edit wars).
        if ($this->maxAddition && $revertedEdit->getId() === $this->maxAddition->getId()) {
            $this->maxAddition = $prevEdits['maxAddition'];
            $prevEdits['maxAddition'] = $revertedEdit;
        } elseif ($this->maxDeletion && $revertedEdit->getId() === $this->maxDeletion->getId()) {
            $this->maxDeletion = $prevEdits['maxDeletion'];
            $prevEdits['maxDeletion'] = $revertedEdit;
        }

        return $prevEdits;
    }
945
946
    /**
947
     * Updates the figures on content sizes assuming the given edit
948
     * was NOT a revert of the previous edit.
949
     * @param Edit $edit
950
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
951
     * @return Edit[] Updated version of $prevEdits, for tracking.
952
     */
953 4
    private function updateContentSizesNonRevert(Edit $edit, $prevEdits)
    {
        $delta = $this->getEditSize($edit, $prevEdits);

        if ($delta > 0) {
            // Not a revert, so a positive size counts as added content,
            // both overall and for the individual editor.
            $username = $edit->getUser()->getUsername();
            $this->addedBytes += $delta;
            $this->editors[$username]['added'] += $delta;

            $isLargestAddition = !$this->maxAddition || $delta > $this->maxAddition->getSize();
            if ($isLargestAddition) {
                // Remember the former record holder: should this edit turn out
                // to be reverted, the old maximum needs to be reinstated.
                $prevEdits['maxAddition'] = $this->maxAddition;
                $this->maxAddition = $edit;
            }

            return $prevEdits;
        }

        if ($delta < 0) {
            $isLargestDeletion = !$this->maxDeletion || $delta < $this->maxDeletion->getSize();
            if ($isLargestDeletion) {
                // Same bookkeeping as above, but for the biggest removal.
                $prevEdits['maxDeletion'] = $this->maxDeletion;
                $this->maxDeletion = $edit;
            }
        }

        return $prevEdits;
    }
980
981
    /**
982
     * Get the size of the given edit, based on the previous edit (if present).
983
     * We also don't return the actual edit size if last revision had a length of null.
984
     * This happens when the edit follows other edits that were revision-deleted.
985
     * @see T148857 for more information.
986
     * @todo Remove once T101631 is resolved.
987
     * @param Edit $edit
988
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
989
     * @return int
990
     */
991 4
    private function getEditSize(Edit $edit, $prevEdits)
    {
        $previous = $prevEdits['prev'];

        // When the preceding revision has no known length, the size of
        // this edit is not reported and 0 is used instead.
        $previousLengthUnknown = $previous && $previous->getLength() === null;

        return $previousLengthUnknown ? 0 : $edit->getSize();
    }
999
1000
    /**
1001
     * Update counts of automated tool usage for the given edit.
1002
     * @param Edit $edit
1003
     */
1004 4
    private function updateToolCounts(Edit $edit)
    {
        $tool = $edit->getTool($this->container);

        // getTool() yields false when no tool was matched; only matched edits are tallied.
        if ($tool !== false) {
            $year = $edit->getYear();
            $month = $edit->getMonth();

            $this->automatedCount++;
            $this->yearMonthCounts[$year]['automated']++;
            $this->yearMonthCounts[$year]['months'][$month]['automated']++;

            $toolName = $tool['name'];
            if (isset($this->tools[$toolName])) {
                $this->tools[$toolName]['count']++;
            } else {
                // First time this tool shows up for the page.
                $this->tools[$toolName] = [
                    'count' => 1,
                    'link' => $tool['link'],
                ];
            }
        }
    }
1029
1030
    /**
1031
     * Update various counts for the year and month of the given edit.
1032
     * @param Edit $edit
1033
     */
1034 4
    private function updateYearMonthCounts(Edit $edit)
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Make sure the structure for this year (and its months) exists.
        if (!isset($this->yearMonthCounts[$year])) {
            $this->addYearMonthCountEntry($edit);
        }

        // Each later edit in the year overwrites this, so it ends up
        // holding the page size as of the end of the year.
        $this->yearMonthCounts[$year]['size'] = (int) $edit->getLength();

        // Every edit counts towards its year and its month.
        $this->yearMonthCounts[$year]['all']++;
        $this->yearMonthCounts[$year]['months'][$month]['all']++;

        // Track the busiest month seen so far.
        $this->maxEditsPerMonth = max(
            $this->maxEditsPerMonth,
            $this->yearMonthCounts[$year]['months'][$month]['all']
        );
    }
1056
1057
    /**
1058
     * Add a new entry to $this->yearMonthCounts for the given year,
1059
     * with blank values for each month. This called during self::parseHistory().
1060
     * @param Edit $edit
1061
     */
1062 4
    private function addYearMonthCountEntry(Edit $edit)
    {
        $this->yearLabels[] = $this->i18n->dateFormat($edit->getTimestamp(), 'yyyy');
        $editYear = $edit->getYear();

        // Beginning of the month of the first edit, at 00:00:00.
        // mktime() expects integers, so the month and year are cast explicitly.
        $firstEditTime = mktime(
            0,
            0,
            0,
            (int) $this->firstEdit->getMonth(),
            1,
            (int) $this->firstEdit->getYear()
        );

        // Upper bound for the months to list; the same for every iteration below.
        $lastDay = $this->getLastDay();

        $this->yearMonthCounts[$editYear] = [
            'all' => 0,
            'minor' => 0,
            'anon' => 0,
            'automated' => 0,
            'size' => 0, // Keep track of the size by the end of the year.
            'events' => [],
            'months' => [],
        ];

        for ($month = 1; $month <= 12; $month++) {
            // Timestamp of the first day of the month, at 00:00:00.
            $monthStart = mktime(0, 0, 0, $month, 1, (int) $editYear);

            // Don't show zeros for months before the first edit or after the last day.
            if ($monthStart < $firstEditTime || $monthStart > $lastDay) {
                continue;
            }

            $this->monthLabels[] = $this->i18n->dateFormat($monthStart, 'yyyy-MM');

            // Months are keyed by their zero-padded number ('01' through '12').
            $this->yearMonthCounts[$editYear]['months'][sprintf('%02d', $month)] = [
                'all' => 0,
                'minor' => 0,
                'anon' => 0,
                'automated' => 0,
            ];
        }
    }
1097
1098
    /**
1099
     * Update the counts of anon and minor edits for year, month,
1100
     * and user of the given edit.
1101
     * @param Edit $edit
1102
     */
1103 4
    private function updateAnonMinorCounts(Edit $edit)
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Edits by logged-out users: bump the month, year and overall tallies.
        if ($edit->isAnon()) {
            $this->yearMonthCounts[$year]['months'][$month]['anon']++;
            $this->yearMonthCounts[$year]['anon']++;
            $this->anonCount++;
        }

        // Edits flagged as minor: same three tallies.
        if ($edit->isMinor()) {
            $this->yearMonthCounts[$year]['months'][$month]['minor']++;
            $this->yearMonthCounts[$year]['minor']++;
            $this->minorCount++;
        }
    }
1122
1123
    /**
1124
     * Update various counts for the user of the given edit.
1125
     * @param Edit $edit
1126
     */
1127 4
    private function updateUserCounts(Edit $edit)
    {
        $username = $edit->getUser()->getUsername();

        if (!isset($this->editors[$username])) {
            // First edit we see from this user: seed their stats.
            $this->editors[$username] = [
                'all' => 0,
                'minor' => 0,
                'minorPercentage' => 0,
                'first' => $edit->getTimestamp(),
                'firstId' => $edit->getId(),
                'last' => null,
                'atbe' => null,
                'added' => 0,
            ];
        }

        // Work on a local copy of the user's stats and write it back once.
        $stats = $this->editors[$username];

        $stats['all']++;
        $stats['last'] = $edit->getTimestamp();
        $stats['lastId'] = $edit->getId();

        if ($edit->isMinor()) {
            $stats['minor']++;
        }

        $this->editors[$username] = $stats;
    }
1155
1156
    /**
1157
     * Increment "edits per <time>" counts based on the given edit.
1158
     * @param Edit $edit
1159
     */
1160 4
    private function updateCountHistory(Edit $edit)
    {
        $timestamp = $edit->getTimestamp();

        // Counter name => relative date marking the start of its time window.
        $windows = [
            'day' => '-1 day',
            'week' => '-1 week',
            'month' => '-1 month',
            'year' => '-1 year',
        ];

        foreach ($windows as $period => $offset) {
            // An edit newer than the window's start counts towards that window.
            if ($timestamp > new DateTime($offset)) {
                $this->countHistory[$period]++;
            }
        }
    }
1177
1178
    /**
1179
     * Get info about bots that edited the page.
1180
     * @return mixed[] Contains the bot's username, edit count to the page,
1181
     *   and whether or not they are currently a bot.
1182
     */
1183 1
    public function getBots()
    {
        // Plain accessor for the bot data stored on the instance.
        $bots = $this->bots;

        return $bots;
    }
1187
1188
    /**
1189
     * Set info about bots that edited the page. This is done as a private setter
1190
     * because we need this information when computing the top 10 editors,
1191
     * where we don't want to include bots.
1192
     */
1193
    private function setBots()
    {
        $result = $this->getRepository()->getBotData($this->page, $this->startDate, $this->endDate);

        // Index the fetched rows by username.
        $botStats = [];
        while ($row = $result->fetch()) {
            $isCurrentBot = $row['current'] === 'bot';

            $botStats[$row['username']] = [
                'count' => (int) $row['count'],
                'current' => $isCurrentBot,
            ];
        }

        // Highest edit count first, keeping the usernames as keys.
        uasort($botStats, function ($left, $right) {
            return $right['count'] - $left['count'];
        });

        $this->bots = $botStats;
    }
1212
1213
    /**
1214
     * Number of edits made to the page by current or former bots.
1215
     * @param array[]|null $bots Used only in unit tests, where we
1216
     *   supply mock data for the bots that will get processed.
1217
     * @return int
1218
     */
1219 2
    public function getBotRevisionCount($bots = null)
    {
        // Reuse the total if it was already computed for this instance.
        if (isset($this->botRevisionCount)) {
            return $this->botRevisionCount;
        }

        // Fall back to the instance's own bot data when none is supplied.
        $botData = $bots === null ? $this->getBots() : $bots;

        $total = 0;
        foreach ($botData as $data) {
            $total += $data['count'];
        }

        $this->botRevisionCount = $total;

        return $this->botRevisionCount;
    }
1238
1239
    /**
1240
     * Query for log events during each year of the article's history,
1241
     *   and set the results in $this->yearMonthCounts.
1242
     */
1243 1
    private function setLogsEvents()
    {
        // Log type => i18n key the event is counted under. Pending-changes
        // protections ('stable') are counted along with normal protections.
        $actionKeys = [
            'protect' => 'protections',
            'delete' => 'deletions',
            'move' => 'moves',
            'stable' => 'protections',
        ];

        $logData = $this->getRepository()->getLogEvents(
            $this->page,
            $this->startDate,
            $this->endDate
        );

        foreach ($logData as $event) {
            $year = date('Y', strtotime($event['timestamp']));

            // Events in a year we have no counts for can't be attributed. Skip just
            // this event; stopping here would drop events of every later iteration.
            if (!isset($this->yearMonthCounts[$year])) {
                continue;
            }

            // Ignore log types we don't report on. Without this, an unhandled type
            // would be counted under whatever action the previous event had.
            $logType = $event['log_type'];
            if (!isset($actionKeys[$logType])) {
                continue;
            }
            $action = $actionKeys[$logType];

            if (empty($this->yearMonthCounts[$year]['events'][$action])) {
                $this->yearMonthCounts[$year]['events'][$action] = 1;
            } else {
                $this->yearMonthCounts[$year]['events'][$action]++;
            }
        }
    }
1287
1288
    /**
1289
     * Set statistics about the top 10 editors by added text and number of edits.
1290
     * This is run *after* parseHistory() since we need the grand totals first.
1291
     * Various stats are also set for each editor in $this->editors to be used in the charts.
1292
     */
1293 4
    private function setTopTenCounts()
    {
        $topTenCount = $counter = 0;
        $topTenEditors = [];
        $numProcessed = $this->getNumRevisionsProcessed();

        foreach ($this->editors as $editor => $info) {
            // Tally the edits of the first 10 editors that are not bots.
            // NOTE(review): presumably $this->editors is already ordered by edit
            // count at this point; confirm against the caller.
            // isset() is a constant-time key lookup, and unlike a loose in_array()
            // it cannot mistake a numeric username for an unrelated bot name.
            if ($counter < 10 && !isset($this->bots[$editor])) {
                $topTenCount += $info['all'];
                $counter++;

                // To be used in the Top Ten charts.
                $topTenEditors[] = [
                    'label' => $editor,
                    'value' => $info['all'],
                    // Guard against division by zero.
                    'percentage' => $numProcessed
                        ? 100 * ($info['all'] / $numProcessed)
                        : 0,
                ];
            }

            // Compute the percentage of minor edits the user made.
            $this->editors[$editor]['minorPercentage'] = $info['all']
                ? ($info['minor'] / $info['all']) * 100
                : 0;

            if ($info['all'] > 1) {
                // Number of seconds/days between first and last edit.
                $secs = $info['last']->getTimestamp() - $info['first']->getTimestamp();
                $days = $secs / (60 * 60 * 24);

                // Average time between edits (in days).
                $this->editors[$editor]['atbe'] = $days / $info['all'];
            }
        }

        $this->topTenEditorsByEdits = $topTenEditors;

        // First sort a copy of the editors array by the amount of text they added.
        $editorsByAdded = $this->editors;
        uasort($editorsByAdded, function ($a, $b) {
            if ($a['added'] === $b['added']) {
                return 0;
            }
            return $a['added'] > $b['added'] ? -1 : 1;
        });

        // Then build a new array of top 10 editors by added text,
        // in the data structure needed for the chart.
        $addedBytes = $this->addedBytes;
        $topTenByAdded = [];
        foreach (array_slice($editorsByAdded, 0, 10, true) as $editor => $info) {
            $topTenByAdded[] = [
                'label' => $editor,
                'value' => $info['added'],
                // addedBytes is 0 when no content additions were counted;
                // report 0% rather than dividing by zero.
                'percentage' => $addedBytes
                    ? 100 * ($info['added'] / $addedBytes)
                    : 0,
            ];
        }
        $this->topTenEditorsByAdded = $topTenByAdded;

        $this->topTenCount = $topTenCount;
    }
1355
1356
    /**
1357
     * Get authorship attribution from the WikiWho API.
1358
     * @see https://f-squared.org/wikiwho/
1359
     * @param int $limit Max number of results.
1360
     * @return array
1361
     */
1362 1
    public function getTextshares($limit = null)
    {
        // Serve the result computed earlier on this instance, if any.
        if (isset($this->textshares)) {
            return $this->textshares;
        }

        // TODO: check for failures. Should have a success:true
        $response = $this->getRepository()->getTextshares($this->page);

        // Without a revision in the response there is nothing to attribute.
        if (!isset($response['revisions'][0])) {
            $message = isset($response['Error']) ? $response['Error'] : 'Unknown';

            return ['error' => $message];
        }

        // The first revision entry is keyed by its revision ID.
        $revision = $response['revisions'][0];
        $revId = array_keys($revision)[0];
        $tokens = $revision[$revId]['tokens'];

        list($counts, $totalCount, $userIds) = $this->countTokens($tokens);
        $usernameMap = $this->getUsernameMap($userIds);

        // Restrict to the first $limit authors when a limit was given.
        $countsToProcess = $limit === null
            ? $counts
            : array_slice($counts, 0, $limit, true);

        // Key the results by username where the ID could be resolved; otherwise
        // keep the identifier as is (e.g. an IP address).
        $list = [];
        foreach ($countsToProcess as $editor => $count) {
            $author = isset($usernameMap[$editor]) ? $usernameMap[$editor] : $editor;

            $list[$author] = [
                'count' => $count,
                'percentage' => round(100 * ($count / $totalCount), 1)
            ];
        }

        $this->textshares = [
            'list' => $list,
            'totalAuthors' => count($counts),
            'totalCount' => $totalCount,
        ];

        return $this->textshares;
    }
1414
1415
    /**
1416
     * Get a map of user IDs to usernames, given the IDs.
1417
     * @param int[] $userIds
1418
     * @return array IDs as keys, usernames as values.
1419
     */
1420 1
    private function getUsernameMap($userIds)
    {
        $project = $this->page->getProject();
        $rows = $this->getRepository()->getUsernamesFromIds($project, $userIds);

        // Turn the user_id/user_name rows into an ID => name lookup.
        $namesById = [];
        foreach ($rows as $row) {
            $id = $row['user_id'];
            $namesById[$id] = $row['user_name'];
        }

        return $namesById;
    }
1434
1435
    /**
1436
     * Get counts of token lengths for each author. Used in self::getTextshares()
1437
     * @param array $tokens
1438
     * @return array [counts by user, total count, IDs of accounts]
1439
     */
1440 1
    private function countTokens($tokens)
    {
        $counts = [];
        $userIds = [];
        $seenIds = [];
        $totalCount = 0;

        // Loop through the tokens, keeping totals (token length) for each author.
        foreach ($tokens as $token) {
            $editor = $token['editor'];

            // IPs are prefixed with '0|', otherwise it's the user ID.
            if (substr($editor, 0, 2) === '0|') {
                $editor = substr($editor, 2);
            } elseif (!isset($seenIds[$editor])) {
                // Record each account once, however many tokens it authored,
                // so the ID list does not grow with the number of tokens.
                $seenIds[$editor] = true;
                $userIds[] = $editor;
            }

            if (!isset($counts[$editor])) {
                $counts[$editor] = 0;
            }

            $length = strlen($token['str']);
            $counts[$editor] += $length;
            $totalCount += $length;
        }

        // Sort authors by count, highest first.
        arsort($counts);

        return [$counts, $totalCount, $userIds];
    }
1470
1471
    /**
1472
     * Get a list of wikis supported by WikiWho.
1473
     * @return string[]
1474
     * @codeCoverageIgnore
1475
     */
1476
    public function getTextshareWikis()
    {
        // Exposes the class constant through an instance method.
        $wikis = self::TEXTSHARE_WIKIS;

        return $wikis;
    }
1480
1481
    /**
1482
     * Get prose and reference information.
1483
     * @return array With keys 'characters', 'words', 'references', 'unique_references' and 'sections'.
1484
     */
1485 1
    public function getProseStats()
    {
        // Only pass a date along when an end date was set.
        $datetime = null;
        if ($this->endDate !== false) {
            $datetime = new DateTime('@'.$this->endDate);
        }

        $crawler = new Crawler($this->page->getHTMLContent($datetime));

        list($chars, $words) = $this->countCharsAndWords($crawler, '#mw-content-text p');

        // Collect the text of every reference to count how many are distinct.
        $refs = $crawler->filter('#mw-content-text .reference');
        $refTexts = $refs->each(function ($ref) {
            return $ref->text();
        });

        $numSections = $crawler->filter('#mw-content-text .mw-headline')->count();

        return [
            'characters' => $chars,
            'words' => $words,
            'references' => $refs->count(),
            'unique_references' => count(array_unique($refTexts)),
            'sections' => $numSections,
        ];
    }
1511
1512
    /**
1513
     * Count the number of characters and words of the plain text
1514
     * within the DOM element matched by the given selector.
1515
     * @param Crawler $crawler
1516
     * @param string $selector HTML selector.
1517
     * @return array [num chars, num words]
1518
     */
1519 1
    private function countCharsAndWords($crawler, $selector)
    {
        $totalChars = 0;
        $totalWords = 0;

        $crawler->filter($selector)->each(function ($node) use (&$totalChars, &$totalWords) {
            // Strip bracketed numbers such as "[12]" from the text.
            $text = preg_replace('/\[\d+\]/', '', trim($node->text()));

            // Count characters rather than bytes, so multi-byte text isn't inflated.
            $totalChars += mb_strlen($text, 'UTF-8');

            // Split on any run of whitespace and drop empty pieces, so an empty
            // paragraph counts as zero words and repeated spaces add none.
            $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
            $totalWords += $words === false ? 0 : count($words);
        });

        return [$totalChars, $totalWords];
    }
1532
1533
    /**
1534
     * Fetch transclusion data (categories, templates and files)
1535
     * that are on the page.
1536
     * @return array With keys 'categories', 'templates' and 'files'.
1537
     */
1538 1
    private function getTransclusionData()
    {
        // Already fetched: hand back the stored array.
        if (is_array($this->transclusionData)) {
            return $this->transclusionData;
        }

        // Otherwise fetch it from the repository and keep it on the instance.
        $repository = $this->getRepository();
        $this->transclusionData = $repository->getTransclusionData($this->page);

        return $this->transclusionData;
    }
1546
1547
    /**
1548
     * Get the number of categories that are on the page.
1549
     * @return int
1550
     */
1551 1
    public function getNumCategories()
    {
        // Read the 'categories' entry of the transclusion data.
        $data = $this->getTransclusionData();

        return $data['categories'];
    }
1555
1556
    /**
1557
     * Get the number of templates that are on the page.
1558
     * @return int
1559
     */
1560 1
    public function getNumTemplates()
    {
        // Read the 'templates' entry of the transclusion data.
        $data = $this->getTransclusionData();

        return $data['templates'];
    }
1564
1565
    /**
1566
     * Get the number of files that are on the page.
1567
     * @return int
1568
     */
1569 1
    public function getNumFiles()
    {
        // Read the 'files' entry of the transclusion data.
        $data = $this->getTransclusionData();

        return $data['files'];
    }
1573
}
1574