Passed
Push — master ( 197fa4...450121 )
by MusikAnimal
04:27
created

ArticleInfo::doPostPrecessing()   B

Complexity

Conditions 6
Paths 9

Size

Total Lines 42
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 6.0033

Importance

Changes 0
Metric Value
cc 6
eloc 21
nc 9
nop 0
dl 0
loc 42
ccs 21
cts 22
cp 0.9545
crap 6.0033
rs 8.9617
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the ArticleInfo class.
4
 */
5
6
namespace Xtools;
7
8
use AppBundle\Helper\I18nHelper;
9
use DateTime;
10
use Symfony\Component\DependencyInjection\Container;
11
use Symfony\Component\DomCrawler\Crawler;
12
13
/**
14
 * An ArticleInfo provides statistics about a page on a project. This model does not
15
 * have a separate Repository because it needs to use individual SQL statements to
16
 * traverse the page's history, saving class instance variables along the way.
17
 */
18
class ArticleInfo extends Model
19
{
20
    /** @const string[] Domain names of wikis supported by WikiWho. */
21
    const TEXTSHARE_WIKIS = [
22
        'en.wikipedia.org',
23
        'de.wikipedia.org',
24
        'eu.wikipedia.org',
25
        'tr.wikipedia.org',
26
        'es.wikipedia.org',
27
    ];
28
29
    /** @var Container The application's DI container. */
30
    protected $container;
31
32
    /** @var I18nHelper For i18n and l10n. */
33
    protected $i18n;
34
35
    /** @var int Number of revisions that belong to the page. */
36
    protected $numRevisions;
37
38
    /** @var int Maximum number of revisions to process, as configured. */
39
    protected $maxRevisions;
40
41
    /** @var int Number of revisions that were actually processed. */
42
    protected $numRevisionsProcessed;
43
44
    /**
45
     * Various statistics about editors to the page. These are not User objects
46
     * so as to preserve memory.
47
     * @var mixed[]
48
     */
49
    protected $editors;
50
51
    /** @var mixed[] The top 10 editors to the page by number of edits. */
52
    protected $topTenEditorsByEdits;
53
54
    /** @var mixed[] The top 10 editors to the page by added text. */
55
    protected $topTenEditorsByAdded;
56
57
    /** @var int Number of edits made by the top 10 editors. */
58
    protected $topTenCount;
59
60
    /** @var mixed[] Various statistics about bots that edited the page. */
61
    protected $bots;
62
63
    /** @var int Number of edits made to the page by bots. */
64
    protected $botRevisionCount;
65
66
    /** @var mixed[] Various counts about each individual year and month of the page's history. */
67
    protected $yearMonthCounts;
68
69
    /** @var string[] Localized labels for the years, to be used in the 'Year counts' chart. */
70
    protected $yearLabels = [];
71
72
    /** @var string[] Localized labels for the months, to be used in the 'Month counts' chart. */
73
    protected $monthLabels = [];
74
75
    /** @var Edit The first edit to the page. */
76
    protected $firstEdit;
77
78
    /** @var Edit The last edit to the page. */
79
    protected $lastEdit;
80
81
    /** @var Edit Edit that made the largest addition by number of bytes. */
82
    protected $maxAddition;
83
84
    /** @var Edit Edit that made the largest deletion by number of bytes. */
85
    protected $maxDeletion;
86
87
    /** @var int[] Number of in and outgoing links and redirects to the page. */
88
    protected $linksAndRedirects;
89
90
    /** @var string[] Assessments of the page (see Page::getAssessments). */
91
    protected $assessments;
92
93
    /**
94
     * Maximum number of edits that were created across all months. This is used as a comparison
95
     * for the bar charts in the months section.
96
     * @var int
97
     */
98
    protected $maxEditsPerMonth;
99
100
    /** @var string[] List of (semi-)automated tools that were used to edit the page. */
101
    protected $tools;
102
103
    /**
104
     * Total number of bytes added throughout the page's history. This is used as a comparison
105
     * when computing the top 10 editors by added text.
106
     * @var int
107
     */
108
    protected $addedBytes = 0;
109
110
    /** @var int Number of days between first and last edit. */
111
    protected $totalDays;
112
113
    /** @var int Number of minor edits to the page. */
114
    protected $minorCount = 0;
115
116
    /** @var int Number of anonymous edits to the page. */
117
    protected $anonCount = 0;
118
119
    /** @var int Number of automated edits to the page. */
120
    protected $automatedCount = 0;
121
122
    /** @var int Number of edits to the page that were reverted with the subsequent edit. */
123
    protected $revertCount = 0;
124
125
    /** @var int[] The "edits per <time>" counts. */
126
    protected $countHistory = [
127
        'day' => 0,
128
        'week' => 0,
129
        'month' => 0,
130
        'year' => 0
131
    ];
132
133
    /** @var string[] List of wikidata and Checkwiki errors. */
134
    protected $bugs;
135
136
    /** @var array List of editors and the percentage of the current content that they authored. */
137
    protected $textshares;
138
139
    /** @var array Number of categories, templates and files on the page. */
140
    protected $transclusionData;
141
142
    /**
     * Create an ArticleInfo for the given page.
     * @param Page $page The page whose history will be analysed.
     * @param Container $container The application's DI container.
     * @param false|int $start Lower bound of the date range, or false for none.
     * @param false|int $end Upper bound of the date range, or false for none.
     */
    public function __construct(Page $page, Container $container, $start = false, $end = false)
    {
        // Dependencies first, then the optional date range.
        $this->page = $page;
        $this->container = $container;

        $this->start = $start;
        $this->end = $end;
    }
156
157
    /**
     * Inject the I18nHelper so that ArticleInfo can use it.
     * @param I18nHelper $i18n Helper used for i18n and l10n.
     * @codeCoverageIgnore
     */
    public function setI18nHelper(I18nHelper $i18n)
    {
        $this->i18n = $i18n;
    }
166
167
    /**
     * Get the opening date of the date range, formatted as used in the views.
     * @return string Date as YYYY-MM-DD, or blank if no start date was given.
     */
    public function getStartDate()
    {
        // Explicit checks instead of a loose `== ''`, whose result for 0 differs
        // between PHP 7 and PHP 8.
        if ($this->start === false || $this->start === null || $this->start === '') {
            return '';
        }

        // The value may arrive as a numeric string; date() expects an integer.
        return date('Y-m-d', (int) $this->start);
    }
175
176
    /**
     * Get the closing date of the date range, formatted as used in the views.
     * @return string Date as YYYY-MM-DD, or blank if no end date was given.
     */
    public function getEndDate()
    {
        // Explicit checks instead of a loose `== ''`, whose result for 0 differs
        // between PHP 7 and PHP 8.
        if ($this->end === false || $this->end === null || $this->end === '') {
            return '';
        }

        // The value may arrive as a numeric string; date() expects an integer.
        return date('Y-m-d', (int) $this->end);
    }
184
185
    /**
     * Get the last day of the final month to show in the month/year sections.
     * The month is taken from $this->end when it is set, otherwise from the
     * current date.
     * @return int As Unix timestamp.
     */
    private function getLastDay()
    {
        // No explicit end date: use the current month.
        if ($this->end === false) {
            return strtotime('last day of this month');
        }

        $endDate = new DateTime('@'.$this->end);

        return $endDate->modify('last day of this month')->getTimestamp();
    }
200
201
    /**
     * Return the start/end date values as an associative array, with
     * YYYY-MM-DD as the date format. This is used mainly as a helper
     * to pass to the pageviews Twig macros.
     * @return array Empty if there is no date range, otherwise has keys 'start' and 'end'.
     */
    public function getDateParams()
    {
        if (!$this->hasDateRange()) {
            return [];
        }

        // Start from the dates of the first and last edits that were processed...
        $params = [
            'start' => $this->firstEdit->getTimestamp()->format('Y-m-d'),
            'end' => $this->lastEdit->getTimestamp()->format('Y-m-d'),
        ];

        // ...then let an explicitly given bound take precedence. The bounds may
        // arrive as numeric strings, while date() expects an integer.
        if ($this->start !== false) {
            $params['start'] = date('Y-m-d', (int) $this->start);
        }
        if ($this->end !== false) {
            $params['end'] = date('Y-m-d', (int) $this->end);
        }

        return $params;
    }
227
228
    /**
     * Get the number of revisions belonging to the page, within the date
     * range if one was given. The value is fetched once and then reused.
     * @return int
     */
    public function getNumRevisions()
    {
        if (isset($this->numRevisions)) {
            return $this->numRevisions;
        }

        $this->numRevisions = $this->page->getNumRevisions(null, $this->start, $this->end);

        return $this->numRevisions;
    }
239
240
    /**
     * Get the maximum number of revisions that we should process, as given
     * by the app.max_page_revisions parameter.
     * @return int
     */
    public function getMaxRevisions()
    {
        if (isset($this->maxRevisions)) {
            return $this->maxRevisions;
        }

        $configured = $this->container->getParameter('app.max_page_revisions');
        $this->maxRevisions = (int) $configured;

        return $this->maxRevisions;
    }
251
252
    /**
     * Get the number of revisions that are actually getting processed.
     * This is the app.max_page_revisions parameter or the actual number
     * of revisions, whichever is smaller.
     * @return int
     */
    public function getNumRevisionsProcessed()
    {
        if (!isset($this->numRevisionsProcessed)) {
            $this->numRevisionsProcessed = $this->tooManyRevisions()
                ? $this->getMaxRevisions()
                : $this->getNumRevisions();
        }

        return $this->numRevisionsProcessed;
    }
272
273
    /**
     * Does the page have more revisions than the configured maximum?
     * A maximum of zero (or less) means there is no limit.
     * @return bool
     */
    public function tooManyRevisions()
    {
        $max = $this->getMaxRevisions();

        if ($max <= 0) {
            return false;
        }

        return $this->getNumRevisions() > $max;
    }
281
282
    /**
     * Fetch and store all the data needed to render the ArticleInfo view.
     * The steps run in a fixed order because later ones rely on earlier ones.
     * @codeCoverageIgnore
     */
    public function prepareData()
    {
        $this->parseHistory();
        $this->setLogsEvents();

        // Bots need to be set before setting top 10 counts.
        $this->setBots();

        $this->doPostPrecessing();
    }
296
297
    /**
     * Get the number of editors that edited the page.
     * @return int Zero if the editors list has not been populated.
     */
    public function getNumEditors()
    {
        // $editors is declared without a default, so it may not be an array yet;
        // count() on a non-countable warns in PHP 7.2+ and throws in PHP 8.
        return is_array($this->editors) ? count($this->editors) : 0;
    }
305
306
    /**
     * Get the number of bots that edited the page.
     * @return int
     */
    public function getNumBots()
    {
        $bots = $this->getBots();

        return count($bots);
    }
314
315
    /**
     * Get the number of days between the first and last edit.
     * The value is computed once and then reused.
     * @return int
     */
    public function getTotalDays()
    {
        if (isset($this->totalDays)) {
            return $this->totalDays;
        }

        $dateFirst = $this->firstEdit->getTimestamp();
        $dateLast = $this->lastEdit->getTimestamp();

        // Third argument requests an absolute (non-negative) interval.
        $interval = date_diff($dateLast, $dateFirst, true);

        // format() yields a string; cast so the property matches its declared int type.
        $this->totalDays = (int) $interval->format('%a');

        return $this->totalDays;
    }
330
331
    /**
     * Get the length of the page. With a date range this is the length as of
     * the last edit that was processed, otherwise the page's own length.
     * @return int
     */
    public function getLength()
    {
        return $this->hasDateRange()
            ? $this->lastEdit->getLength()
            : $this->page->getLength();
    }
343
344
    /**
     * Get the average number of days between edits to the page.
     * @return double Zero if no revisions were processed.
     */
    public function averageDaysPerEdit()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid dividing by zero when there are no revisions.
        if (!$numRevisions) {
            return 0.0;
        }

        return round($this->getTotalDays() / $numRevisions, 1);
    }
352
353
    /**
     * Get the average number of edits per day to the page.
     * @return double Zero if the first and last edit are less than a day apart.
     */
    public function editsPerDay()
    {
        $totalDays = $this->getTotalDays();

        // Revisions divided by the number of days. The previous divisor of
        // (365 / 12 / 24) inflated the result by a factor of about 1.27.
        $editsPerDay = $totalDays
            ? $this->getNumRevisionsProcessed() / $totalDays
            : 0;

        return round($editsPerDay, 1);
    }
364
365
    /**
     * Get the average number of edits per month to the page, capped at the
     * number of revisions that were processed.
     * @return double
     */
    public function editsPerMonth()
    {
        $average = 0;

        if ($this->getTotalDays()) {
            // 365 / 12 is the average number of days in a month.
            $months = $this->getTotalDays() / (365 / 12);
            $average = $this->getNumRevisionsProcessed() / $months;
        }

        return min($this->getNumRevisionsProcessed(), round($average, 1));
    }
376
377
    /**
     * Get the average number of edits per year to the page, capped at the
     * number of revisions that were processed.
     * @return double
     */
    public function editsPerYear()
    {
        $average = 0;

        if ($this->getTotalDays()) {
            $years = $this->getTotalDays() / 365;
            $average = $this->getNumRevisionsProcessed() / $years;
        }

        return min($this->getNumRevisionsProcessed(), round($average, 1));
    }
388
389
    /**
     * Get the average number of edits per editor.
     * @return double Zero if there are no editors.
     */
    public function editsPerEditor()
    {
        // $editors may not be populated yet; also guards against division by zero.
        $numEditors = is_array($this->editors) ? count($this->editors) : 0;
        if ($numEditors === 0) {
            return 0.0;
        }

        return round($this->getNumRevisionsProcessed() / $numEditors, 1);
    }
397
398
    /**
     * Get the percentage of minor edits to the page.
     * @return double Zero if no revisions were processed.
     */
    public function minorPercentage()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid dividing by zero when there are no revisions.
        if (!$numRevisions) {
            return 0.0;
        }

        return round(($this->minorCount / $numRevisions) * 100, 1);
    }
409
410
    /**
     * Get the percentage of anonymous edits to the page.
     * @return double Zero if no revisions were processed.
     */
    public function anonPercentage()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid dividing by zero when there are no revisions.
        if (!$numRevisions) {
            return 0.0;
        }

        return round(($this->anonCount / $numRevisions) * 100, 1);
    }
421
422
    /**
     * Get the percentage of edits made by the top 10 editors.
     * @return double Zero if no revisions were processed.
     */
    public function topTenPercentage()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid dividing by zero when there are no revisions.
        if (!$numRevisions) {
            return 0.0;
        }

        return round(($this->topTenCount / $numRevisions) * 100, 1);
    }
430
431
    /**
     * Get the number of times the page has been viewed in the given timeframe.
     * If the ArticleInfo instance has a date range, that range is used and
     * the $latest parameter is ignored.
     * @param  int $latest Last N days.
     * @return int
     */
    public function getPageviews($latest)
    {
        if ($this->hasDateRange()) {
            $range = $this->getDateParams();

            return $this->page->getPageviews($range['start'], $range['end']);
        }

        return $this->page->getLastPageviews($latest);
    }
447
448
    /**
     * Get the page assessments of the page. The result is reused on later
     * calls once it has been fetched as an array.
     * @see https://www.mediawiki.org/wiki/Extension:PageAssessments
     * @return string[]|false False if unsupported.
     * @codeCoverageIgnore
     */
    public function getAssessments()
    {
        if (is_array($this->assessments)) {
            return $this->assessments;
        }

        $pageAssessments = $this->page->getProject()->getPageAssessments();
        $this->assessments = $pageAssessments->getAssessments($this->page);

        return $this->assessments;
    }
464
465
    /**
     * Number of automated edits that were made to the page.
     * @return int
     */
    public function getAutomatedCount()
    {
        return $this->automatedCount;
    }
473
474
    /**
     * Number of edits to the page that were reverted by the edit that followed.
     * @return int
     */
    public function getRevertCount()
    {
        return $this->revertCount;
    }
482
483
    /**
     * Number of edits to the page that were made by logged out users.
     * @return int
     */
    public function getAnonCount()
    {
        return $this->anonCount;
    }
491
492
    /**
     * Number of minor edits that were made to the page.
     * @return int
     */
    public function getMinorCount()
    {
        return $this->minorCount;
    }
500
501
    /**
     * Number of edits made to the page in the past day, week, month and year.
     * @return int[] With keys 'day', 'week', 'month' and 'year'.
     */
    public function getCountHistory()
    {
        return $this->countHistory;
    }
509
510
    /**
     * Number of edits to the page that were made by the top 10 editors.
     * @return int
     */
    public function getTopTenCount()
    {
        return $this->topTenCount;
    }
518
519
    /**
     * Get the top editors to the page by edit count.
     * @param int $limit Default 20, maximum 1,000.
     * @param bool $noBots Set to non-false to exclude bots from the result.
     * @return array One entry per editor with keys 'rank', 'username', 'count',
     *   'minor', 'first_edit' and 'latest_edit'.
     */
    public function getTopEditorsByEditCount($limit = 20, $noBots = false)
    {
        // Quick cache, valid only for the same request. A static local is shared
        // by every instance of the class, so results are keyed by the instance
        // and by the arguments; otherwise a call with a different limit, bot
        // filter or ArticleInfo object would receive someone else's result.
        static $cache = [];

        $limit = min($limit, 1000);
        $cacheKey = spl_object_hash($this).'|'.$limit.'|'.($noBots ? '1' : '0');
        if (isset($cache[$cacheKey])) {
            return $cache[$cacheKey];
        }

        $rows = $this->getRepository()->getTopEditorsByEditCount(
            $this->page,
            $this->start,
            $this->end,
            $limit,
            $noBots
        );

        $topEditors = [];
        $rank = 0;
        foreach ($rows as $row) {
            $topEditors[] = [
                'rank' => ++$rank,
                'username' => $row['username'],
                'count' => $row['count'],
                'minor' => $row['minor'],
                'first_edit' => [
                    'id' => $row['first_revid'],
                    'timestamp' => $row['first_timestamp'],
                ],
                'latest_edit' => [
                    'id' => $row['latest_revid'],
                    'timestamp' => $row['latest_timestamp'],
                ],
            ];
        }

        $cache[$cacheKey] = $topEditors;

        return $topEditors;
    }
562
563
    /**
     * The first edit that was made to the page.
     * @return Edit
     */
    public function getFirstEdit()
    {
        return $this->firstEdit;
    }
571
572
    /**
     * The last edit that was made to the page.
     * @return Edit
     */
    public function getLastEdit()
    {
        return $this->lastEdit;
    }
580
581
    /**
     * The edit that made the largest addition to the page, by number of bytes.
     * @return Edit
     */
    public function getMaxAddition()
    {
        return $this->maxAddition;
    }
589
590
    /**
     * The edit that made the largest removal from the page, by number of bytes.
     * @return Edit
     */
    public function getMaxDeletion()
    {
        return $this->maxDeletion;
    }
598
599
    /**
     * The list of editors to the page, along with various statistics about them.
     * @return mixed[]
     */
    public function getEditors()
    {
        return $this->editors;
    }
607
608
    /**
     * The top editors to the page by number of edits, with various statistics.
     * @return mixed[]
     */
    public function topTenEditorsByEdits()
    {
        return $this->topTenEditorsByEdits;
    }
616
617
    /**
     * The top editors to the page by added text, with various statistics.
     * @return mixed[]
     */
    public function topTenEditorsByAdded()
    {
        return $this->topTenEditorsByAdded;
    }
625
626
    /**
     * Various counts for each individual year and month of the page's history.
     * @return mixed[]
     */
    public function getYearMonthCounts()
    {
        return $this->yearMonthCounts;
    }
634
635
    /**
     * The localized labels used in the 'Year counts' chart.
     * @return string[]
     */
    public function getYearLabels()
    {
        return $this->yearLabels;
    }
643
644
    /**
     * The localized labels used in the 'Month counts' chart.
     * @return string[]
     */
    public function getMonthLabels()
    {
        return $this->monthLabels;
    }
652
653
    /**
     * The maximum number of edits that were created across all months. It
     * serves as the comparison value for the bar charts in the months section.
     * @return int
     */
    public function getMaxEditsPerMonth()
    {
        return $this->maxEditsPerMonth;
    }
662
663
    /**
     * The (semi-)automated tools that were used to edit the page, including
     * the number of times each was used and a link to the tool's homepage.
     * @return string[]
     */
    public function getTools()
    {
        return $this->tools;
    }
672
673
    /**
     * Get the list of the page's wikidata and Checkwiki errors. They are
     * fetched from the page on first use and reused afterwards.
     * @see Page::getErrors()
     * @return string[]
     */
    public function getBugs()
    {
        if (is_array($this->bugs)) {
            return $this->bugs;
        }

        $this->bugs = $this->page->getErrors();

        return $this->bugs;
    }
685
686
    /**
     * Get the number of wikidata and CheckWiki errors.
     * @return int
     */
    public function numBugs()
    {
        $bugs = $this->getBugs();

        return count($bugs);
    }
694
695
    /**
     * Number of external links on the page.
     * @return int
     */
    public function linksExtCount()
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_ext_count'];
    }
703
704
    /**
     * Number of incoming links to the page.
     * @return int
     */
    public function linksInCount()
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_in_count'];
    }
712
713
    /**
     * Number of outgoing links from the page.
     * @return int
     */
    public function linksOutCount()
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['links_out_count'];
    }
721
722
    /**
     * Number of redirects to the page.
     * @return int
     */
    public function redirectsCount()
    {
        $counts = $this->getLinksAndRedirects();

        return $counts['redirects_count'];
    }
730
731
    /**
     * Get the number of external, incoming and outgoing links, along with
     * the number of redirects to the page. The counts are fetched from the
     * page on first use and reused afterwards.
     * @return int[]
     * @codeCoverageIgnore
     */
    private function getLinksAndRedirects()
    {
        if (is_array($this->linksAndRedirects)) {
            return $this->linksAndRedirects;
        }

        $this->linksAndRedirects = $this->page->countLinksAndRedirects();

        return $this->linksAndRedirects;
    }
744
745
    /**
746
     * Parse the revision history, collecting our core statistics.
747
     *
748
     * Untestable because it relies on getting a PDO statement. All the important
749
     * logic lives in other methods which are tested.
750
     * @codeCoverageIgnore
751
     */
752
    private function parseHistory()
753
    {
754
        if ($this->tooManyRevisions()) {
755
            $limit = $this->getMaxRevisions();
756
        } else {
757
            $limit = null;
758
        }
759
760
        // Third parameter is ignored if $limit is null.
761
        $revStmt = $this->page->getRevisionsStmt(
762
            null,
763
            $limit,
764
            $this->getNumRevisions(),
765
            $this->start,
0 ignored issues
show
Bug introduced by
It seems like $this->start can also be of type string; however, parameter $start of Xtools\Page::getRevisionsStmt() does only seem to accept integer|false, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

765
            /** @scrutinizer ignore-type */ $this->start,
Loading history...
766
            $this->end
0 ignored issues
show
Bug introduced by
It seems like $this->end can also be of type string; however, parameter $end of Xtools\Page::getRevisionsStmt() does only seem to accept integer|false, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

766
            /** @scrutinizer ignore-type */ $this->end
Loading history...
767
        );
768
        $revCount = 0;
769
770
        /**
771
         * Data about previous edits so that we can use them as a basis for comparison.
772
         * @var Edit[]
773
         */
774
        $prevEdits = [
775
            // The previous Edit, used to discount content that was reverted.
776
            'prev' => null,
777
778
            // The SHA-1 of the edit *before* the previous edit. Used for more
779
            // accurate revert detection.
780
            'prevSha' => null,
781
782
            // The last edit deemed to be the max addition of content. This is kept track of
783
            // in case we find out the next edit was reverted (and was also a max edit),
784
            // in which case we'll want to discount it and use this one instead.
785
            'maxAddition' => null,
786
787
            // Same as with maxAddition, except the maximum amount of content deleted.
788
            // This is used to discount content that was reverted.
789
            'maxDeletion' => null,
790
        ];
791
792
        while ($rev = $revStmt->fetch()) {
793
            $edit = new Edit($this->page, $rev);
794
795
            if ($revCount === 0) {
796
                $this->firstEdit = $edit;
797
            }
798
799
            // Sometimes, with old revisions (2001 era), the revisions from 2002 come before 2001
800
            if ($edit->getTimestamp() < $this->firstEdit->getTimestamp()) {
801
                $this->firstEdit = $edit;
802
            }
803
804
            $prevEdits = $this->updateCounts($edit, $prevEdits);
805
806
            $revCount++;
807
        }
808
809
        $this->numRevisionsProcessed = $revCount;
810
811
        // Various sorts
812
        arsort($this->editors);
813
        ksort($this->yearMonthCounts);
814
        if ($this->tools) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->tools of type string[] is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
815
            arsort($this->tools);
816
        }
817
    }
818
819
    /**
     * Fold the given edit into all of the running counts, then advance the
     * tracking data so that the edit becomes the "previous" one.
     * @param  Edit   $edit
     * @param  Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateCounts(Edit $edit, $prevEdits)
    {
        // Counts for the year and month of the edit.
        $this->updateYearMonthCounts($edit);

        // Counts for the user who made the edit.
        $this->updateUserCounts($edit);

        // Year/month/user counts of anon and minor edits.
        $this->updateAnonMinorCounts($edit);

        // Counts for automated tool usage, if applicable.
        $this->updateToolCounts($edit);

        // "Edits per <time>" counts.
        $this->updateCountHistory($edit);

        // Figures regarding content addition/removal, and the revert count.
        $prevEdits = $this->updateContentSizes($edit, $prevEdits);

        // With all counts updated, move the tracking data forward. The SHA of
        // the edit that was 'prev' until now is kept as 'prevSha', so that a
        // later edit matching it can be recognized as a revert.
        $previous = $prevEdits['prev'];
        if ($previous !== null) {
            $prevEdits['prevSha'] = $previous->getSha();
        }
        $prevEdits['prev'] = $edit;
        $this->lastEdit = $edit;

        return $prevEdits;
    }
858
859
    /**
     * Update the content size figures for the given edit, dispatching to the
     * revert or non-revert handling as appropriate.
     * @param Edit $edit
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateContentSizes(Edit $edit, $prevEdits)
    {
        if (!$this->isRevert($prevEdits, $edit)) {
            // Ordinary edit: its size counts as content added or removed.
            return $this->updateContentSizesNonRevert($edit, $prevEdits);
        }

        // The edit reverted the previous one.
        return $this->updateContentSizesRevert($prevEdits);
    }
874
875
    /**
     * Whether the given Edit is a revert. It is one when its SHA equals
     * $prevEdits['prevSha'], or when the Edit itself reports being a revert.
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @param Edit $edit
     * @return bool
     */
    private function isRevert($prevEdits, $edit)
    {
        // Same content as the tracked 'prevSha' revision.
        if ($edit->getSha() === $prevEdits['prevSha']) {
            return true;
        }

        // Otherwise defer to the Edit's own detection.
        return (bool) $edit->isRevert($this->container);
    }
885
886
    /**
     * Adjust the content size figures given that the current edit reverted the previous one.
     * The reverted edit should not count as a legitimate content addition or removal.
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesRevert($prevEdits)
    {
        $this->revertCount++;

        // The edit that got reverted.
        $reverted = $prevEdits['prev'];

        // Take back the bytes the reverted edit added, both from the page total
        // and from the total of the user who made it.
        if ($reverted && $reverted->getSize() > 0) {
            $revertedSize = $reverted->getSize();
            $username = $reverted->getUser()->getUsername();

            $this->addedBytes -= $revertedSize;
            $this->editors[$username]['added'] -= $revertedSize;
        }

        // @TODO: Test this against an edit war (use your sandbox).
        // If the reverted edit was the max addition or max deletion, fall back
        // to the one recorded before it.
        if ($this->maxAddition && $reverted->getId() === $this->maxAddition->getId()) {
            $this->maxAddition = $prevEdits['maxAddition'];
            $prevEdits['maxAddition'] = $reverted; // In the event of edit wars.
        } elseif ($this->maxDeletion && $reverted->getId() === $this->maxDeletion->getId()) {
            $this->maxDeletion = $prevEdits['maxDeletion'];
            $prevEdits['maxDeletion'] = $reverted; // In the event of edit wars.
        }

        return $prevEdits;
    }
917
918
    /**
     * Update the content size figures given that the edit was NOT a revert
     * of the previous edit.
     * @param Edit $edit
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesNonRevert(Edit $edit, $prevEdits)
    {
        $size = $this->getEditSize($edit, $prevEdits);

        if ($size > 0) {
            // A positive size is content added, credited to the page and to the user.
            $username = $edit->getUser()->getUsername();
            $this->addedBytes += $size;
            $this->editors[$username]['added'] += $size;

            $isNewMax = !$this->maxAddition || $size > $this->maxAddition->getSize();
            if ($isNewMax) {
                // Remember the old max addition: if this edit turns out to be
                // reverted by the next one, the old max gets restored.
                $prevEdits['maxAddition'] = $this->maxAddition;
                $this->maxAddition = $edit;
            }

            return $prevEdits;
        }

        if ($size < 0) {
            $isNewMax = !$this->maxDeletion || $size < $this->maxDeletion->getSize();
            if ($isNewMax) {
                // Remember the old max deletion, for the same reason as above.
                $prevEdits['maxDeletion'] = $this->maxDeletion;
                $this->maxDeletion = $edit;
            }
        }

        return $prevEdits;
    }
952
953
    /**
     * Get the size of the given edit, taking the previous edit (if any) into account.
     * When the previous revision has a length of null, 0 is returned instead of the
     * actual edit size. This happens when the edit follows other edits that were
     * revision-deleted.
     * @see T148857 for more information.
     * @todo Remove once T101631 is resolved.
     * @param Edit $edit
     * @param Edit[] $prevEdits With 'prev', 'prevSha', 'maxAddition' and 'maxDeletion'.
     * @return int
     */
    private function getEditSize(Edit $edit, $prevEdits)
    {
        $previous = $prevEdits['prev'];
        $previousLengthUnknown = $previous && $previous->getLength() === null;

        return $previousLengthUnknown ? 0 : $edit->getSize();
    }
971
972
    /**
     * Update the automated tool usage counts for the given edit.
     * Does nothing when the edit was not made with a known tool.
     * @param Edit $edit
     */
    private function updateToolCounts(Edit $edit)
    {
        $tool = $edit->getTool($this->container);

        if (false === $tool) {
            // Not a (semi-)automated edit.
            return;
        }

        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Overall, yearly and monthly totals of automated edits.
        $this->automatedCount++;
        $this->yearMonthCounts[$year]['automated']++;
        $this->yearMonthCounts[$year]['months'][$month]['automated']++;

        // Per-tool totals.
        $toolName = $tool['name'];
        if (isset($this->tools[$toolName])) {
            $this->tools[$toolName]['count']++;
            return;
        }

        // First time this tool is seen.
        $this->tools[$toolName] = [
            'count' => 1,
            'link' => $tool['link'],
        ];
    }
1001
1002
    /**
     * Update the totals for the year and month of the given edit, the size of
     * the page for that year, and the highest number of edits seen in one month.
     * @param Edit $edit
     */
    private function updateYearMonthCounts(Edit $edit)
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // First edit seen for this year: create the blank year entry and its months.
        if (!isset($this->yearMonthCounts[$year])) {
            $this->addYearMonthCountEntry($edit);
        }

        // Every edit counts towards the 'all' totals of its year and month.
        $this->yearMonthCounts[$year]['all']++;
        $this->yearMonthCounts[$year]['months'][$month]['all']++;

        // Overwritten by each edit of the year, so this ends up being
        // the size of the page by the end of the year.
        $this->yearMonthCounts[$year]['size'] = (int) $edit->getLength();

        // Track the busiest month.
        $monthTotal = $this->yearMonthCounts[$year]['months'][$month]['all'];
        if ($monthTotal > $this->maxEditsPerMonth) {
            $this->maxEditsPerMonth = $monthTotal;
        }
    }
1028
1029
    /**
     * Add a new entry to $this->yearMonthCounts for the year of the given edit,
     * with blank values for each month. This is called during self::parseHistory().
     * Months before the month of the first edit, or after self::getLastDay(),
     * get no entry. A label is also appended to $this->yearLabels for the year,
     * and to $this->monthLabels for every month that gets an entry.
     * @param Edit $edit
     */
    private function addYearMonthCountEntry(Edit $edit)
    {
        $this->yearLabels[] = $this->i18n->dateFormat($edit->getTimestamp(), 'yyyy');
        $editYear = $edit->getYear();

        // Beginning of the month of the first edit, at 00:00:00. The year and
        // month are cast explicitly because mktime() expects integers.
        $firstEditTime = mktime(
            0,
            0,
            0,
            (int) $this->firstEdit->getMonth(),
            1,
            (int) $this->firstEdit->getYear()
        );

        $this->yearMonthCounts[$editYear] = [
            'all' => 0,
            'minor' => 0,
            'anon' => 0,
            'automated' => 0,
            'size' => 0, // Keep track of the size by the end of the year.
            'events' => [],
            'months' => [],
        ];

        for ($i = 1; $i <= 12; $i++) {
            // Beginning of month $i of the edit's year, at 00:00:00.
            $timeObj = mktime(0, 0, 0, $i, 1, (int) $editYear);

            // Don't show zeros for months before the first edit or after the current month.
            if ($timeObj < $firstEditTime || $timeObj > $this->getLastDay()) {
                continue;
            }

            $this->monthLabels[] = $this->i18n->dateFormat($timeObj, 'yyyy-MM');

            // Months are keyed by their zero-padded number ('01' to '12').
            $this->yearMonthCounts[$editYear]['months'][sprintf('%02d', $i)] = [
                'all' => 0,
                'minor' => 0,
                'anon' => 0,
                'automated' => 0,
            ];
        }
    }
1069
1070
    /**
     * Update the overall, yearly and monthly counts of anonymous and minor
     * edits, based on the given edit.
     * @param Edit $edit
     */
    private function updateAnonMinorCounts(Edit $edit)
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Edit made while logged out.
        if ($edit->isAnon()) {
            $this->anonCount++;
            $this->yearMonthCounts[$year]['anon']++;
            $this->yearMonthCounts[$year]['months'][$month]['anon']++;
        }

        // Edit flagged as minor.
        if ($edit->isMinor()) {
            $this->minorCount++;
            $this->yearMonthCounts[$year]['minor']++;
            $this->yearMonthCounts[$year]['months'][$month]['minor']++;
        }
    }
1094
1095
    /**
     * Update the statistics kept in $this->editors for the user who made the given edit.
     * @param Edit $edit
     */
    private function updateUserCounts(Edit $edit)
    {
        $username = $edit->getUser()->getUsername();
        $timestamp = $edit->getTimestamp();
        $revId = $edit->getId();

        // First edit seen from this user: start their stats with this edit as their first.
        if (!isset($this->editors[$username])) {
            $this->editors[$username] = [
                'all' => 0,
                'minor' => 0,
                'minorPercentage' => 0,
                'first' => $timestamp,
                'firstId' => $revId,
                'last' => null,
                'atbe' => null,
                'added' => 0,
            ];
        }

        // Count the edit, and make it the user's most recent one.
        $this->editors[$username]['all']++;
        $this->editors[$username]['last'] = $timestamp;
        $this->editors[$username]['lastId'] = $revId;

        // Count it as a minor edit of the user, if applicable.
        if ($edit->isMinor()) {
            $this->editors[$username]['minor']++;
        }
    }
1127
1128
    /**
     * Increment the "edits per <time>" counts that the given edit falls into:
     * the past day, week, month and/or year.
     * @param Edit $edit
     */
    private function updateCountHistory(Edit $edit)
    {
        $timestamp = $edit->getTimestamp();

        // Key in $this->countHistory => relative date marking the start of that period.
        $periods = [
            'day' => '-1 day',
            'week' => '-1 week',
            'month' => '-1 month',
            'year' => '-1 year',
        ];

        foreach ($periods as $period => $offset) {
            if ($timestamp > new DateTime($offset)) {
                $this->countHistory[$period]++;
            }
        }
    }
1149
1150
    /**
     * Get info about the bots that edited the page, as set by self::setBots().
     * @return mixed[] Keyed by the bot's username, with the 'count' of edits to the
     *   page and 'current' as to whether or not they are currently a bot.
     */
    public function getBots()
    {
        $bots = $this->bots;

        return $bots;
    }
1159
1160
    /**
     * Fetch and store info about the bots that edited the page, ordered by their
     * number of edits (highest first). This is a private setter because the
     * information is needed when computing the top 10 editors, where we don't
     * want to include bots.
     */
    private function setBots()
    {
        $result = $this->getRepository()->getBotData($this->page, $this->start, $this->end);

        // One entry per bot, keyed by username.
        $botsByName = [];
        while ($row = $result->fetch()) {
            $botsByName[$row['username']] = [
                'count' => (int) $row['count'],
                'current' => $row['current'] === 'bot',
            ];
        }

        // Highest edit count first.
        uasort($botsByName, function ($first, $second) {
            return $second['count'] - $first['count'];
        });

        $this->bots = $botsByName;
    }
1184
1185
    /**
     * Number of edits made to the page by current or former bots. The result is
     * computed once and then cached, after which $bots is no longer looked at.
     * @param mixed[]|null $bots Used only in unit tests, where we supply mock data
     *   for the bots that will get processed. Each entry must have a 'count'.
     *   When null, self::getBots() is used.
     * @return int
     */
    public function getBotRevisionCount($bots = null)
    {
        if (isset($this->botRevisionCount)) {
            return $this->botRevisionCount;
        }

        $botList = $bots === null ? $this->getBots() : $bots;

        // Add up the edit counts of all the bots.
        $total = 0;
        foreach ($botList as $botInfo) {
            $total += $botInfo['count'];
        }

        $this->botRevisionCount = $total;

        return $total;
    }
1210
1211
    /**
     * Query for log events during each year of the article's history, and tally
     * them by type in the 'events' entry of that year in $this->yearMonthCounts.
     * Protections ('protect' and 'stable' logs), deletions and moves are counted;
     * events of any other log type are ignored.
     */
    private function setLogsEvents()
    {
        $logData = $this->getRepository()->getLogEvents(
            $this->page,
            $this->start,
            $this->end
        );

        // Log type => i18n key that it is counted under. Pending-changes ('stable')
        // protections are counted along with normal protections.
        $actionsByLogType = [
            'protect' => 'protections',
            'delete' => 'deletions',
            'move' => 'moves',
            'stable' => 'protections',
        ];

        foreach ($logData as $event) {
            $time = strtotime($event['timestamp']);
            $year = date('Y', $time);

            // NOTE(review): this stops at the first event whose year has no entry,
            // dropping all the events after it. Presumably it relies on the order
            // in which getLogEvents() returns its rows; confirm that `continue`
            // isn't what is wanted here.
            if (!isset($this->yearMonthCounts[$year])) {
                break;
            }

            // Skip log types we don't report on, rather than counting them under
            // an undefined action (or under the action of the previous event).
            $logType = $event['log_type'];
            if (!isset($actionsByLogType[$logType])) {
                continue;
            }
            $action = $actionsByLogType[$logType];

            $yearEvents = $this->yearMonthCounts[$year]['events'];

            if (empty($yearEvents[$action])) {
                $yearEvents[$action] = 1;
            } else {
                $yearEvents[$action]++;
            }

            $this->yearMonthCounts[$year]['events'] = $yearEvents;
        }
    }
1259
1260
    /**
     * Set statistics about the top 10 editors by added text and number of edits.
     * This is ran *after* parseHistory() since we need the grand totals first.
     * Various stats are also set for each editor in $this->editors to be used in the charts:
     * the percentage of minor edits, and the average time between edits (in days).
     */
    private function doPostPrecessing()
    {
        $topTenCount = $counter = 0;
        $topTenEditorsByEdits = [];

        foreach ($this->editors as $editor => $info) {
            // Collect the first 10 editors that aren't bots, in the order that
            // $this->editors is in. Bots are keyed by username, so a key lookup
            // is both exact and cheaper than searching the list of bot names.
            if ($counter < 10 && !isset($this->bots[$editor])) {
                $topTenCount += $info['all'];
                $counter++;

                // To be used in the Top Ten charts.
                $topTenEditorsByEdits[] = [
                    'label' => $editor,
                    'value' => $info['all'],
                ];
            }

            // Compute the percentage of minor edits the user made.
            $this->editors[$editor]['minorPercentage'] = $info['all']
                ? ($info['minor'] / $info['all']) * 100
                : 0;

            if ($info['all'] > 1) {
                // Number of seconds/days between first and last edit.
                $secs = $info['last']->getTimestamp() - $info['first']->getTimestamp();
                $days = $secs / (60 * 60 * 24);

                // Average time between edits (in days).
                $this->editors[$editor]['atbe'] = $days / $info['all'];
            }
        }

        // Loop through again and add each editor's share of the top ten's edits.
        $this->topTenEditorsByEdits = array_map(function ($editor) use ($topTenCount) {
            $editor['percentage'] = 100 * ($editor['value'] / $topTenCount);
            return $editor;
        }, $topTenEditorsByEdits);

        $this->topTenEditorsByAdded = $this->getTopTenByAdded();

        $this->topTenCount = $topTenCount;
    }
1308
1309
    /**
     * Get the top ten editors by added text.
     * @return array With keys 'label', 'value' and 'percentage', ready to be used by the pieChart Twig helper.
     *   The percentage is the editor's share of the text added by the top ten,
     *   or 0 when the text added by the top ten sums up to zero.
     */
    private function getTopTenByAdded()
    {
        // First sort editors array by the amount of text they added (most first).
        $topTenEditorsByAdded = $this->editors;
        uasort($topTenEditorsByAdded, function ($a, $b) {
            if ($a['added'] === $b['added']) {
                return 0;
            }
            return $a['added'] > $b['added'] ? -1 : 1;
        });

        // Slice to the top 10, keeping only the usernames.
        $topTenEditorsByAdded = array_keys(array_slice($topTenEditorsByAdded, 0, 10, true));

        // Get the sum of added text so that we can add in percentages.
        $topTenTotalAdded = array_sum(array_map(function ($editor) {
            return $this->editors[$editor]['added'];
        }, $topTenEditorsByAdded));

        // Then build a new array of top 10 editors by added text in the data structure needed for the chart.
        return array_map(function ($editor) use ($topTenTotalAdded) {
            $added = $this->editors[$editor]['added'];

            // The total can be zero (e.g. when nobody has a net addition),
            // in which case there are no shares to speak of.
            $percentage = $topTenTotalAdded
                ? 100 * ($added / $topTenTotalAdded)
                : 0;

            return [
                'label' => $editor,
                'value' => $added,
                'percentage' => $percentage,
            ];
        }, $topTenEditorsByAdded);
    }
1344
1345
    /**
     * Get authorship attribution from the WikiWho API.
     * A successful result is cached, and the cached result is returned on later
     * calls regardless of the $limit given then. Errors are not cached.
     * @see https://f-squared.org/wikiwho/
     * @param int $limit Max number of results.
     * @return array With keys 'list' (count and percentage for each username or IP),
     *   'totalAuthors' and 'totalCount'; or with only the key 'error' if the
     *   revision couldn't be found.
     */
    public function getTextshares($limit = null)
    {
        if (isset($this->textshares)) {
            return $this->textshares;
        }

        // TODO: check for failures. Should have a success:true
        $response = $this->getRepository()->getTextshares($this->page);

        // If revision can't be found, return error message.
        if (!isset($response['revisions'][0])) {
            $message = isset($response['Error']) ? $response['Error'] : 'Unknown';
            return [
                'error' => $message
            ];
        }

        // The revision is keyed by its ID.
        $revision = $response['revisions'][0];
        $revId = array_keys($revision)[0];
        $tokens = $revision[$revId]['tokens'];

        list($counts, $totalCount, $userIds) = $this->countTokens($tokens);
        $usernameMap = $this->getUsernameMap($userIds);

        $countsToProcess = $limit === null
            ? $counts
            : array_slice($counts, 0, $limit, true);

        // Build an array with the username (or IP address) as the key,
        // and the count and percentage as the value.
        $textshares = [];
        foreach ($countsToProcess as $editor => $count) {
            // Editors without a known username (such as IPs) are listed as-is.
            $index = isset($usernameMap[$editor]) ? $usernameMap[$editor] : $editor;

            $textshares[$index] = [
                'count' => $count,
                'percentage' => round(100 * ($count / $totalCount), 1)
            ];
        }

        $this->textshares = [
            'list' => $textshares,
            'totalAuthors' => count($counts),
            'totalCount' => $totalCount,
        ];

        return $this->textshares;
    }
1403
1404
    /**
     * Look up the usernames for the given user IDs on the page's project,
     * and return them as a map.
     * @param int[] $userIds
     * @return array IDs as keys, usernames as values.
     */
    private function getUsernameMap($userIds)
    {
        $project = $this->page->getProject();
        $rows = $this->getRepository()->getUsernamesFromIds($project, $userIds);

        $namesById = [];
        foreach ($rows as $row) {
            $userId = $row['user_id'];
            $namesById[$userId] = $row['user_name'];
        }

        return $namesById;
    }
1423
1424
    /**
     * Get counts of token lengths for each author. Used in self::getTextshares()
     * @param array $tokens Each with the keys 'editor' and 'str'.
     * @return array [counts by user, total count, IDs of accounts]. The counts are
     *   keyed by user ID or IP address and sorted from highest to lowest.
     *   Each account ID is listed only once.
     */
    private function countTokens($tokens)
    {
        $counts = [];
        $userIds = [];
        $totalCount = 0;

        // Loop through the tokens, keeping totals (token length) for each author.
        foreach ($tokens as $token) {
            $editor = $token['editor'];

            // IPs are prefixed with '0|', otherwise it's the user ID.
            if (substr($editor, 0, 2) === '0|') {
                $editor = substr($editor, 2);
            } else {
                $userIds[] = $editor;
            }

            if (!isset($counts[$editor])) {
                $counts[$editor] = 0;
            }

            // NOTE(review): strlen() counts bytes, so multi-byte characters weigh
            // more than one. Confirm that this is what's wanted for the shares.
            $length = strlen($token['str']);
            $counts[$editor] += $length;
            $totalCount += $length;
        }

        // Sort authors by count.
        arsort($counts);

        // An author usually has many tokens. List each ID once, so that the
        // lookup of the usernames isn't handed a heavily duplicated list.
        $userIds = array_values(array_unique($userIds));

        return [$counts, $totalCount, $userIds];
    }
1459
1460
    /**
     * Get the domain names of the wikis that are supported by WikiWho,
     * as listed in self::TEXTSHARE_WIKIS.
     * @return string[]
     * @codeCoverageIgnore
     */
    public function getTextshareWikis()
    {
        $supportedWikis = self::TEXTSHARE_WIKIS;

        return $supportedWikis;
    }
1469
1470
    /**
     * Get prose and reference information, based on the HTML content of the page
     * (as of $this->end, unless that is false).
     * @return array With keys 'characters', 'words', 'references', 'unique_references'
     *   and 'sections'.
     */
    public function getProseStats()
    {
        // $this->end is used as a Unix timestamp, if given.
        $datetime = null;
        if ($this->end !== false) {
            $datetime = new DateTime('@'.$this->end);
        }

        $crawler = new Crawler($this->page->getHTMLContent($datetime));

        // Prose is the text within the paragraphs of the content.
        list($chars, $words) = $this->countCharsAndWords($crawler, '#mw-content-text p');

        // References are unique by their text.
        $refs = $crawler->filter('#mw-content-text .reference');
        $refTexts = [];
        $refs->each(function ($ref) use (&$refTexts) {
            $refTexts[] = $ref->text();
        });

        // Each headline marks a section.
        $headlines = $crawler->filter('#mw-content-text .mw-headline');

        return [
            'characters' => $chars,
            'words' => $words,
            'references' => $refs->count(),
            'unique_references' => count(array_unique($refTexts)),
            'sections' => count($headlines),
        ];
    }
1500
1501
    /**
     * Count the number of characters and words of the plain text
     * within the DOM elements matched by the given selector.
     * Reference markers such as "[1]" are left out of the counts.
     * Words are what is separated by whitespace; an element without
     * any text has no words.
     * @param Crawler $crawler
     * @param string $selector HTML selector.
     * @return array [num chars, num words]
     */
    private function countCharsAndWords($crawler, $selector)
    {
        $totalChars = 0;
        $totalWords = 0;
        $paragraphs = $crawler->filter($selector);
        $paragraphs->each(function ($node) use (&$totalChars, &$totalWords) {
            // Strip the reference markers, e.g. "[1]".
            $text = preg_replace('/\[\d+\]/', '', trim($node->text()));

            // NOTE(review): strlen() counts bytes, not characters, which makes a
            // difference for non-ASCII text. Confirm which one is intended.
            $totalChars += strlen($text);

            // Split on runs of whitespace and drop the empty pieces, so that an
            // empty text gives 0 words (explode() would give 1) and consecutive
            // spaces or line breaks don't add phantom words.
            $words = preg_split('/[ \t\r\n]+/', $text, -1, PREG_SPLIT_NO_EMPTY);
            $totalWords += count($words);
        });

        return [$totalChars, $totalWords];
    }
1521
1522
    /**
     * Get the transclusion data (categories, templates and files) of the page.
     * It is fetched from the repository on the first call, and reused afterwards.
     * @return array With keys 'categories', 'templates' and 'files'.
     */
    private function getTransclusionData()
    {
        // Already fetched.
        if (is_array($this->transclusionData)) {
            return $this->transclusionData;
        }

        $this->transclusionData = $this->getRepository()
            ->getTransclusionData($this->page);

        return $this->transclusionData;
    }
1535
1536
    /**
     * Get the number of categories that are on the page,
     * which is the 'categories' value of the transclusion data.
     * @return int
     */
    public function getNumCategories()
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['categories'];
    }
1544
1545
    /**
     * Get the number of templates that are on the page,
     * which is the 'templates' value of the transclusion data.
     * @return int
     */
    public function getNumTemplates()
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['templates'];
    }
1553
1554
    /**
     * Get the number of files that are on the page,
     * which is the 'files' value of the transclusion data.
     * @return int
     */
    public function getNumFiles()
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['files'];
    }
1562
}
1563