Passed
Push — master ( 6b32d9...9f4747 )
by MusikAnimal
02:02
created

ArticleInfo::updateAnonMinorCounts()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 17
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
eloc 10
nc 4
nop 1
dl 0
loc 17
ccs 11
cts 11
cp 1
crap 3
rs 9.4285
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the ArticleInfo class.
4
 */
5
6
namespace Xtools;
7
8
use Symfony\Component\DependencyInjection\Container;
9
use DateTime;
10
11
/**
12
 * An ArticleInfo provides statistics about a page on a project. This model does not
13
 * have a separate Repository because it needs to use individual SQL statements to
14
 * traverse the page's history, saving class instance variables along the way.
15
 */
16
class ArticleInfo extends Model
17
{
18
    /** @const string[] Domain names of wikis supported by WikiWho. */
19
    const TEXTSHARE_WIKIS = [
20
        'en.wikipedia.org',
21
        'de.wikipedia.org',
22
        'eu.wikipedia.org',
23
        'tr.wikipedia.org',
24
        'es.wikipedia.org',
25
    ];
26
27
    /** @var Container The application's DI container. */
28
    protected $container;
29
30
    /** @var Page The page. */
31
    protected $page;
32
33
    /** @var false|int From what date to obtain records. */
34
    protected $startDate;
35
36
    /** @var false|int To what date to obtain records. */
37
    protected $endDate;
38
39
    /** @var int Number of revisions that belong to the page. */
40
    protected $numRevisions;
41
42
    /** @var int Maximum number of revisions to process, as configured. */
43
    protected $maxRevisions;
44
45
    /** @var int Number of revisions that were actually processed. */
46
    protected $numRevisionsProcessed;
47
48
    /**
49
     * Various statistics about editors to the page. These are not User objects
50
     * so as to preserve memory.
51
     * @var mixed[]
52
     */
53
    protected $editors;
54
55
    /** @var mixed[] The top 10 editors to the page by number of edits. */
56
    protected $topTenEditorsByEdits;
57
58
    /** @var mixed[] The top 10 editors to the page by added text. */
59
    protected $topTenEditorsByAdded;
60
61
    /** @var int Number of edits made by the top 10 editors. */
62
    protected $topTenCount;
63
64
    /** @var mixed[] Various statistics about bots that edited the page. */
65
    protected $bots;
66
67
    /** @var int Number of edits made to the page by bots. */
68
    protected $botRevisionCount;
69
70
    /** @var mixed[] Various counts about each individual year and month of the page's history. */
71
    protected $yearMonthCounts;
72
73
    /** @var Edit The first edit to the page. */
74
    protected $firstEdit;
75
76
    /** @var Edit The last edit to the page. */
77
    protected $lastEdit;
78
79
    /** @var Edit Edit that made the largest addition by number of bytes. */
80
    protected $maxAddition;
81
82
    /** @var Edit Edit that made the largest deletion by number of bytes. */
83
    protected $maxDeletion;
84
85
    /** @var int[] Number of in and outgoing links and redirects to the page. */
86
    protected $linksAndRedirects;
87
88
    /** @var string[] Assessments of the page (see Page::getAssessments). */
89
    protected $assessments;
90
91
    /**
92
     * Maximum number of edits that were created across all months. This is used as a comparison
93
     * for the bar charts in the months section.
94
     * @var int
95
     */
96
    protected $maxEditsPerMonth;
97
98
    /** @var string[] List of (semi-)automated tools that were used to edit the page. */
99
    protected $tools;
100
101
    /**
102
     * Total number of bytes added throughout the page's history. This is used as a comparison
103
     * when computing the top 10 editors by added text.
104
     * @var int
105
     */
106
    protected $addedBytes = 0;
107
108
    /** @var int Number of days between first and last edit. */
109
    protected $totalDays;
110
111
    /** @var int Number of minor edits to the page. */
112
    protected $minorCount = 0;
113
114
    /** @var int Number of anonymous edits to the page. */
115
    protected $anonCount = 0;
116
117
    /** @var int Number of automated edits to the page. */
118
    protected $automatedCount = 0;
119
120
    /** @var int Number of edits to the page that were reverted with the subsequent edit. */
121
    protected $revertCount = 0;
122
123
    /** @var int[] The "edits per <time>" counts. */
124
    protected $countHistory = [
125
        'day' => 0,
126
        'week' => 0,
127
        'month' => 0,
128
        'year' => 0
129
    ];
130
131
    /** @var string[] List of wikidata and Checkwiki errors. */
132
    protected $bugs;
133
134
    /** @var array List of editors and the percentage of the current content that they authored. */
135
    protected $textshares;
136
137
    /**
138
     * ArticleInfo constructor.
139
     * @param Page $page The page to process.
140
     * @param Container $container The DI container.
141
     * @param false|int $start From what date to obtain records.
142
     * @param false|int $end To what date to obtain records.
143
     */
144 10
    public function __construct(Page $page, Container $container, $start = false, $end = false)
145
    {
146 10
        $this->page = $page;
147 10
        $this->container = $container;
148 10
        $this->startDate = $start;
149 10
        $this->endDate = $end;
150 10
    }
151
152
    /**
153
     * Get the start of the date range.
154
     * @return false|int
155
     */
156
    public function getStartDate()
157
    {
158
        return $this->startDate;
159
    }
160
161
    /**
162
     * Get the end of the date range.
163
     * @return false|int
164
     */
165
    public function getEndDate()
166
    {
167
        return $this->endDate;
168
    }
169
170
    /**
     * Whether the statistics are restricted to a date range, i.e. whether
     * at least one of the start and end dates was supplied.
     * @return bool
     */
    public function hasDateRange()
    {
        $unbounded = $this->startDate === false && $this->endDate === false;

        return !$unbounded;
    }
178
179
    /**
180
     * Shorthand to get the page's project.
181
     * @return Project
182
     * @codeCoverageIgnore
183
     */
184
    public function getProject()
185
    {
186
        return $this->page->getProject();
187
    }
188
189
    /**
     * Get the number of revisions belonging to the page within the date range.
     * The count is requested from the Page on first use and cached afterwards.
     * @return int
     */
    public function getNumRevisions()
    {
        if (isset($this->numRevisions)) {
            return $this->numRevisions;
        }

        $this->numRevisions = $this->page->getNumRevisions(null, $this->startDate, $this->endDate);

        return $this->numRevisions;
    }
200
201
    /**
     * Get the maximum number of revisions that we should process, as set by
     * the app.max_page_revisions container parameter. Cached after first read.
     * @return int
     */
    public function getMaxRevisions()
    {
        if (isset($this->maxRevisions)) {
            return $this->maxRevisions;
        }

        $configured = $this->container->getParameter('app.max_page_revisions');
        $this->maxRevisions = (int) $configured;

        return $this->maxRevisions;
    }
212
213
    /**
     * Get the number of revisions that are actually getting processed.
     * Unless a value has already been stored, this is the configured maximum
     * when the page has too many revisions, and the real revision count otherwise.
     * @return int
     */
    public function getNumRevisionsProcessed()
    {
        if (!isset($this->numRevisionsProcessed)) {
            $this->numRevisionsProcessed = $this->tooManyRevisions()
                ? $this->getMaxRevisions()
                : $this->getNumRevisions();
        }

        return $this->numRevisionsProcessed;
    }
233
234
    /**
     * Does the page have more revisions than the configured maximum?
     * A maximum of zero (or less) means no limit is enforced.
     * @return bool
     */
    public function tooManyRevisions()
    {
        $max = $this->getMaxRevisions();

        if ($max <= 0) {
            return false;
        }

        return $this->getNumRevisions() > $max;
    }
242
243
    /**
244
     * Fetch and store all the data we need to show the ArticleInfo view.
245
     * @codeCoverageIgnore
246
     */
247
    public function prepareData()
248
    {
249
        $this->parseHistory();
250
        $this->setLogsEvents();
251
252
        // Bots need to be set before setting top 10 counts.
253
        $this->setBots();
254
255
        $this->setTopTenCounts();
256
    }
257
258
    /**
     * Get the number of editors that edited the page.
     * @return int Zero if the list of editors has not been populated.
     */
    public function getNumEditors()
    {
        // $this->editors has no default value; count() on null warns in PHP 7.2+
        // and throws a TypeError in PHP 8.
        return is_array($this->editors) ? count($this->editors) : 0;
    }
266
267
    /**
268
     * Get the number of bots that edited the page.
269
     * @return int
270
     */
271
    public function getNumBots()
272
    {
273
        return count($this->getBots());
274
    }
275
276
    /**
     * Get the number of days between the first and last edit.
     * The result is cached once it has been computed.
     * @return int Zero if the first or last edit is not set.
     */
    public function getTotalDays()
    {
        if (isset($this->totalDays)) {
            return $this->totalDays;
        }

        // Without both edits there is nothing to measure. Not cached, so that a
        // later call can still compute the real value.
        if (!$this->firstEdit || !$this->lastEdit) {
            return 0;
        }

        $dateFirst = $this->firstEdit->getTimestamp();
        $dateLast = $this->lastEdit->getTimestamp();
        $interval = date_diff($dateLast, $dateFirst, true);

        // DateInterval::format() returns a string; the property is documented as int.
        $this->totalDays = (int) $interval->format('%a');

        return $this->totalDays;
    }
291
292
    /**
     * Returns the length of the page. With a date range in effect this is the
     * length as of the last processed edit, otherwise the length reported by the Page.
     * @return int
     */
    public function getLength()
    {
        return $this->hasDateRange()
            ? $this->lastEdit->getLength()
            : $this->page->getLength();
    }
304
305
    /**
     * Get the average number of days between edits to the page.
     * @return double Rounded to one decimal place; 0 when no revisions were processed.
     */
    public function averageDaysPerEdit()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero for pages without processed revisions.
        if (!$numRevisions) {
            return 0.0;
        }

        return round($this->getTotalDays() / $numRevisions, 1);
    }
313
314
    /**
     * Get the average number of edits per day to the page.
     *
     * NOTE(review): the divisor (365 / 12 / 24) is roughly 1.27, so the figure is
     * edits per ~1.27 days rather than per single day. Confirm whether this is intended.
     *
     * @return double Rounded to one decimal place; 0 when the page history spans no days.
     */
    public function editsPerDay()
    {
        $totalDays = $this->getTotalDays();

        if (!$totalDays) {
            return round(0, 1);
        }

        $periods = $totalDays / (365 / 12 / 24);

        return round($this->getNumRevisionsProcessed() / $periods, 1);
    }
325
326
    /**
     * Get the average number of edits per month to the page, where a month is
     * taken as 365 / 12 days. The result never exceeds the number of processed revisions.
     * @return double
     */
    public function editsPerMonth()
    {
        $totalDays = $this->getTotalDays();
        $average = 0;

        if ($totalDays) {
            $months = $totalDays / (365 / 12);
            $average = $this->getNumRevisionsProcessed() / $months;
        }

        return min($this->getNumRevisionsProcessed(), round($average, 1));
    }
337
338
    /**
     * Get the average number of edits per year (365 days) to the page.
     * The result never exceeds the number of processed revisions.
     * @return double
     */
    public function editsPerYear()
    {
        $totalDays = $this->getTotalDays();
        $average = 0;

        if ($totalDays) {
            $years = $totalDays / 365;
            $average = $this->getNumRevisionsProcessed() / $years;
        }

        return min($this->getNumRevisionsProcessed(), round($average, 1));
    }
349
350
    /**
     * Get the average number of edits per editor.
     * @return double Rounded to one decimal place; 0 when there are no editors.
     */
    public function editsPerEditor()
    {
        // $this->editors is null until the history has been parsed.
        $numEditors = is_array($this->editors) ? count($this->editors) : 0;

        // Avoid a division by zero.
        if ($numEditors === 0) {
            return 0.0;
        }

        return round($this->getNumRevisionsProcessed() / $numEditors, 1);
    }
358
359
    /**
     * Get the percentage of minor edits to the page.
     * @return double Rounded to one decimal place; 0 when no revisions were processed.
     */
    public function minorPercentage()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero for pages without processed revisions.
        if (!$numRevisions) {
            return 0.0;
        }

        return round(($this->minorCount / $numRevisions) * 100, 1);
    }
370
371
    /**
     * Get the percentage of anonymous edits to the page.
     * @return double Rounded to one decimal place; 0 when no revisions were processed.
     */
    public function anonPercentage()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero for pages without processed revisions.
        if (!$numRevisions) {
            return 0.0;
        }

        return round(($this->anonCount / $numRevisions) * 100, 1);
    }
382
383
    /**
     * Get the percentage of edits made by the top 10 editors.
     * @return double Rounded to one decimal place; 0 when no revisions were processed.
     */
    public function topTenPercentage()
    {
        $numRevisions = $this->getNumRevisionsProcessed();

        // Avoid a division by zero for pages without processed revisions.
        if (!$numRevisions) {
            return 0.0;
        }

        return round(($this->topTenCount / $numRevisions) * 100, 1);
    }
391
392
    /**
     * Get the number of times the page has been viewed in the given timeframe.
     * Without a date range the last $latest days are used; otherwise the date
     * range itself is used and $latest is ignored.
     * @param  int $latest Last N days.
     * @return int
     */
    public function getPageviews($latest)
    {
        if (!$this->hasDateRange()) {
            return $this->page->getLastPageviews($latest);
        }

        $range = $this->translateDatesToYYYYMMDD($this->startDate, $this->endDate);
        $range = $this->applyDatesDefaults($range[0], $range[1]);

        return $this->page->getPageviews($range[0], $range[1]);
    }
408
409
    /**
     * "Translate" dates to YYYYMMDD format.
     *
     * Each bound is passed to date() as a timestamp; a bound that is false is
     * returned unchanged.
     *
     * @param false|int $start Timestamp of the start of the range, or false.
     * @param false|int $end Timestamp of the end of the range, or false.
     * @return array Two elements, [start, end], each a YYYYMMDD string or false.
     */
    private function translateDatesToYYYYMMDD($start, $end)
    {
        return [
            false !== $start ? date('Ymd', $start) : false,
            false !== $end ? date('Ymd', $end) : false,
        ];
    }
427
428
    /**
     * Apply defaults to a partially open date range: a missing start becomes the
     * Unix epoch and a missing end becomes the current date. If both bounds are
     * missing, no defaults are applied.
     *
     * @param false|string $start Start date in YYYYMMDD format, or false.
     * @param false|string $end End date in YYYYMMDD format, or false.
     * @return array Two elements, [start, end], each a YYYYMMDD string or false.
     */
    private function applyDatesDefaults($start, $end)
    {
        if (false === $start && false === $end) {
            return [false, false];
        }

        return [
            // Remember, YYYYMMDD format.
            false === $start ? date('Ymd', 0) : $start,
            false === $end ? date('Ymd') : $end,
        ];
    }
452
453
    /**
     * Get the page assessments of the page, fetching them from the Page
     * whenever no array of assessments has been stored yet.
     * @see https://www.mediawiki.org/wiki/Extension:PageAssessments
     * @return string[]|false False if unsupported.
     * @codeCoverageIgnore
     */
    public function getAssessments()
    {
        if (is_array($this->assessments)) {
            return $this->assessments;
        }

        $this->assessments = $this->page->getAssessments();

        return $this->assessments;
    }
466
467
    /**
468
     * Get the number of automated edits made to the page.
469
     * @return int
470
     */
471 1
    public function getAutomatedCount()
472
    {
473 1
        return $this->automatedCount;
474
    }
475
476
    /**
477
     * Get the number of edits to the page that were reverted with the subsequent edit.
478
     * @return int
479
     */
480 1
    public function getRevertCount()
481
    {
482 1
        return $this->revertCount;
483
    }
484
485
    /**
486
     * Get the number of edits to the page made by logged out users.
487
     * @return int
488
     */
489 1
    public function getAnonCount()
490
    {
491 1
        return $this->anonCount;
492
    }
493
494
    /**
495
     * Get the number of minor edits to the page.
496
     * @return int
497
     */
498 1
    public function getMinorCount()
499
    {
500 1
        return $this->minorCount;
501
    }
502
503
    /**
504
     * Get the number of edits to the page made in the past day, week, month and year.
505
     * @return int[] With keys 'day', 'week', 'month' and 'year'.
506
     */
507
    public function getCountHistory()
508
    {
509
        return $this->countHistory;
510
    }
511
512
    /**
513
     * Get the number of edits to the page made by the top 10 editors.
514
     * @return int
515
     */
516 1
    public function getTopTenCount()
517
    {
518 1
        return $this->topTenCount;
519
    }
520
521
    /**
522
     * Get the first edit to the page.
523
     * @return Edit
524
     */
525
    public function getFirstEdit()
526
    {
527
        return $this->firstEdit;
528
    }
529
530
    /**
531
     * Get the last edit to the page.
532
     * @return Edit
533
     */
534 1
    public function getLastEdit()
535
    {
536 1
        return $this->lastEdit;
537
    }
538
539
    /**
540
     * Get the edit that made the largest addition to the page (by number of bytes).
541
     * @return Edit
542
     */
543 1
    public function getMaxAddition()
544
    {
545 1
        return $this->maxAddition;
546
    }
547
548
    /**
549
     * Get the edit that made the largest removal to the page (by number of bytes).
550
     * @return Edit
551
     */
552 1
    public function getMaxDeletion()
553
    {
554 1
        return $this->maxDeletion;
555
    }
556
557
    /**
558
     * Get the list of editors to the page, including various statistics.
559
     * @return mixed[]
560
     */
561 1
    public function getEditors()
562
    {
563 1
        return $this->editors;
564
    }
565
566
    /**
567
     * Get the list of the top editors to the page (by edits), including various statistics.
568
     * @return mixed[]
569
     */
570 1
    public function topTenEditorsByEdits()
571
    {
572 1
        return $this->topTenEditorsByEdits;
573
    }
574
575
    /**
576
     * Get the list of the top editors to the page (by added text), including various statistics.
577
     * @return mixed[]
578
     */
579 1
    public function topTenEditorsByAdded()
580
    {
581 1
        return $this->topTenEditorsByAdded;
582
    }
583
584
    /**
585
     * Get various counts about each individual year and month of the page's history.
586
     * @return mixed[]
587
     */
588 2
    public function getYearMonthCounts()
589
    {
590 2
        return $this->yearMonthCounts;
591
    }
592
593
    /**
594
     * Get the maximum number of edits that were created across all months. This is used as a
595
     * comparison for the bar charts in the months section.
596
     * @return int
597
     */
598 1
    public function getMaxEditsPerMonth()
599
    {
600 1
        return $this->maxEditsPerMonth;
601
    }
602
603
    /**
604
     * Get a list of (semi-)automated tools that were used to edit the page, including
605
     * the number of times they were used, and a link to the tool's homepage.
606
     * @return mixed[]
607
     */
608 1
    public function getTools()
609
    {
610 1
        return $this->tools;
611
    }
612
613
    /**
     * Get the list of the page's wikidata and Checkwiki errors, fetching them
     * from the Page whenever no array of errors has been stored yet.
     * @see Page::getErrors()
     * @return string[]
     */
    public function getBugs()
    {
        if (is_array($this->bugs)) {
            return $this->bugs;
        }

        $this->bugs = $this->page->getErrors();

        return $this->bugs;
    }
625
626
    /**
627
     * Get the number of wikidata and CheckWiki errors.
628
     * @return int
629
     */
630
    public function numBugs()
631
    {
632
        return count($this->getBugs());
633
    }
634
635
    /**
636
     * Get the number of external links on the page.
637
     * @return int
638
     */
639 1
    public function linksExtCount()
640
    {
641 1
        return $this->getLinksAndRedirects()['links_ext_count'];
642
    }
643
644
    /**
645
     * Get the number of incoming links to the page.
646
     * @return int
647
     */
648 1
    public function linksInCount()
649
    {
650 1
        return $this->getLinksAndRedirects()['links_in_count'];
651
    }
652
653
    /**
654
     * Get the number of outgoing links from the page.
655
     * @return int
656
     */
657 1
    public function linksOutCount()
658
    {
659 1
        return $this->getLinksAndRedirects()['links_out_count'];
660
    }
661
662
    /**
663
     * Get the number of redirects to the page.
664
     * @return int
665
     */
666 1
    public function redirectsCount()
667
    {
668 1
        return $this->getLinksAndRedirects()['redirects_count'];
669
    }
670
671
    /**
     * Get the number of external, incoming and outgoing links, along with
     * the number of redirects to the page. The counts are fetched from the
     * Page whenever no array of counts has been stored yet.
     * @return int[] Read by callers with the keys 'links_ext_count',
     *   'links_in_count', 'links_out_count' and 'redirects_count'.
     * @codeCoverageIgnore
     */
    private function getLinksAndRedirects()
    {
        if (is_array($this->linksAndRedirects)) {
            return $this->linksAndRedirects;
        }

        $this->linksAndRedirects = $this->page->countLinksAndRedirects();

        return $this->linksAndRedirects;
    }
684
685
    /**
     * Parse the revision history, collecting our core statistics.
     * Nothing is returned; the results are stored on this instance.
     *
     * Untestable because it relies on getting a PDO statement. All the important
     * logic lives in other methods which are tested.
     * @codeCoverageIgnore
     */
    private function parseHistory()
    {
        // Only limit the query when the page has more revisions than we may process.
        $limit = $this->tooManyRevisions() ? $this->getMaxRevisions() : null;

        // Third parameter is ignored if $limit is null.
        $revStmt = $this->page->getRevisionsStmt(
            null,
            $limit,
            $this->getNumRevisions(),
            $this->startDate,
            $this->endDate
        );
        $revCount = 0;

        /**
         * Data about previous edits so that we can use them as a basis for comparison.
         * @var Edit[]
         */
        $prevEdits = [
            // The previous Edit, used to discount content that was reverted.
            'prev' => null,

            // The last edit deemed to be the max addition of content. This is kept track of
            // in case we find out the next edit was reverted (and was also a max edit),
            // in which case we'll want to discount it and use this one instead.
            'maxAddition' => null,

            // Same as with maxAddition, except the maximum amount of content deleted.
            // This is used to discount content that was reverted.
            'maxDeletion' => null,
        ];

        while ($rev = $revStmt->fetch()) {
            $edit = new Edit($this->page, $rev);

            if ($revCount === 0) {
                $this->firstEdit = $edit;
            }

            // Sometimes, with old revisions (2001 era), the revisions from 2002 come before 2001
            if ($edit->getTimestamp() < $this->firstEdit->getTimestamp()) {
                $this->firstEdit = $edit;
            }

            $prevEdits = $this->updateCounts($edit, $prevEdits);

            $revCount++;
        }

        $this->numRevisionsProcessed = $revCount;

        // Various sorts. These properties stay null when no revision was fetched,
        // and the sort functions do not accept null.
        if (is_array($this->editors)) {
            arsort($this->editors);
        }
        if (is_array($this->yearMonthCounts)) {
            ksort($this->yearMonthCounts);
        }
        if (!empty($this->tools)) {
            arsort($this->tools);
        }
    }
755
756
    /**
757
     * Update various counts based on the current edit.
758
     * @param  Edit   $edit
759
     * @param  Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'
760
     * @return Edit[] Updated version of $prevEdits.
761
     */
762 3
    private function updateCounts(Edit $edit, $prevEdits)
763
    {
764
        // Update the counts for the year and month of the current edit.
765 3
        $this->updateYearMonthCounts($edit);
766
767
        // Update counts for the user who made the edit.
768 3
        $this->updateUserCounts($edit);
769
770
        // Update the year/month/user counts of anon and minor edits.
771 3
        $this->updateAnonMinorCounts($edit);
772
773
        // Update counts for automated tool usage, if applicable.
774 3
        $this->updateToolCounts($edit);
775
776
        // Increment "edits per <time>" counts
777 3
        $this->updateCountHistory($edit);
778
779
        // Update figures regarding content addition/removal, and the revert count.
780 3
        $prevEdits = $this->updateContentSizes($edit, $prevEdits);
781
782
        // Now that we've updated all the counts, we can reset
783
        // the prev and last edits, which are used for tracking.
784 3
        $prevEdits['prev'] = $edit;
785 3
        $this->lastEdit = $edit;
786
787 3
        return $prevEdits;
788
    }
789
790
    /**
     * Update various figures about content sizes based on the given edit,
     * dispatching to the revert or non-revert handling as appropriate.
     * @param  Edit   $edit
     * @param  Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateContentSizes(Edit $edit, $prevEdits)
    {
        if (!$edit->isRevert($this->container)) {
            return $this->updateContentSizesNonRevert($edit, $prevEdits);
        }

        return $this->updateContentSizesRevert($prevEdits);
    }
805
806
    /**
     * Updates the figures on content sizes assuming the given edit was a revert of the previous one.
     * In such a case, we don't want to treat the previous edit as legit content addition or removal.
     * @param  Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesRevert($prevEdits)
    {
        $this->revertCount++;

        $reverted = $prevEdits['prev'];

        // Take the reverted edit's added bytes back out of the page total
        // and out of its author's individual total.
        if ($reverted && $reverted->getSize() > 0) {
            $this->addedBytes -= $reverted->getSize();

            $author = $reverted->getUser()->getUsername();
            $this->editors[$author]['added'] -= $reverted->getSize();
        }

        // @TODO: Test this against an edit war (use your sandbox).
        // If the reverted edit held the max addition or max deletion record,
        // fall back to the record holder before it.
        if ($this->maxAddition && $reverted->getId() === $this->maxAddition->getId()) {
            $this->maxAddition = $prevEdits['maxAddition'];
            $prevEdits['maxAddition'] = $reverted; // In the event of edit wars.
        } elseif ($this->maxDeletion && $reverted->getId() === $this->maxDeletion->getId()) {
            $this->maxDeletion = $prevEdits['maxDeletion'];
            $prevEdits['maxDeletion'] = $reverted; // In the event of edit wars.
        }

        return $prevEdits;
    }
838
839
    /**
     * Updates the figures on content sizes assuming the given edit
     * was NOT a revert of the previous edit.
     * @param  Edit   $edit
     * @param  Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesNonRevert(Edit $edit, $prevEdits)
    {
        $editSize = $this->getEditSize($edit, $prevEdits);

        // Not a revert, so a positive size counts as content added.
        if ($editSize > 0) {
            $this->addedBytes += $editSize;
            $this->editors[$edit->getUser()->getUsername()]['added'] += $editSize;

            $isNewMax = !$this->maxAddition || $editSize > $this->maxAddition->getSize();
            if ($isNewMax) {
                // Remember the old record holder in case this edit turns out to be
                // reverted by the next one, in which case the old one is restored.
                $prevEdits['maxAddition'] = $this->maxAddition;
                $this->maxAddition = $edit;
            }

            return $prevEdits;
        }

        if ($editSize < 0 && (!$this->maxDeletion || $editSize < $this->maxDeletion->getSize())) {
            // Same bookkeeping as above, for the largest deletion.
            $prevEdits['maxDeletion'] = $this->maxDeletion;
            $this->maxDeletion = $edit;
        }

        return $prevEdits;
    }
873
874
    /**
     * Get the size of the given edit, based on the previous edit (if present).
     * We don't return the actual edit size if the previous revision had a length of null.
     * This happens when the edit follows other edits that were revision-deleted.
     * @see T148857 for more information.
     * @todo Remove once T101631 is resolved.
     * @param  Edit   $edit
     * @param  Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'.
     * @return int Size of the edit, or 0 if the previous edit has a null length.
     */
    private function getEditSize(Edit $edit, $prevEdits)
    {
        $prev = $prevEdits['prev'];
        $prevLengthUnknown = $prev && $prev->getLength() === null;

        return $prevLengthUnknown ? 0 : $edit->getSize();
    }
892
893
    /**
     * Update counts of automated tool usage for the given edit.
     * Does nothing if no tool is reported for the edit.
     * @param Edit $edit
     */
    private function updateToolCounts(Edit $edit)
    {
        $tool = $edit->getTool($this->container);

        if ($tool === false) {
            // Nothing to do.
            return;
        }

        $year = $edit->getYear();
        $month = $edit->getMonth();

        $this->automatedCount++;
        $this->yearMonthCounts[$year]['automated']++;
        $this->yearMonthCounts[$year]['months'][$month]['automated']++;

        $toolName = $tool['name'];

        if (isset($this->tools[$toolName])) {
            $this->tools[$toolName]['count']++;
            return;
        }

        // First time we see this tool: start its counter and remember its link.
        $this->tools[$toolName] = [
            'count' => 1,
            'link' => $tool['link'],
        ];
    }
922
923
    /**
924
     * Update various counts for the year and month of the given edit.
925
     * @param Edit $edit
926
     */
927 3
    private function updateYearMonthCounts(Edit $edit)
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // First edit seen for this year: create the zeroed year and month buckets.
        if (!isset($this->yearMonthCounts[$year])) {
            $this->addYearMonthCountEntry($edit);
        }

        // Every edit counts towards the totals of its year and month.
        $this->yearMonthCounts[$year]['all']++;
        $this->yearMonthCounts[$year]['months'][$month]['all']++;

        // Overwritten by each edit, so this ends up as the length of the
        // last edit processed for the year.
        $this->yearMonthCounts[$year]['size'] = (int) $edit->getLength();

        // Remember the highest number of edits seen in any single month.
        $monthTotal = $this->yearMonthCounts[$year]['months'][$month]['all'];
        if ($this->maxEditsPerMonth < $monthTotal) {
            $this->maxEditsPerMonth = $monthTotal;
        }
    }
949
950
    /**
951
     * Add a new entry to $this->yearMonthCounts for the given year,
952
     * with blank values for each month. This called during self::parseHistory().
953
     * @param Edit $edit
954
     */
955 3
    private function addYearMonthCountEntry(Edit $edit)
    {
        $editYear = $edit->getYear();

        // The bounds below do not change per month, so compute them once.
        // Beginning of the month of the first edit, at 00:00:00. The year is cast
        // because mktime() expects integers.
        $firstEditTime = mktime(0, 0, 0, (int) $this->firstEdit->getMonth(), 1, (int) $this->firstEdit->getYear());
        $endOfThisMonth = strtotime('last day of this month');
        $startMonth = false !== $this->startDate ? date('Ym', $this->startDate) : null;
        $endDay = false !== $this->endDate ? date('Ymd', $this->endDate) : null;

        $this->yearMonthCounts[$editYear] = [
            'all' => 0,
            'minor' => 0,
            'anon' => 0,
            'automated' => 0,
            'size' => 0, // Keep track of the size by the end of the year.
            'events' => [],
            'months' => [],
        ];

        for ($i = 1; $i <= 12; $i++) {
            $month = sprintf('%02d', $i);

            // Skip months entirely before the start date. This compares at month
            // granularity so that the month containing a mid-month start date still
            // gets a bucket (edits later in that month are within range).
            if ($startMonth !== null && $editYear . $month < $startMonth) {
                continue;
            }

            // Skip months that begin after the end date.
            if ($endDay !== null && $editYear . $month . '01' > $endDay) {
                continue;
            }

            // Don't show zeros for months before the first edit or after the current month.
            $monthStart = mktime(0, 0, 0, $i, 1, (int) $editYear);
            if ($monthStart < $firstEditTime || $monthStart > $endOfThisMonth) {
                continue;
            }

            $this->yearMonthCounts[$editYear]['months'][$month] = [
                'all' => 0,
                'minor' => 0,
                'anon' => 0,
                'automated' => 0,
            ];
        }
    }
994
995
    /**
996
     * Update the counts of anon and minor edits for year, month,
997
     * and user of the given edit.
998
     * @param Edit $edit
999
     */
1000 3
    private function updateAnonMinorCounts(Edit $edit)
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Collect which of the two tallies this edit contributes to.
        $categories = [];

        if ($edit->isAnon()) {
            $this->anonCount++;
            $categories[] = 'anon';
        }

        if ($edit->isMinor()) {
            $this->minorCount++;
            $categories[] = 'minor';
        }

        // Bump the yearly and monthly counters for each applicable category.
        foreach ($categories as $category) {
            $this->yearMonthCounts[$year][$category]++;
            $this->yearMonthCounts[$year]['months'][$month][$category]++;
        }
    }
1019
1020
    /**
1021
     * Update various counts for the user of the given edit.
1022
     * @param Edit $edit
1023
     */
1024 3
    private function updateUserCounts(Edit $edit)
    {
        $name = $edit->getUser()->getUsername();

        // Start from the editor's existing stats, or a fresh record on their first edit.
        if (isset($this->editors[$name])) {
            $stats = $this->editors[$name];
        } else {
            $stats = [
                'all' => 0,
                'minor' => 0,
                'minorPercentage' => 0,
                'first' => $edit->getTimestamp(),
                'firstId' => $edit->getId(),
                'last' => null,
                'atbe' => null,
                'added' => 0,
                'sizes' => [],
            ];
        }

        // This edit is, so far, the editor's most recent one.
        $stats['all']++;
        $stats['last'] = $edit->getTimestamp();
        $stats['lastId'] = $edit->getId();

        // Length reported by the edit, divided by 1024 (KB, assuming getLength()
        // is in bytes; TODO confirm).
        $stats['sizes'][] = $edit->getLength() / 1024;

        if ($edit->isMinor()) {
            $stats['minor']++;
        }

        $this->editors[$name] = $stats;
    }
1056
1057
    /**
1058
     * Increment "edits per <time>" counts based on the given edit.
1059
     * @param Edit $edit
1060
     */
1061 3
    private function updateCountHistory(Edit $edit)
    {
        $timestamp = $edit->getTimestamp();

        // countHistory key => relative date marking the start of that time window.
        $windows = [
            'day' => '-1 day',
            'week' => '-1 week',
            'month' => '-1 month',
            'year' => '-1 year',
        ];

        // An edit is counted in every window it is recent enough for.
        foreach ($windows as $period => $offset) {
            if ($timestamp > new DateTime($offset)) {
                $this->countHistory[$period]++;
            }
        }
    }
1078
1079
    /**
1080
     * Get info about bots that edited the page.
1081
     * @return mixed[] Contains the bot's username, edit count to the page,
1082
     *   and whether or not they are currently a bot.
1083
     */
1084 1
    public function getBots()
    {
        // Plain accessor for the bot data stored on this instance.
        return $this->bots;
    }
1088
1089
    /**
1090
     * Set info about bots that edited the page. This is done as a private setter
1091
     * because we need this information when computing the top 10 editors,
1092
     * where we don't want to include bots.
1093
     */
1094
    private function setBots()
    {
        $rows = $this->getRepository()->getBotData($this->page, $this->startDate, $this->endDate);

        // Index the fetched rows by username, normalising the value types.
        $botsByName = [];
        while ($row = $rows->fetch()) {
            $botsByName[$row['username']] = [
                'count' => (int) $row['count'],
                'current' => $row['current'] === 'bot',
            ];
        }

        // Highest edit count first, keeping the username keys.
        uasort($botsByName, function ($left, $right) {
            return $right['count'] - $left['count'];
        });

        $this->bots = $botsByName;
    }
1113
1114
    /**
     * Total number of edits made to the page by current or former bots.
     * The total is stored on first computation; later calls return the stored
     * value regardless of what is passed as $bots.
     * @param array[]|null $bots Bot data where each entry has a 'count'. Used only
     *   in unit tests, where we supply mock data; when null, self::getBots() is used.
     * @return int
     */
    public function getBotRevisionCount($bots = null)
    {
        if (!isset($this->botRevisionCount)) {
            $botData = $bots === null ? $this->getBots() : $bots;

            // Add up the per-bot edit counts.
            $total = 0;
            foreach ($botData as $data) {
                $total += $data['count'];
            }

            $this->botRevisionCount = $total;
        }

        return $this->botRevisionCount;
    }
1139
1140
    /**
1141
     * Query for log events during each year of the article's history,
1142
     *   and set the results in $this->yearMonthCounts.
1143
     */
1144 1
    private function setLogsEvents()
    {
        // Log types that are counted, mapped to the i18n key they are counted under.
        // Pending-changes ('stable') protections count along with normal protections.
        $actionKeys = [
            'protect' => 'protections',
            'delete' => 'deletions',
            'move' => 'moves',
            'stable' => 'protections',
        ];

        $logData = $this->getRepository()->getLogEvents(
            $this->page,
            $this->startDate,
            $this->endDate
        );

        foreach ($logData as $event) {
            $year = date('Y', strtotime($event['timestamp']));

            // Skip events in years that have no entry in yearMonthCounts, but keep
            // going: later events may still fall within a tracked year.
            if (!isset($this->yearMonthCounts[$year])) {
                continue;
            }

            // Skip log types we don't track, rather than counting them under an
            // undefined (or the previous iteration's) action.
            $logType = $event['log_type'];
            if (!isset($actionKeys[$logType])) {
                continue;
            }
            $action = $actionKeys[$logType];

            if (empty($this->yearMonthCounts[$year]['events'][$action])) {
                $this->yearMonthCounts[$year]['events'][$action] = 1;
            } else {
                $this->yearMonthCounts[$year]['events'][$action]++;
            }
        }
    }
1188
1189
    /**
     * Set statistics about the top 10 editors by added text and number of edits.
     * This is ran *after* parseHistory() since we need the grand totals first.
     * Various stats are also set for each editor in $this->editors to be used in the charts.
     * Nothing is returned; results are stored in $this->topTenEditorsByEdits,
     * $this->topTenEditorsByAdded, $this->topTenCount and $this->editors.
     */
    private function setTopTenCounts()
    {
        $topTenCount = $counter = 0;
        $topTenEditors = [];
        $numRevisionsProcessed = $this->getNumRevisionsProcessed();

        foreach ($this->editors as $editor => $info) {
            // Take the first 10 non-bot editors in iteration order (assumes
            // $this->editors is already sorted by edit count; TODO confirm).
            // isset() is an exact key lookup, unlike a loose in_array() over the keys.
            if ($counter < 10 && !isset($this->bots[$editor])) {
                $topTenCount += $info['all'];
                $counter++;

                // To be used in the Top Ten charts.
                $topTenEditors[] = [
                    'label' => $editor,
                    'value' => $info['all'],
                    'percentage' => $numRevisionsProcessed
                        ? 100 * ($info['all'] / $numRevisionsProcessed)
                        : 0,
                ];
            }

            // Compute the percentage of minor edits the user made.
            $this->editors[$editor]['minorPercentage'] = $info['all']
                ? ($info['minor'] / $info['all']) * 100
                : 0;

            if ($info['all'] > 1) {
                // Number of seconds/days between first and last edit.
                $secs = $info['last']->getTimestamp() - $info['first']->getTimestamp();
                $days = $secs / (60 * 60 * 24);

                // Average time between edits (in days).
                $this->editors[$editor]['atbe'] = $days / $info['all'];
            }

            if (count($info['sizes'])) {
                // Average of the stored sizes (usually one per edit the user made to this page).
                $this->editors[$editor]['size'] = array_sum($info['sizes']) / count($info['sizes']);
            } else {
                $this->editors[$editor]['size'] = 0;
            }
        }

        $this->topTenEditorsByEdits = $topTenEditors;

        // First sort editors array by the amount of text they added, largest first.
        $editorsByAdded = $this->editors;
        uasort($editorsByAdded, function ($a, $b) {
            if ($a['added'] === $b['added']) {
                return 0;
            }
            return $a['added'] > $b['added'] ? -1 : 1;
        });

        // Keep the keys when slicing: without preserve_keys, array_slice() renumbers
        // integer-like usernames and the lookup below would hit the wrong editor.
        $topTenByAdded = array_slice($editorsByAdded, 0, 10, true);
        $addedBytes = $this->addedBytes;

        // Then build a new array of top 10 editors by added text,
        // in the data structure needed for the chart.
        $this->topTenEditorsByAdded = array_map(function ($editor) use ($addedBytes) {
            $added = $this->editors[$editor]['added'];
            return [
                'label' => $editor,
                'value' => $added,
                // Guard against dividing by zero when no bytes were added at all.
                'percentage' => $addedBytes
                    ? 100 * ($added / $addedBytes)
                    : 0,
            ];
        }, array_keys($topTenByAdded));

        $this->topTenCount = $topTenCount;
    }
1264
1265
    /**
1266
     * Get authorship attribution from the WikiWho API.
1267
     * @see https://f-squared.org/wikiwho/
1268
     * @param  int $limit Max number of results.
1269
     * @return array
1270
     */
1271 1
    public function getTextshares($limit = null)
    {
        // Reuse the stored result from an earlier call, if any.
        if (isset($this->textshares)) {
            return $this->textshares;
        }

        // TODO: check for failures. Should have a success:true
        $response = $this->getRepository()->getTextshares($this->page);
        $revision = $response['revisions'][0];
        $revId = array_keys($revision)[0];
        $tokens = $revision[$revId]['tokens'];

        list($counts, $totalCount, $userIds) = $this->countTokens($tokens);
        $usernameMap = $this->getUsernameMap($userIds);

        // Only the first $limit authors are listed, when a limit is given.
        $countsToProcess = $limit === null
            ? $counts
            : array_slice($counts, 0, $limit, true);

        // Key each entry by username where the editor is in the map, otherwise
        // by the editor value itself (the IP address for anonymous authors).
        $list = [];
        foreach ($countsToProcess as $editor => $count) {
            $key = isset($usernameMap[$editor]) ? $usernameMap[$editor] : $editor;
            $list[$key] = [
                'count' => $count,
                'percentage' => round(100 * ($count / $totalCount), 1)
            ];
        }

        // The author and count totals cover all authors, not just the listed ones.
        $this->textshares = [
            'list' => $list,
            'totalAuthors' => count($counts),
            'totalCount' => $totalCount,
        ];

        return $this->textshares;
    }
1315
1316
    /**
1317
     * Get a map of user IDs to usernames, given the IDs.
1318
     * @param  int[] $userIds
1319
     * @return array IDs as keys, usernames as values.
1320
     */
1321 1
    private function getUsernameMap($userIds)
    {
        $project = $this->page->getProject();
        $rows = $this->getRepository()->getUsernamesFromIds($project, $userIds);

        // Flatten the rows into a user_id => user_name lookup.
        $namesById = [];
        foreach ($rows as $row) {
            $namesById[$row['user_id']] = $row['user_name'];
        }

        return $namesById;
    }
1335
1336
    /**
1337
     * Get counts of token lengths for each author. Used in self::getTextshares()
1338
     * @param  array $tokens
1339
     * @return array [counts by user, total count, IDs of accounts]
1340
     */
1341 1
    private function countTokens($tokens)
    {
        $counts = [];
        $userIds = [];
        $totalCount = 0;

        // Loop through the tokens, keeping totals (token length) for each author.
        foreach ($tokens as $token) {
            $editor = $token['editor'];

            // IPs are prefixed with '0|', otherwise it's the user ID.
            if (substr($editor, 0, 2) === '0|') {
                $editor = substr($editor, 2);
            } else {
                $userIds[] = $editor;
            }

            if (!isset($counts[$editor])) {
                $counts[$editor] = 0;
            }

            // Count characters rather than bytes, so that authors of non-ASCII
            // text are not over-weighted.
            $length = mb_strlen($token['str'], 'UTF-8');
            $counts[$editor] += $length;
            $totalCount += $length;
        }

        // Sort authors by count, largest first.
        arsort($counts);

        // An author normally has many tokens; report each user ID only once.
        $userIds = array_values(array_unique($userIds));

        return [$counts, $totalCount, $userIds];
    }
1371
1372
    /**
1373
     * Get a list of wikis supported by WikiWho.
1374
     * @return string[]
1375
     */
1376
    public function getTextshareWikis()
    {
        // Exposes the class constant through an instance method.
        return self::TEXTSHARE_WIKIS;
    }
1380
}
1381