Passed
Push — master ( f43d54...b6518a )
by MusikAnimal
01:39
created

ArticleInfo::updateContentSizesNonRevert()   C

Complexity

Conditions 7
Paths 4

Size

Total Lines 26
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
eloc 11
nc 4
nop 2
dl 0
loc 26
ccs 12
cts 12
cp 1
crap 7
rs 6.7272
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file contains only the ArticleInfo class.
4
 */
5
6
namespace Xtools;
7
8
use Symfony\Component\DependencyInjection\Container;
9
use Symfony\Component\DomCrawler\Crawler;
10
use DateTime;
11
12
/**
13
 * An ArticleInfo provides statistics about a page on a project. This model does not
14
 * have a separate Repository because it needs to use individual SQL statements to
15
 * traverse the page's history, saving class instance variables along the way.
16
 */
17
class ArticleInfo extends Model
18
{
19
    /** @const string[] Domain names of wikis supported by WikiWho. */
20
    const TEXTSHARE_WIKIS = [
21
        'en.wikipedia.org',
22
        'de.wikipedia.org',
23
        'eu.wikipedia.org',
24
        'tr.wikipedia.org',
25
        'es.wikipedia.org',
26
    ];
27
28
    /** @var Container The application's DI container. */
29
    protected $container;
30
31
    /** @var Page The page. */
32
    protected $page;
33
34
    /** @var false|int From what date to obtain records. */
35
    protected $startDate;
36
37
    /** @var false|int To what date to obtain records. */
38
    protected $endDate;
39
40
    /** @var int Number of revisions that belong to the page. */
41
    protected $numRevisions;
42
43
    /** @var int Maximum number of revisions to process, as configured. */
44
    protected $maxRevisions;
45
46
    /** @var int Number of revisions that were actually processed. */
47
    protected $numRevisionsProcessed;
48
49
    /**
50
     * Various statistics about editors to the page. These are not User objects
51
     * so as to preserve memory.
52
     * @var mixed[]
53
     */
54
    protected $editors;
55
56
    /** @var mixed[] The top 10 editors to the page by number of edits. */
57
    protected $topTenEditorsByEdits;
58
59
    /** @var mixed[] The top 10 editors to the page by added text. */
60
    protected $topTenEditorsByAdded;
61
62
    /** @var int Number of edits made by the top 10 editors. */
63
    protected $topTenCount;
64
65
    /** @var mixed[] Various statistics about bots that edited the page. */
66
    protected $bots;
67
68
    /** @var int Number of edits made to the page by bots. */
69
    protected $botRevisionCount;
70
71
    /** @var mixed[] Various counts about each individual year and month of the page's history. */
72
    protected $yearMonthCounts;
73
74
    /** @var Edit The first edit to the page. */
75
    protected $firstEdit;
76
77
    /** @var Edit The last edit to the page. */
78
    protected $lastEdit;
79
80
    /** @var Edit Edit that made the largest addition by number of bytes. */
81
    protected $maxAddition;
82
83
    /** @var Edit Edit that made the largest deletion by number of bytes. */
84
    protected $maxDeletion;
85
86
    /** @var int[] Number of in and outgoing links and redirects to the page. */
87
    protected $linksAndRedirects;
88
89
    /** @var string[] Assessments of the page (see Page::getAssessments). */
90
    protected $assessments;
91
92
    /**
93
     * Maximum number of edits that were created across all months. This is used as a comparison
94
     * for the bar charts in the months section.
95
     * @var int
96
     */
97
    protected $maxEditsPerMonth;
98
99
    /** @var string[] List of (semi-)automated tools that were used to edit the page. */
100
    protected $tools;
101
102
    /**
103
     * Total number of bytes added throughout the page's history. This is used as a comparison
104
     * when computing the top 10 editors by added text.
105
     * @var int
106
     */
107
    protected $addedBytes = 0;
108
109
    /** @var int Number of days between first and last edit. */
110
    protected $totalDays;
111
112
    /** @var int Number of minor edits to the page. */
113
    protected $minorCount = 0;
114
115
    /** @var int Number of anonymous edits to the page. */
116
    protected $anonCount = 0;
117
118
    /** @var int Number of automated edits to the page. */
119
    protected $automatedCount = 0;
120
121
    /** @var int Number of edits to the page that were reverted with the subsequent edit. */
122
    protected $revertCount = 0;
123
124
    /** @var int[] The "edits per <time>" counts. */
125
    protected $countHistory = [
126
        'day' => 0,
127
        'week' => 0,
128
        'month' => 0,
129
        'year' => 0
130
    ];
131
132
    /** @var string[] List of wikidata and Checkwiki errors. */
133
    protected $bugs;
134
135
    /** @var array List of editors and the percentage of the current content that they authored. */
136
    protected $textshares;
137
138
    /** @var array Number of categories, templates and files on the page. */
139
    protected $transclusionData;
140
141
    /**
142
     * ArticleInfo constructor.
143
     * @param Page $page The page to process.
144
     * @param Container $container The DI container.
145
     * @param false|int $start From what date to obtain records.
146
     * @param false|int $end To what date to obtain records.
147
     */
148 13
    public function __construct(Page $page, Container $container, $start = false, $end = false)
149
    {
150 13
        $this->page = $page;
151 13
        $this->container = $container;
152 13
        $this->startDate = $start;
153 13
        $this->endDate = $end;
154 13
    }
155
156
    /**
157
     * Get the opening date of the date range.
158
     * @return false|int
159
     */
160 1
    public function getStartDate()
161
    {
162 1
        return $this->startDate;
163
    }
164
165
    /**
166
     * Get the closing date of the date range.
167
     * @return false|int
168
     */
169 1
    public function getEndDate()
170
    {
171 1
        return $this->endDate;
172
    }
173
174
    /**
     * Whether a start date and/or an end date was supplied for this instance.
     * @return bool
     */
    public function hasDateRange()
    {
        // Both bounds default to false when no range was requested.
        $unbounded = $this->startDate === false && $this->endDate === false;

        return !$unbounded;
    }
182
183
    /**
     * Return the start/end date values as associative array,
     * with YYYY-MM-DD as the date format. This is used mainly as
     * a helper to pass to the pageviews Twig macros.
     *
     * A bound that was not supplied falls back to the timestamp of the
     * first (for 'start') or last (for 'end') edit.
     * @return array Empty if there is no date range, otherwise with keys 'start' and 'end'.
     */
    public function getDateParams()
    {
        if (!$this->hasDateRange()) {
            return [];
        }

        // Only touch the first/last edit for a bound that was not given, so a fully
        // specified range does not dereference edits that may not have been set yet.
        $start = $this->startDate !== false
            ? date('Y-m-d', $this->startDate)
            : $this->firstEdit->getTimestamp()->format('Y-m-d');

        $end = $this->endDate !== false
            ? date('Y-m-d', $this->endDate)
            : $this->lastEdit->getTimestamp()->format('Y-m-d');

        return [
            'start' => $start,
            'end' => $end,
        ];
    }
209
210
    /**
211
     * Shorthand to get the page's project.
212
     * @return Project
213
     * @codeCoverageIgnore
214
     */
215
    public function getProject()
216
    {
217
        return $this->page->getProject();
218
    }
219
220
    /**
     * Get the number of revisions belonging to the page, as reported by the
     * Page for this instance's start/end dates. The value is fetched once
     * and then reused.
     * @return int
     */
    public function getNumRevisions()
    {
        if (isset($this->numRevisions)) {
            return $this->numRevisions;
        }

        $this->numRevisions = $this->page->getNumRevisions(null, $this->startDate, $this->endDate);

        return $this->numRevisions;
    }
231
232
    /**
     * Get the maximum number of revisions that we should process, read from
     * the 'app.max_page_revisions' container parameter and cached afterwards.
     * @return int
     */
    public function getMaxRevisions()
    {
        if (isset($this->maxRevisions)) {
            return $this->maxRevisions;
        }

        $configured = $this->container->getParameter('app.max_page_revisions');
        $this->maxRevisions = (int) $configured;

        return $this->maxRevisions;
    }
243
244
    /**
     * Get the number of revisions that are actually getting processed.
     * This is the app.max_page_revisions parameter when the page has more
     * revisions than that, and the actual number of revisions otherwise.
     * @return int
     */
    public function getNumRevisionsProcessed()
    {
        if (!isset($this->numRevisionsProcessed)) {
            $this->numRevisionsProcessed = $this->tooManyRevisions()
                ? $this->getMaxRevisions()
                : $this->getNumRevisions();
        }

        return $this->numRevisionsProcessed;
    }
264
265
    /**
     * Does the page have more revisions than the configured maximum?
     * A maximum of zero (or less) means no limit is applied.
     * @return bool
     */
    public function tooManyRevisions()
    {
        $max = $this->getMaxRevisions();

        if ($max <= 0) {
            return false;
        }

        return $this->getNumRevisions() > $max;
    }
273
274
    /**
275
     * Fetch and store all the data we need to show the ArticleInfo view.
276
     * @codeCoverageIgnore
277
     */
278
    public function prepareData()
279
    {
280
        $this->parseHistory();
281
        $this->setLogsEvents();
282
283
        // Bots need to be set before setting top 10 counts.
284
        $this->setBots();
285
286
        $this->setTopTenCounts();
287
    }
288
289
    /**
     * Get the number of editors that edited the page.
     * @return int Zero if no editors have been collected yet.
     */
    public function getNumEditors()
    {
        // $this->editors has no default value; count() on a non-array raises a
        // warning (PHP 7.2+) or a TypeError (PHP 8), so treat "not set" as zero.
        return is_array($this->editors) ? count($this->editors) : 0;
    }
297
298
    /**
299
     * Get the number of bots that edited the page.
300
     * @return int
301
     */
302
    public function getNumBots()
303
    {
304
        return count($this->getBots());
305
    }
306
307
    /**
     * Get the number of days between the first and last edit.
     * The value is computed once from the two edit timestamps and cached.
     * @return int
     */
    public function getTotalDays()
    {
        if (isset($this->totalDays)) {
            return $this->totalDays;
        }

        $dateFirst = $this->firstEdit->getTimestamp();
        $dateLast = $this->lastEdit->getTimestamp();

        // Absolute difference, so the order of the two timestamps does not matter.
        $interval = date_diff($dateLast, $dateFirst, true);

        // DateInterval::format() returns a string; cast so the property (and the
        // return value) really is the documented int.
        $this->totalDays = (int) $interval->format('%a');

        return $this->totalDays;
    }
322
323
    /**
     * Returns length of the page. With a date range this is the length
     * reported by the last edit; otherwise it is the length reported by the Page.
     * @return int
     */
    public function getLength()
    {
        return $this->hasDateRange()
            ? $this->lastEdit->getLength()
            : $this->page->getLength();
    }
335
336
    /**
     * Get the average number of days between edits to the page.
     * @return double Rounded to one decimal place; 0 if no revisions were processed.
     */
    public function averageDaysPerEdit()
    {
        $numRevisions = (int) $this->getNumRevisionsProcessed();

        // Avoid a division by zero when there is nothing to average over.
        if ($numRevisions === 0) {
            return 0.0;
        }

        return round($this->getTotalDays() / $numRevisions, 1);
    }
344
345
    /**
     * Get the average number of edits per day to the page.
     * @return double Rounded to one decimal place; 0 when the total number of days is zero.
     */
    public function editsPerDay()
    {
        $totalDays = $this->getTotalDays();
        $rate = 0;

        if ($totalDays) {
            // NOTE(review): (365 / 12 / 24) is roughly 1.27, which is not the length of
            // one day; a plain per-day rate would divide by $totalDays alone.
            // Behaviour is kept as is -- confirm the intended unit.
            $rate = $this->getNumRevisionsProcessed() / ($totalDays / (365 / 12 / 24));
        }

        return round($rate, 1);
    }
356
357
    /**
     * Get the average number of edits per month to the page, treating a month
     * as 365 / 12 days. The result never exceeds the number of processed revisions.
     * @return double
     */
    public function editsPerMonth()
    {
        $totalDays = $this->getTotalDays();
        $perMonth = 0;

        if ($totalDays) {
            $months = $totalDays / (365 / 12);
            $perMonth = $this->getNumRevisionsProcessed() / $months;
        }

        return min($this->getNumRevisionsProcessed(), round($perMonth, 1));
    }
368
369
    /**
     * Get the average number of edits per year to the page, treating a year
     * as 365 days. The result never exceeds the number of processed revisions.
     * @return double
     */
    public function editsPerYear()
    {
        $totalDays = $this->getTotalDays();
        $perYear = 0;

        if ($totalDays) {
            $years = $totalDays / 365;
            $perYear = $this->getNumRevisionsProcessed() / $years;
        }

        return min($this->getNumRevisionsProcessed(), round($perYear, 1));
    }
380
381
    /**
     * Get the average number of edits per editor.
     * @return double Rounded to one decimal place; 0 if there are no editors.
     */
    public function editsPerEditor()
    {
        // $this->editors has no default value, so guard both the count() call
        // and the division against "no editors collected".
        $numEditors = is_array($this->editors) ? count($this->editors) : 0;

        if ($numEditors === 0) {
            return 0.0;
        }

        return round($this->getNumRevisionsProcessed() / $numEditors, 1);
    }
389
390
    /**
     * Get the percentage of minor edits to the page.
     * @return double Rounded to one decimal place; 0 if no revisions were processed.
     */
    public function minorPercentage()
    {
        $numRevisions = (int) $this->getNumRevisionsProcessed();

        // Avoid a division by zero when no revisions were processed.
        if ($numRevisions === 0) {
            return 0.0;
        }

        return round(($this->minorCount / $numRevisions) * 100, 1);
    }
401
402
    /**
     * Get the percentage of anonymous edits to the page.
     * @return double Rounded to one decimal place; 0 if no revisions were processed.
     */
    public function anonPercentage()
    {
        $numRevisions = (int) $this->getNumRevisionsProcessed();

        // Avoid a division by zero when no revisions were processed.
        if ($numRevisions === 0) {
            return 0.0;
        }

        return round(($this->anonCount / $numRevisions) * 100, 1);
    }
413
414
    /**
     * Get the percentage of edits made by the top 10 editors.
     * @return double Rounded to one decimal place; 0 if no revisions were processed.
     */
    public function topTenPercentage()
    {
        $numRevisions = (int) $this->getNumRevisionsProcessed();

        // Avoid a division by zero when no revisions were processed.
        if ($numRevisions === 0) {
            return 0.0;
        }

        return round(($this->topTenCount / $numRevisions) * 100, 1);
    }
422
423
    /**
     * Get the number of times the page has been viewed in the given timeframe.
     * When this instance has a date range, that range is used and the
     * $latest parameter is ignored.
     * @param  int $latest Last N days.
     * @return int
     */
    public function getPageviews($latest)
    {
        if ($this->hasDateRange()) {
            $range = $this->getDateParams();

            return $this->page->getPageviews($range['start'], $range['end']);
        }

        return $this->page->getLastPageviews($latest);
    }
439
440
    /**
     * Get the page assessments of the page. The Page is asked for them
     * whenever the stored value is not (yet) an array.
     * @see https://www.mediawiki.org/wiki/Extension:PageAssessments
     * @return string[]|false False if unsupported.
     * @codeCoverageIgnore
     */
    public function getAssessments()
    {
        if (is_array($this->assessments)) {
            return $this->assessments;
        }

        $this->assessments = $this->page->getAssessments();

        return $this->assessments;
    }
453
454
    /**
455
     * Get the number of automated edits made to the page.
456
     * @return int
457
     */
458 1
    public function getAutomatedCount()
459
    {
460 1
        return $this->automatedCount;
461
    }
462
463
    /**
464
     * Get the number of edits to the page that were reverted with the subsequent edit.
465
     * @return int
466
     */
467 1
    public function getRevertCount()
468
    {
469 1
        return $this->revertCount;
470
    }
471
472
    /**
473
     * Get the number of edits to the page made by logged out users.
474
     * @return int
475
     */
476 1
    public function getAnonCount()
477
    {
478 1
        return $this->anonCount;
479
    }
480
481
    /**
482
     * Get the number of minor edits to the page.
483
     * @return int
484
     */
485 1
    public function getMinorCount()
486
    {
487 1
        return $this->minorCount;
488
    }
489
490
    /**
491
     * Get the number of edits to the page made in the past day, week, month and year.
492
     * @return int[] With keys 'day', 'week', 'month' and 'year'.
493
     */
494
    public function getCountHistory()
495
    {
496
        return $this->countHistory;
497
    }
498
499
    /**
500
     * Get the number of edits to the page made by the top 10 editors.
501
     * @return int
502
     */
503 1
    public function getTopTenCount()
504
    {
505 1
        return $this->topTenCount;
506
    }
507
508
    /**
509
     * Get the first edit to the page.
510
     * @return Edit
511
     */
512 1
    public function getFirstEdit()
513
    {
514 1
        return $this->firstEdit;
515
    }
516
517
    /**
518
     * Get the last edit to the page.
519
     * @return Edit
520
     */
521 1
    public function getLastEdit()
522
    {
523 1
        return $this->lastEdit;
524
    }
525
526
    /**
527
     * Get the edit that made the largest addition to the page (by number of bytes).
528
     * @return Edit
529
     */
530 1
    public function getMaxAddition()
531
    {
532 1
        return $this->maxAddition;
533
    }
534
535
    /**
536
     * Get the edit that made the largest removal to the page (by number of bytes).
537
     * @return Edit
538
     */
539 1
    public function getMaxDeletion()
540
    {
541 1
        return $this->maxDeletion;
542
    }
543
544
    /**
545
     * Get the list of editors to the page, including various statistics.
546
     * @return mixed[]
547
     */
548 1
    public function getEditors()
549
    {
550 1
        return $this->editors;
551
    }
552
553
    /**
554
     * Get the list of the top editors to the page (by edits), including various statistics.
555
     * @return mixed[]
556
     */
557 1
    public function topTenEditorsByEdits()
558
    {
559 1
        return $this->topTenEditorsByEdits;
560
    }
561
562
    /**
563
     * Get the list of the top editors to the page (by added text), including various statistics.
564
     * @return mixed[]
565
     */
566 1
    public function topTenEditorsByAdded()
567
    {
568 1
        return $this->topTenEditorsByAdded;
569
    }
570
571
    /**
572
     * Get various counts about each individual year and month of the page's history.
573
     * @return mixed[]
574
     */
575 2
    public function getYearMonthCounts()
576
    {
577 2
        return $this->yearMonthCounts;
578
    }
579
580
    /**
581
     * Get the maximum number of edits that were created across all months. This is used as a
582
     * comparison for the bar charts in the months section.
583
     * @return int
584
     */
585 1
    public function getMaxEditsPerMonth()
586
    {
587 1
        return $this->maxEditsPerMonth;
588
    }
589
590
    /**
591
     * Get a list of (semi-)automated tools that were used to edit the page, including
592
     * the number of times they were used, and a link to the tool's homepage.
593
     * @return mixed[]
594
     */
595 1
    public function getTools()
596
    {
597 1
        return $this->tools;
598
    }
599
600
    /**
     * Get the list of page's wikidata and Checkwiki errors. The Page is asked
     * for them whenever the stored value is not (yet) an array.
     * @see Page::getErrors()
     * @return string[]
     */
    public function getBugs()
    {
        if (is_array($this->bugs)) {
            return $this->bugs;
        }

        $this->bugs = $this->page->getErrors();

        return $this->bugs;
    }
612
613
    /**
614
     * Get the number of wikidata and CheckWiki errors.
615
     * @return int
616
     */
617
    public function numBugs()
618
    {
619
        return count($this->getBugs());
620
    }
621
622
    /**
623
     * Get the number of external links on the page.
624
     * @return int
625
     */
626 1
    public function linksExtCount()
627
    {
628 1
        return $this->getLinksAndRedirects()['links_ext_count'];
629
    }
630
631
    /**
632
     * Get the number of incoming links to the page.
633
     * @return int
634
     */
635 1
    public function linksInCount()
636
    {
637 1
        return $this->getLinksAndRedirects()['links_in_count'];
638
    }
639
640
    /**
641
     * Get the number of outgoing links from the page.
642
     * @return int
643
     */
644 1
    public function linksOutCount()
645
    {
646 1
        return $this->getLinksAndRedirects()['links_out_count'];
647
    }
648
649
    /**
650
     * Get the number of redirects to the page.
651
     * @return int
652
     */
653 1
    public function redirectsCount()
654
    {
655 1
        return $this->getLinksAndRedirects()['redirects_count'];
656
    }
657
658
    /**
     * Get the number of external, incoming and outgoing links, along with
     * the number of redirects to the page. The Page is asked for the counts
     * whenever the stored value is not (yet) an array.
     * @return int[] Read by the callers in this class with the keys 'links_ext_count',
     *   'links_in_count', 'links_out_count' and 'redirects_count'.
     * @codeCoverageIgnore
     */
    private function getLinksAndRedirects()
    {
        if (is_array($this->linksAndRedirects)) {
            return $this->linksAndRedirects;
        }

        $this->linksAndRedirects = $this->page->countLinksAndRedirects();

        return $this->linksAndRedirects;
    }
671
672
    /**
     * Parse the revision history, collecting our core statistics.
     *
     * Walks the revisions statement row by row, records the first edit, feeds every
     * edit to self::updateCounts(), stores how many revisions were processed and
     * finally sorts the collected editors, year/month counts and tools.
     *
     * Untestable because it relies on getting a PDO statement. All the important
     * logic lives in other methods which are tested.
     * @codeCoverageIgnore
     */
    private function parseHistory()
    {
        // Only cap the query when the page has more revisions than we should process.
        $limit = $this->tooManyRevisions() ? $this->getMaxRevisions() : null;

        // Third parameter is ignored if $limit is null.
        $revStmt = $this->page->getRevisionsStmt(
            null,
            $limit,
            $this->getNumRevisions(),
            $this->startDate,
            $this->endDate
        );
        $revCount = 0;

        /**
         * Data about previous edits so that we can use them as a basis for comparison.
         * @var Edit[]
         */
        $prevEdits = [
            // The previous Edit, used to discount content that was reverted.
            'prev' => null,

            // The last edit deemed to be the max addition of content. This is kept track of
            // in case we find out the next edit was reverted (and was also a max edit),
            // in which case we'll want to discount it and use this one instead.
            'maxAddition' => null,

            // Same as with maxAddition, except the maximum amount of content deleted.
            // This is used to discount content that was reverted.
            'maxDeletion' => null,
        ];

        while ($rev = $revStmt->fetch()) {
            $edit = new Edit($this->page, $rev);

            // The first row is the first edit by default. Sometimes, with old revisions
            // (2001 era), the revisions from 2002 come before 2001, so an earlier
            // timestamp found later on replaces it.
            if ($revCount === 0 || $edit->getTimestamp() < $this->firstEdit->getTimestamp()) {
                $this->firstEdit = $edit;
            }

            $prevEdits = $this->updateCounts($edit, $prevEdits);

            $revCount++;
        }

        $this->numRevisionsProcessed = $revCount;

        // Various sorts. The properties have no default value and stay unset when the
        // statement yields no rows, so only sort what was actually collected.
        if (!empty($this->editors)) {
            arsort($this->editors);
        }
        if (!empty($this->yearMonthCounts)) {
            ksort($this->yearMonthCounts);
        }
        if (!empty($this->tools)) {
            arsort($this->tools);
        }
    }
742
743
    /**
     * Run every per-edit updater for the given edit, then record it as the
     * previous and last edit.
     * @param  Edit   $edit
     * @param  Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateCounts(Edit $edit, $prevEdits)
    {
        // Year/month totals come first: this is where the entry for the edit's year is created.
        $this->updateYearMonthCounts($edit);

        // Per-user figures, followed by the anon/minor breakdown.
        $this->updateUserCounts($edit);
        $this->updateAnonMinorCounts($edit);

        // Automated tool usage, if applicable.
        $this->updateToolCounts($edit);

        // "Edits per <time>" counters.
        $this->updateCountHistory($edit);

        // Content added/removed and the revert count.
        $tracked = $this->updateContentSizes($edit, $prevEdits);

        // Everything is counted, so this edit now becomes the reference
        // point for whichever edit is processed next.
        $tracked['prev'] = $edit;
        $this->lastEdit = $edit;

        return $tracked;
    }
776
777
    /**
     * Update the content size figures for the given edit, dispatching to the
     * revert or non-revert handler depending on what the edit reports.
     * @param  Edit   $edit
     * @param  Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'
     * @return Edit[] Updated version of $prevEdits.
     */
    private function updateContentSizes(Edit $edit, $prevEdits)
    {
        $isRevert = $edit->isRevert($this->container);

        return $isRevert
            ? $this->updateContentSizesRevert($prevEdits)
            : $this->updateContentSizesNonRevert($edit, $prevEdits);
    }
792
793
    /**
     * Updates the figures on content sizes assuming the given edit was a revert of the previous one.
     * In such a case, we don't want to treat the previous edit as legit content addition or removal.
     *
     * The revert is always counted. If there is no previous edit, nothing else changes.
     * @param  Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesRevert($prevEdits)
    {
        $this->revertCount++;

        $prev = $prevEdits['prev'];

        // Without a previous edit there is nothing to discount. Bailing out here also
        // keeps the max addition/deletion checks below from calling getId() on null.
        if (!$prev) {
            return $prevEdits;
        }

        // Adjust addedBytes given this edit was a revert of the previous one.
        if ($prev->getSize() > 0) {
            $this->addedBytes -= $prev->getSize();

            // Also deduct from the user's individual added byte count.
            $username = $prev->getUser()->getUsername();
            $this->editors[$username]['added'] -= $prev->getSize();
        }

        // @TODO: Test this against an edit war (use your sandbox).
        // Also remove as max added or deleted, if applicable.
        if ($this->maxAddition && $prev->getId() === $this->maxAddition->getId()) {
            $this->maxAddition = $prevEdits['maxAddition'];
            $prevEdits['maxAddition'] = $prev; // In the event of edit wars.
        } elseif ($this->maxDeletion && $prev->getId() === $this->maxDeletion->getId()) {
            $this->maxDeletion = $prevEdits['maxDeletion'];
            $prevEdits['maxDeletion'] = $prev; // In the event of edit wars.
        }

        return $prevEdits;
    }
825
826
    /**
     * Updates the figures on content sizes assuming the given edit
     * was NOT a revert of the previous edit.
     *
     * A positive size counts as added content (overall and for the edit's user) and may
     * become the new max addition; a negative size may become the new max deletion.
     * @param  Edit   $edit
     * @param  Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'.
     * @return Edit[] Updated version of $prevEdits, for tracking.
     */
    private function updateContentSizesNonRevert(Edit $edit, $prevEdits)
    {
        $size = $this->getEditSize($edit, $prevEdits);

        if ($size > 0) {
            // Not a revert, so a positive size is content added.
            $this->addedBytes += $size;
            $this->editors[$edit->getUser()->getUsername()]['added'] += $size;

            $isNewMaxAddition = !$this->maxAddition || $size > $this->maxAddition->getSize();

            if ($isNewMaxAddition) {
                // Remember the old maxAddition in case the next edit reverts this one,
                // in which case the old one has to be restored.
                $prevEdits['maxAddition'] = $this->maxAddition;
                $this->maxAddition = $edit;
            }

            return $prevEdits;
        }

        if ($size < 0 && (!$this->maxDeletion || $size < $this->maxDeletion->getSize())) {
            // Remember the old maxDeletion in case the next edit reverts this one,
            // in which case the old one has to be restored.
            $prevEdits['maxDeletion'] = $this->maxDeletion;
            $this->maxDeletion = $edit;
        }

        return $prevEdits;
    }
860
861
    /**
     * Get the size of the given edit, based on the previous edit (if present).
     * The actual edit size is not returned if the previous revision had a length of null;
     * zero is returned instead. This happens when the edit follows other edits
     * that were revision-deleted.
     * @see T148857 for more information.
     * @todo Remove once T101631 is resolved.
     * @param  Edit   $edit
     * @param  Edit[] $prevEdits With 'prev', 'maxAddition' and 'maxDeletion'.
     * @return int Size of the edit, or 0 if the previous edit's length is null.
     */
    private function getEditSize(Edit $edit, $prevEdits)
    {
        $prev = $prevEdits['prev'];
        $prevLengthUnknown = $prev && $prev->getLength() === null;

        return $prevLengthUnknown ? 0 : $edit->getSize();
    }
879
880
    /**
     * Update counts of automated tool usage for the given edit. Does nothing
     * when the edit reports no tool (false).
     * @param Edit $edit
     */
    private function updateToolCounts(Edit $edit)
    {
        $tool = $edit->getTool($this->container);

        if ($tool === false) {
            // Nothing to do.
            return;
        }

        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Overall, per-year and per-month tallies of automated edits.
        $this->automatedCount++;
        $this->yearMonthCounts[$year]['automated']++;
        $this->yearMonthCounts[$year]['months'][$month]['automated']++;

        $toolName = $tool['name'];

        if (isset($this->tools[$toolName])) {
            $this->tools[$toolName]['count']++;
            return;
        }

        // First time this tool is seen: start counting and keep its link.
        $this->tools[$toolName] = [
            'count' => 1,
            'link' => $tool['link'],
        ];
    }
909
910
    /**
     * Account for the given edit in the per-year and per-month totals.
     *
     * Creates the year's entry if it doesn't exist yet, increments the 'all'
     * counters, records the page size as of this edit, and keeps
     * $this->maxEditsPerMonth up to date.
     * @param Edit $edit
     */
    private function updateYearMonthCounts(Edit $edit)
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Make sure the year (with its blank months) exists before counting.
        if (!isset($this->yearMonthCounts[$year])) {
            $this->addYearMonthCountEntry($edit);
        }

        // Every edit counts towards both the year and the month.
        $this->yearMonthCounts[$year]['all']++;
        $this->yearMonthCounts[$year]['months'][$month]['all']++;

        // Overwritten by each edit of the year, so the value that remains is
        // the length of the last edit processed for that year.
        $this->yearMonthCounts[$year]['size'] = (int) $edit->getLength();

        // Track the highest number of edits seen in any single month.
        $this->maxEditsPerMonth = max(
            $this->maxEditsPerMonth,
            $this->yearMonthCounts[$year]['months'][$month]['all']
        );
    }
936
937
    /**
     * Add a new entry to $this->yearMonthCounts for the year of the given edit,
     * with blank values for each applicable month. This is called during
     * self::parseHistory().
     *
     * A month is left out when its first day falls before $this->startDate or
     * after $this->endDate (when those are set), when it precedes the month of
     * the page's first edit, or when it is after the current month.
     * @param Edit $edit
     */
    private function addYearMonthCountEntry(Edit $edit)
    {
        $editYear = $edit->getYear();

        // Midnight on the first day of the month of the page's first edit.
        // getMonth()/getYear() are cast because mktime() expects integers.
        $firstEditTime = mktime(
            0,
            0,
            0,
            (int) $this->firstEdit->getMonth(),
            1,
            (int) $this->firstEdit->getYear()
        );

        // These bounds don't change between months, so compute them only once.
        $latestAllowedTime = strtotime('last day of this month');
        $startLimit = false !== $this->startDate ? date('Ymd', $this->startDate) : null;
        $endLimit = false !== $this->endDate ? date('Ymd', $this->endDate) : null;

        $this->yearMonthCounts[$editYear] = [
            'all' => 0,
            'minor' => 0,
            'anon' => 0,
            'automated' => 0,
            'size' => 0, // Keep track of the size by the end of the year.
            'events' => [],
            'months' => [],
        ];

        for ($month = 1; $month <= 12; $month++) {
            $monthKey = sprintf('%02d', $month);
            $monthStartTime = mktime(0, 0, 0, $month, 1, (int) $editYear);

            // First day of the month as YYYYMMDD, comparable with the date limits.
            $date = $editYear . $monthKey . '01';
            if (($startLimit !== null && $date < $startLimit)
                || ($endLimit !== null && $date > $endLimit)
            ) {
                continue;
            }

            // Don't show zeros for months before the first edit or after the current month.
            if ($monthStartTime < $firstEditTime || $monthStartTime > $latestAllowedTime) {
                continue;
            }

            $this->yearMonthCounts[$editYear]['months'][$monthKey] = [
                'all' => 0,
                'minor' => 0,
                'anon' => 0,
                'automated' => 0,
            ];
        }
    }
981
982
    /**
     * Increment the anonymous and minor edit counters (overall, per year
     * and per month) that apply to the given edit.
     * @param Edit $edit
     */
    private function updateAnonMinorCounts(Edit $edit)
    {
        $year = $edit->getYear();
        $month = $edit->getMonth();

        // Edits made while logged out.
        if ($edit->isAnon()) {
            $this->anonCount++;
            $this->yearMonthCounts[$year]['anon']++;
            $this->yearMonthCounts[$year]['months'][$month]['anon']++;
        }

        // Edits flagged as minor.
        if ($edit->isMinor()) {
            $this->minorCount++;
            $this->yearMonthCounts[$year]['minor']++;
            $this->yearMonthCounts[$year]['months'][$month]['minor']++;
        }
    }
1006
1007
    /**
     * Update the per-editor statistics in $this->editors for the author
     * of the given edit, creating the editor's entry on their first edit.
     * @param Edit $edit
     */
    private function updateUserCounts(Edit $edit)
    {
        $username = $edit->getUser()->getUsername();

        // First edit seen from this user: start from a blank set of stats.
        if (!isset($this->editors[$username])) {
            $this->editors[$username] = [
                'all' => 0,
                'minor' => 0,
                'minorPercentage' => 0,
                'first' => $edit->getTimestamp(),
                'firstId' => $edit->getId(),
                'last' => null,
                'atbe' => null,
                'added' => 0,
                'sizes' => [],
            ];
        }

        $stats = &$this->editors[$username];

        // This edit is, so far, the user's most recent one.
        $stats['all']++;
        $stats['last'] = $edit->getTimestamp();
        $stats['lastId'] = $edit->getId();

        // Length reported by the edit, converted from bytes to KB.
        $stats['sizes'][] = $edit->getLength() / 1024;

        if ($edit->isMinor()) {
            $stats['minor']++;
        }

        // Drop the reference so later assignments can't touch the entry by accident.
        unset($stats);
    }
1043
1044
    /**
     * Increment the "edits in the past day/week/month/year" counters in
     * $this->countHistory for every period the given edit falls within.
     * @param Edit $edit
     */
    private function updateCountHistory(Edit $edit)
    {
        $editTimestamp = $edit->getTimestamp();

        // Counter key => relative date marking the start of that period.
        $periods = [
            'day' => '-1 day',
            'week' => '-1 week',
            'month' => '-1 month',
            'year' => '-1 year',
        ];

        foreach ($periods as $period => $offset) {
            if ($editTimestamp > new DateTime($offset)) {
                $this->countHistory[$period]++;
            }
        }
    }
1065
1066
    /**
     * Get the stored information about bots that edited the page.
     * @return mixed[] Keyed by username; each value holds the bot's edit
     *   count to the page and whether or not they are currently a bot.
     */
    public function getBots()
    {
        $bots = $this->bots;

        return $bots;
    }
1075
1076
    /**
     * Fetch and store info about bots that edited the page, ordered from most
     * to fewest edits. This is done as a private setter because we need this
     * information when computing the top 10 editors, where we don't want to
     * include bots.
     */
    private function setBots()
    {
        $rows = $this->getRepository()->getBotData($this->page, $this->startDate, $this->endDate);

        // Index each bot by username.
        $bots = [];
        while ($row = $rows->fetch()) {
            $bots[$row['username']] = [
                'count' => (int) $row['count'],
                'current' => 'bot' === $row['current'],
            ];
        }

        // Highest edit count first, keeping the usernames as keys.
        uasort($bots, function ($first, $second) {
            return $second['count'] - $first['count'];
        });

        $this->bots = $bots;
    }
1100
1101
    /**
     * Number of edits made to the page by current or former bots.
     * The total is computed once and then served from $this->botRevisionCount.
     * @param array[]|null $bots Used only in unit tests, where we supply mock
     *   data for the bots that will get processed. Each entry needs a 'count'.
     *   When null, the result of self::getBots() is used.
     * @return int
     */
    public function getBotRevisionCount($bots = null)
    {
        if (!isset($this->botRevisionCount)) {
            if ($bots === null) {
                $bots = $this->getBots();
            }

            // Add up the edit counts of every bot.
            $total = 0;
            foreach ($bots as $bot) {
                $total += $bot['count'];
            }

            $this->botRevisionCount = $total;
        }

        return $this->botRevisionCount;
    }
1126
1127
    /**
     * Query for log events during each year of the article's history,
     *   and set the results in $this->yearMonthCounts.
     *
     * Each recognised event increments a counter, keyed by i18n message key,
     * in the 'events' array of the event's year. Events with a log type that
     * isn't recognised are skipped.
     */
    private function setLogsEvents()
    {
        // Log type => i18n key under which the event is counted.
        // Pending-changes protections ('stable') are counted along with normal protections.
        $actionKeys = [
            'protect' => 'protections',
            'delete' => 'deletions',
            'move' => 'moves',
            'stable' => 'protections',
        ];

        $logData = $this->getRepository()->getLogEvents(
            $this->page,
            $this->startDate,
            $this->endDate
        );

        foreach ($logData as $event) {
            $year = date('Y', strtotime($event['timestamp']));

            // NOTE(review): this stops at the first event whose year has no entry,
            // so any later events are not counted either — confirm that `break`
            // (rather than `continue`) is intended.
            if (!isset($this->yearMonthCounts[$year])) {
                break;
            }

            // Skip unknown log types instead of counting them under whatever
            // action the previous event happened to have.
            if (!isset($actionKeys[$event['log_type']])) {
                continue;
            }
            $action = $actionKeys[$event['log_type']];

            if (empty($this->yearMonthCounts[$year]['events'][$action])) {
                $this->yearMonthCounts[$year]['events'][$action] = 1;
            } else {
                $this->yearMonthCounts[$year]['events'][$action]++;
            }
        }
    }
1175
1176
    /**
     * Set statistics about the top 10 editors by number of edits and by added text.
     * This is ran *after* parseHistory() since we need the grand totals first.
     * Various stats (percentage of minor edits, average time between edits and
     * average size) are also set for each editor in $this->editors, to be used
     * in the charts.
     *
     * Nothing is returned; the results are stored in $this->topTenEditorsByEdits,
     * $this->topTenEditorsByAdded and $this->topTenCount.
     */
    private function setTopTenCounts()
    {
        $topTenCount = $counter = 0;
        $topTenEditors = [];
        $numRevisionsProcessed = $this->getNumRevisionsProcessed();

        foreach ($this->editors as $editor => $info) {
            // Take the first ten editors that aren't bots, in the order they appear
            // in $this->editors (presumably already sorted by edit count — confirm).
            // isset() is an exact key lookup, unlike a loose in_array() search.
            if ($counter < 10 && !isset($this->bots[$editor])) {
                $topTenCount += $info['all'];
                $counter++;

                // To be used in the Top Ten charts.
                $topTenEditors[] = [
                    'label' => $editor,
                    'value' => $info['all'],
                    'percentage' => $numRevisionsProcessed
                        ? 100 * ($info['all'] / $numRevisionsProcessed)
                        : 0,
                ];
            }

            // Compute the percentage of minor edits the user made.
            $this->editors[$editor]['minorPercentage'] = $info['all']
                ? ($info['minor'] / $info['all']) * 100
                : 0;

            if ($info['all'] > 1) {
                // Number of seconds/days between first and last edit.
                $secs = $info['last']->getTimestamp() - $info['first']->getTimestamp();
                $days = $secs / (60 * 60 * 24);

                // Average time between edits (in days).
                $this->editors[$editor]['atbe'] = $days / $info['all'];
            }

            if (count($info['sizes'])) {
                // Average of the stored sizes (usually one per edit the user made to this page).
                $this->editors[$editor]['size'] = array_sum($info['sizes']) / count($info['sizes']);
            } else {
                $this->editors[$editor]['size'] = 0;
            }
        }

        $this->topTenEditorsByEdits = $topTenEditors;

        // First sort a copy of the editors by the amount of text they added, descending.
        $editorsByAdded = $this->editors;
        uasort($editorsByAdded, function ($a, $b) {
            if ($a['added'] === $b['added']) {
                return 0;
            }
            return $a['added'] > $b['added'] ? -1 : 1;
        });

        // Then build a new array of top 10 editors by added text, in the data
        // structure needed for the chart. Keys must be preserved when slicing,
        // otherwise numeric usernames would be renumbered from 0.
        $topTenEditorsByAdded = [];
        foreach (array_slice($editorsByAdded, 0, 10, true) as $editor => $info) {
            $topTenEditorsByAdded[] = [
                'label' => $editor,
                'value' => $info['added'],
                // Nothing was added at all when addedBytes is 0; avoid dividing by it.
                'percentage' => $this->addedBytes
                    ? 100 * ($info['added'] / $this->addedBytes)
                    : 0,
            ];
        }
        $this->topTenEditorsByAdded = $topTenEditorsByAdded;

        $this->topTenCount = $topTenCount;
    }
1251
1252
    /**
     * Get authorship attribution from the WikiWho API.
     *
     * A successful result is kept in $this->textshares and returned as-is on
     * later calls. An error result is returned without being stored.
     * @see https://f-squared.org/wikiwho/
     * @param  int $limit Max number of results.
     * @return array Either with keys 'list' (count and percentage for each
     *   username or IP address), 'totalAuthors' and 'totalCount', or with
     *   the single key 'error'.
     */
    public function getTextshares($limit = null)
    {
        if (isset($this->textshares)) {
            return $this->textshares;
        }

        // TODO: check for failures. Should have a success:true
        $response = $this->getRepository()->getTextshares($this->page);

        // If revision can't be found, return error message.
        if (!isset($response['revisions'][0])) {
            $error = 'Unknown';
            if (isset($response['Error'])) {
                $error = $response['Error'];
            }
            return ['error' => $error];
        }

        // The revision data is keyed by revision ID; use the first one.
        $revision = $response['revisions'][0];
        $revId = array_keys($revision)[0];

        list($counts, $totalCount, $userIds) = $this->countTokens($revision[$revId]['tokens']);
        $usernameMap = $this->getUsernameMap($userIds);

        $countsToProcess = $limit === null
            ? $counts
            : array_slice($counts, 0, $limit, true);

        // Key the results by username where one is known for the editor,
        // otherwise by the editor value itself (the IP address).
        $textshares = [];
        foreach ($countsToProcess as $editor => $count) {
            $name = isset($usernameMap[$editor]) ? $usernameMap[$editor] : $editor;
            $textshares[$name] = [
                'count' => $count,
                'percentage' => round(100 * ($count / $totalCount), 1)
            ];
        }

        $this->textshares = [
            'list' => $textshares,
            'totalAuthors' => count($counts),
            'totalCount' => $totalCount,
        ];

        return $this->textshares;
    }
1310
1311
    /**
     * Look up the usernames for the given user IDs on the page's project.
     * @param  int[] $userIds
     * @return array IDs as keys, usernames as values.
     */
    private function getUsernameMap($userIds)
    {
        $project = $this->page->getProject();
        $rows = $this->getRepository()->getUsernamesFromIds($project, $userIds);

        // Re-key the rows so that a username can be found by its user ID.
        $map = [];
        foreach ($rows as $row) {
            $map[$row['user_id']] = $row['user_name'];
        }

        return $map;
    }
1330
1331
    /**
     * Get counts of token lengths for each author. Used in self::getTextshares()
     * @param  array $tokens Each token needs an 'editor' and a 'str' value.
     * @return array [counts by user (highest first), total count, IDs of accounts].
     *   Counts are keyed by IP address or user ID. Each user ID is listed only
     *   once, no matter how many tokens the user authored.
     */
    private function countTokens($tokens)
    {
        $counts = [];
        $userIds = [];
        $totalCount = 0;

        // Loop through the tokens, keeping totals (token length) for each author.
        foreach ($tokens as $token) {
            $editor = $token['editor'];

            // IPs are prefixed with '0|', otherwise it's the user ID.
            if (substr($editor, 0, 2) === '0|') {
                $editor = substr($editor, 2);
            } else {
                $userIds[] = $editor;
            }

            if (!isset($counts[$editor])) {
                $counts[$editor] = 0;
            }

            $length = strlen($token['str']);
            $counts[$editor] += $length;
            $totalCount += $length;
        }

        // Sort authors by count.
        arsort($counts);

        // An ID was collected for every token, so drop the repeats before the
        // list is used to look up usernames.
        $userIds = array_values(array_unique($userIds));

        return [$counts, $totalCount, $userIds];
    }
1366
1367
    /**
     * Get the domain names of the wikis supported by WikiWho,
     * as listed in self::TEXTSHARE_WIKIS.
     * @return string[]
     * @codeCoverageIgnore
     */
    public function getTextshareWikis()
    {
        $wikis = self::TEXTSHARE_WIKIS;

        return $wikis;
    }
1376
1377
    /**
     * Get prose and reference information, computed from the page's HTML
     * (as of $this->endDate, when one is set).
     * @return array With keys 'characters', 'words', 'references',
     *   'unique_references' and 'sections'.
     */
    public function getProseStats()
    {
        $datetime = null;
        if ($this->endDate !== false) {
            $datetime = new DateTime('@'.$this->endDate);
        }

        $crawler = new Crawler($this->page->getHTMLContent($datetime));

        list($chars, $words) = $this->countCharsAndWords($crawler, '#mw-content-text p');

        // Collect the text of every reference, to work out how many are distinct.
        $refs = $crawler->filter('#mw-content-text .reference');
        $refContent = $refs->each(function ($ref) {
            return $ref->text();
        });
        $uniqueRefs = count(array_unique($refContent));

        // Each headline element counts as one section.
        $sections = $crawler->filter('#mw-content-text .mw-headline')->count();

        return [
            'characters' => $chars,
            'words' => $words,
            'references' => $refs->count(),
            'unique_references' => $uniqueRefs,
            'sections' => $sections,
        ];
    }
1407
1408
    /**
     * Count the number of characters and words of the plain text
     * within the DOM elements matched by the given selector.
     *
     * Bracketed numbers such as "[12]" are removed before counting. Characters
     * are counted as UTF-8 characters rather than bytes, and words are the
     * non-empty pieces between runs of whitespace, so an element without any
     * text contributes nothing.
     * @param  Crawler $crawler
     * @param  string $selector HTML selector.
     * @return array [num chars, num words]
     */
    private function countCharsAndWords($crawler, $selector)
    {
        $totalChars = 0;
        $totalWords = 0;

        $crawler->filter($selector)->each(function ($node) use (&$totalChars, &$totalWords) {
            // Trim after stripping the markers, so that the whitespace that
            // surrounded a leading or trailing marker isn't counted.
            $text = trim((string) preg_replace('/\[\d+\]/', '', $node->text()));

            if ($text === '') {
                return;
            }

            $totalChars += mb_strlen($text, 'UTF-8');

            // preg_split() returns false on failure (e.g. malformed UTF-8),
            // in which case no words are counted for this element.
            $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
            if ($words !== false) {
                $totalWords += count($words);
            }
        });

        return [$totalChars, $totalWords];
    }
1428
1429
    /**
     * Get transclusion data (categories, templates and files) for the page.
     * The repository is queried only while $this->transclusionData isn't
     * an array yet; after that the stored value is returned.
     * @return array With keys 'categories', 'templates' and 'files'.
     */
    private function getTransclusionData()
    {
        // Already fetched earlier.
        if (is_array($this->transclusionData)) {
            return $this->transclusionData;
        }

        $this->transclusionData = $this->getRepository()->getTransclusionData($this->page);

        return $this->transclusionData;
    }
1442
1443
    /**
     * Get the number of categories that are on the page,
     * taken from the 'categories' value of the transclusion data.
     * @return int
     */
    public function getNumCategories()
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['categories'];
    }
1451
1452
    /**
     * Get the number of templates that are on the page,
     * taken from the 'templates' value of the transclusion data.
     * @return int
     */
    public function getNumTemplates()
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['templates'];
    }
1460
1461
    /**
     * Get the number of files that are on the page,
     * taken from the 'files' value of the transclusion data.
     * @return int
     */
    public function getNumFiles()
    {
        $transclusions = $this->getTransclusionData();

        return $transclusions['files'];
    }
1469
}
1470