Completed
Push — master ( f9a368...59d319 )
by
unknown
02:44
created

ArticleInfoController::getWikidataErrors()   A

Complexity

Conditions 4
Paths 5

Size

Total Lines 58
Code Lines 31

Duplication

Lines 18
Ratio 31.03 %

Importance

Changes 0
Metric Value
dl 18
loc 58
rs 9.0077
c 0
b 0
f 0
cc 4
eloc 31
nc 5
nop 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * This file contains only the ArticleInfoController class.
4
 */
5
6
namespace AppBundle\Controller;
7
8
use AppBundle\Helper\AutomatedEditsHelper;
9
use AppBundle\Helper\PageviewsHelper;
10
use Doctrine\DBAL\Connection;
11
use Sensio\Bundle\FrameworkExtraBundle\Configuration\Route;
12
use Symfony\Bundle\FrameworkBundle\Controller\Controller;
13
use Symfony\Component\HttpFoundation\Request;
14
use Symfony\Component\DependencyInjection\ContainerInterface;
15
use Symfony\Component\HttpFoundation\Response;
16
use Xtools\ProjectRepository;
17
use Xtools\Page;
18
use Xtools\PagesRepository;
19
use Xtools\Edit;
20
21
/**
22
 * This controller serves the search form and results for the ArticleInfo tool
23
 */
24
class ArticleInfoController extends Controller
25
{
26
    /** @var mixed[] Information about the page in question. */
27
    private $pageInfo;
28
    /** @var Edit[] All edits of the page. */
29
    private $pageHistory;
30
    /** @var ProjectRepository Shared Project repository for use of getting table names, etc. */
31
    private $projectRepo;
32
    /** @var string Database name, for use of getting table names, etc. */
33
    private $dbName;
34
    /** @var Connection The projects' database connection. */
35
    protected $conn;
36
    /** @var AutomatedEditsHelper The semi-automated edits helper. */
37
    protected $aeh;
38
    /** @var PageviewsHelper The page-views helper. */
39
    protected $ph;
40
41
    /**
42
     * Get the tool's shortname.
43
     * @return string
44
     */
45
    public function getToolShortname()
    {
        // Short name under which this tool is known.
        $shortname = 'articleinfo';

        return $shortname;
    }
49
50
    /**
51
     * Override method to call ArticleInfoController::containerInitialized() when container set.
52
     * @param ContainerInterface|null $container A ContainerInterface instance or null
53
     */
54
    public function setContainer(ContainerInterface $container = null)
    {
        parent::setContainer($container);

        // containerInitialized() fetches the database connection and helper
        // services from the container, so it is only meaningful once a
        // container has actually been provided. With a null container (the
        // default of this signature) those lookups have nothing to resolve
        // against, so the initialisation is skipped.
        if ($container !== null) {
            $this->containerInitialized();
        }
    }
59
60
    /**
61
     * Perform some operations after controller initialized and container set.
62
     */
63
    private function containerInitialized()
64
    {
65
        $this->conn = $this->getDoctrine()->getManager('replicas')->getConnection();
0 ignored issues
show
Bug introduced by
It seems like you code against a concrete implementation and not the interface Doctrine\Common\Persistence\ObjectManager as the method getConnection() does only exist in the following implementations of said interface: Doctrine\ORM\Decorator\EntityManagerDecorator, Doctrine\ORM\EntityManager.

Let’s take a look at an example:

interface User
{
    /** @return string */
    public function getPassword();
}

class MyUser implements User
{
    public function getPassword()
    {
        // return something
    }

    public function getDisplayName()
    {
        // return some name.
    }
}

class AuthSystem
{
    public function authenticate(User $user)
    {
        $this->logger->info(sprintf('Authenticating %s.', $user->getDisplayName()));
        // do something.
    }
}

In the above example, the authenticate() method works fine as long as you just pass instances of MyUser. However, if you now also want to pass a different implementation of User which does not have a getDisplayName() method, the code will break.

Available Fixes

  1. Change the type-hint for the parameter:

    class AuthSystem
    {
        public function authenticate(MyUser $user) { /* ... */ }
    }
    
  2. Add an additional type-check:

    class AuthSystem
    {
        public function authenticate(User $user)
        {
            if ($user instanceof MyUser) {
                $this->logger->info(/** ... */);
            }
    
            // or alternatively
            if ( ! $user instanceof MyUser) {
                throw new \LogicException(
                    '$user must be an instance of MyUser, '
                   .'other instances are not supported.'
                );
            }
    
        }
    }
    
Note: PHP Analyzer uses reverse abstract interpretation to narrow down the types inside the if block in such a case.
  3. Add the method to the interface:

    interface User
    {
        /** @return string */
        public function getPassword();
    
        /** @return string */
        public function getDisplayName();
    }
    
Loading history...
66
        $this->ph = $this->get('app.pageviews_helper');
67
        $this->aeh = $this->get('app.automated_edits_helper');
68
    }
69
70
    /**
71
     * The search form.
72
     * @Route("/articleinfo", name="articleinfo")
73
     * @Route("/articleinfo", name="articleInfo")
74
     * @Route("/articleinfo/", name="articleInfoSlash")
75
     * @Route("/articleinfo/index.php", name="articleInfoIndexPhp")
76
     * @Route("/articleinfo/{project}", name="ArticleInfoProject")
77
     * @param Request $request The HTTP request.
78
     * @return Response
79
     */
80
    public function indexAction(Request $request)
    {
        $article = $request->query->get('article');

        // The project can arrive either as a query parameter (form submission)
        // or as the {project} segment of the "ArticleInfoProject" route, which
        // is exposed through the request attributes. Fall back to the
        // configured default project when neither is present.
        $projectQuery = $request->query->get('project');
        if ($projectQuery == '') {
            $projectQuery = $request->attributes->get('project');
        }
        if ($projectQuery == '') {
            $projectQuery = $this->container->getParameter('default_project');
        }

        // An article was requested: go straight to the results page. This also
        // covers the "article without project" case, which previously tried to
        // build the ArticleInfoProject route with an empty project value.
        if ($article != '') {
            return $this->redirectToRoute('ArticleInfoResult', [
                'project' => $projectQuery,
                'article' => $article,
            ]);
        }

        $project = ProjectRepository::getProject($projectQuery, $this->container);

        return $this->render('articleInfo/index.html.twig', [
            'xtPage' => 'articleinfo',
            'xtPageTitle' => 'tool-articleinfo',
            'xtSubtitle' => 'tool-articleinfo-desc',
            'project' => $project,
        ]);
    }
104
105
    /**
106
     * Display the results.
107
     * @Route("/articleinfo/{project}/{article}", name="ArticleInfoResult", requirements={"article"=".+"})
108
     * @param Request $request The HTTP request.
109
     * @return Response
110
     */
111
    public function resultAction(Request $request)
112
    {
113
        $projectQuery = $request->attributes->get('project');
114
        $project = ProjectRepository::getProject($projectQuery, $this->container);
115
        $this->projectRepo = $project->getRepository();
116 View Code Duplication
        if (!$project->exists()) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
117
            $this->addFlash('notice', ['invalid-project', $projectQuery]);
0 ignored issues
show
Documentation introduced by
array('invalid-project', $projectQuery) is of type array<integer,*,{"0":"string","1":"*"}>, but the function expects a string.

It seems like the type of the argument is not accepted by the function/method which you are calling.

In some cases, in particular if PHP’s automatic type-juggling kicks in this might be fine. In other cases, however this might be a bug.

We suggest to add an explicit type cast like in the following example:

function acceptsInteger($int) { }

$x = '123'; // string "123"

// Instead of
acceptsInteger($x);

// we recommend to use
acceptsInteger((integer) $x);
Loading history...
118
            return $this->redirectToRoute('articleInfo');
119
        }
120
        $this->dbName = $project->getDatabaseName();
121
122
        $pageQuery = $request->attributes->get('article');
123
        $page = new Page($project, $pageQuery);
124
        $pageRepo = new PagesRepository();
125
        $pageRepo->setContainer($this->container);
1 ignored issue
show
Compatibility introduced by
$this->container of type object<Symfony\Component...ion\ContainerInterface> is not a sub-type of object<Symfony\Component...ncyInjection\Container>. It seems like you assume a concrete implementation of the interface Symfony\Component\Depend...tion\ContainerInterface to be always present.

This check looks for parameters that are defined as one type in their type hint or doc comment but seem to be used as a narrower type, i.e an implementation of an interface or a subclass.

Consider changing the type of the parameter or doing an instanceof check before assuming your parameter is of the expected type.

Loading history...
126
        $page->setRepository($pageRepo);
127
128
        if (!$page->exists()) {
129
            $this->addFlash('notice', ['no-exist', $pageQuery]);
0 ignored issues
show
Documentation introduced by
array('no-exist', $pageQuery) is of type array<integer,*,{"0":"string","1":"*"}>, but the function expects a string.

It seems like the type of the argument is not accepted by the function/method which you are calling.

In some cases, in particular if PHP’s automatic type-juggling kicks in this might be fine. In other cases, however this might be a bug.

We suggest to add an explicit type cast like in the following example:

function acceptsInteger($int) { }

$x = '123'; // string "123"

// Instead of
acceptsInteger($x);

// we recommend to use
acceptsInteger((integer) $x);
Loading history...
130
            return $this->redirectToRoute('articleInfo');
131
        }
132
133
        // TODO: throw error if $basicInfo['missing'] is set
1 ignored issue
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
134
135
        $this->pageInfo = [
0 ignored issues
show
Documentation Bug introduced by
It seems like array('project' => $proj...=> $project->getLang()) of type array<string,object<Xtoo...age>","lang":"string"}> is incompatible with the declared type array<integer,*> of property $pageInfo.

Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.

Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.

Loading history...
136
            'project' => $project,
137
            'page' => $page,
138
            'lang' => $project->getLang(),
139
        ];
140
141
        // TODO: Adapted from legacy code; may be used to indicate how many dead ext links there are
142
        // if ( isset( $basicInfo->extlinks ) ){
1 ignored issue
show
Unused Code Comprehensibility introduced by
57% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
143
        //     foreach ( $basicInfo->extlinks as $i => $link ){
1 ignored issue
show
Unused Code Comprehensibility introduced by
53% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
144
        //         $this->extLinks[] = array("link" => $link->{'*'}, "status" => "unchecked" );
1 ignored issue
show
Unused Code Comprehensibility introduced by
64% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
145
        //     }
146
        // }
147
148
        $this->pageHistory = $page->getRevisions();
149
        $this->pageInfo['firstEdit'] = new Edit($this->pageInfo['page'], $this->pageHistory[0]);
150
        $this->pageInfo['lastEdit'] = new Edit(
151
            $this->pageInfo['page'],
152
            $this->pageHistory[$page->getNumRevisions() - 1]
153
        );
154
155
        // NOTE: bots are fetched first in case we want to restrict some stats to humans editors only
156
        $this->pageInfo['bots'] = $this->getBotData();
157
        $this->pageInfo['general']['bot_count'] = count($this->pageInfo['bots']);
158
159
        $this->pageInfo = array_merge($this->pageInfo, $this->parseHistory());
0 ignored issues
show
Documentation Bug introduced by
It seems like array_merge($this->pageI... $this->parseHistory()) of type array<string,?> is incompatible with the declared type array<integer,*> of property $pageInfo.

Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.

Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.

Loading history...
160
        $this->pageInfo['general']['top_ten_count'] = $this->getTopTenCount();
161
        $this->pageInfo['general']['top_ten_percentage'] = round(
162
            ($this->pageInfo['general']['top_ten_count'] / $page->getNumRevisions()) * 100,
163
            1
164
        );
165
        $this->pageInfo = array_merge($this->pageInfo, $this->getLinksAndRedirects());
0 ignored issues
show
Documentation Bug introduced by
It seems like array_merge($this->pageI...getLinksAndRedirects()) of type array is incompatible with the declared type array<integer,*> of property $pageInfo.

Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.

Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property.

Loading history...
166
        $this->pageInfo['general']['pageviews_offset'] = 60;
167
        $this->pageInfo['general']['pageviews'] = $this->ph->sumLastDays(
168
            $this->pageInfo['project']->getDomain(),
169
            $this->pageInfo['page']->getTitle(),
170
            $this->pageInfo['general']['pageviews_offset']
171
        );
172
173
        $assessments = $page->getAssessments();
174
        if ($assessments) {
175
            $this->pageInfo['assessments'] = $assessments;
176
        }
177
        $this->setLogsEvents();
178
179
        $bugs = $page->getErrors();
180
        if (!empty($bugs)) {
181
            $this->pageInfo['bugs'] = $bugs;
182
        }
183
184
        $this->pageInfo['xtPage'] = 'articleinfo';
185
        $this->pageInfo['xtTitle'] = $this->pageInfo['page']->getTitle();
186
187
        return $this->render("articleInfo/result.html.twig", $this->pageInfo);
188
    }
189
190
    /**
191
     * Get info about bots that edited the page
192
     * This also sets $this->pageInfo['bot_revision_count'] and $this->pageInfo['bot_percentage']
193
     * @return array Associative array containing the bot's username, edit count to the page
194
     *               and whether or not they are currently a bot
195
     */
196
    private function getBotData()
    {
        $revisionTable = $this->projectRepo->getTableName($this->dbName, 'revision');
        $userGroupsTable = $this->projectRepo->getTableName($this->dbName, 'user_groups');
        $userFormerGroupsTable = $this->projectRepo->getTableName($this->dbName, 'user_former_groups');

        // The page ID is cast to an integer before it is placed in the SQL text.
        $pageId = (int) $this->pageInfo['page']->getId();

        $query = "SELECT COUNT(rev_user_text) AS count, rev_user_text AS username, ug_group AS current
                  FROM $revisionTable
                  LEFT JOIN $userGroupsTable ON rev_user = ug_user
                  LEFT JOIN $userFormerGroupsTable ON rev_user = ufg_user
                  WHERE rev_page = $pageId AND (ug_group = 'bot' OR ufg_group = 'bot')
                  GROUP BY rev_user_text";
        $res = $this->conn->query($query)->fetchAll();

        // Index the rows by username and total up the bot edits.
        $bots = [];
        $sum = 0;
        foreach ($res as $bot) {
            $count = (int) $bot['count'];
            $bots[$bot['username']] = [
                'count' => $count,
                // True only when the user currently holds the 'bot' group.
                'current' => $bot['current'] === 'bot'
            ];
            $sum += $count;
        }

        // Most active bots first; keys (usernames) are preserved.
        uasort($bots, function ($a, $b) {
            return $b['count'] - $a['count'];
        });

        // Guard against a page that reports no revisions, which would
        // otherwise be a division by zero.
        $numRevisions = $this->pageInfo['page']->getNumRevisions();

        $this->pageInfo['general']['bot_revision_count'] = $sum;
        $this->pageInfo['general']['bot_percentage'] = $numRevisions > 0
            ? round(($sum / $numRevisions) * 100, 1)
            : 0.0;

        return $bots;
    }
231
232
    /**
233
     * Get the number of edits made to the page by the top 10 editors
234
     * This is run *after* parseHistory() since we need the grand totals first.
235
     * Various stats are also set for each editor in $this->pageInfo['editors']
236
     *   and top ten editors are stored in $this->pageInfo['general']['top_ten']
237
     *   to be used in the charts
238
     * @return integer Number of edits
239
     */
240
    private function getTopTenCount()
    {
        $topTenCount = $counter = 0;
        $topTenEditors = [];

        foreach ($this->pageInfo['editors'] as $editor => $info) {
            // Only the first ten editors, in the array's stored order, feed the
            // "top ten" total and chart data.
            // NOTE(review): this relies on $this->pageInfo['editors'] already
            // being ordered by edit count — confirm in parseHistory().
            if ($counter < 10) {
                $topTenCount += $info['all'];
                $counter++;

                // To be used in the Top Ten charts
                $topTenEditors[] = [
                    'label' => $editor,
                    'value' => $info['all'],
                    'percentage' => (
                        100 * ($info['all'] / $this->pageInfo['page']->getNumRevisions())
                    )
                ];
            }

            // Compute the percentage of minor edits the user made
            $this->pageInfo['editors'][$editor]['minor_percentage'] = $info['all']
                ? ($info['minor'] / $info['all']) * 100
                : 0;

            if ($info['all'] > 1) {
                // Average number of seconds between the user's edits: the span
                // from first to last edit divided by the edit count. The
                // subtraction must be parenthesised; without it only the
                // 'first' timestamp was divided.
                $secs = intval(
                    (strtotime($info['last']) - strtotime($info['first'])) / $info['all']
                );

                // Average time between edits (in days)
                $this->pageInfo['editors'][$editor]['atbe'] = $secs / ( 60 * 60 * 24 );
            }

            if (count($info['sizes'])) {
                // Mean of the stored sizes for this user's edits to the page
                $this->pageInfo['editors'][$editor]['size'] = array_sum($info['sizes']) / count($info['sizes']);
            } else {
                $this->pageInfo['editors'][$editor]['size'] = 0;
            }
        }

        $this->pageInfo['topTenEditors'] = $topTenEditors;

        // First sort a copy of the editors array by the amount of text they added
        $topTenEditorsByAdded = $this->pageInfo['editors'];
        uasort($topTenEditorsByAdded, function ($a, $b) {
            if ($a['added'] === $b['added']) {
                return 0;
            }
            return $a['added'] > $b['added'] ? -1 : 1;
        });

        // Grand total of added text; may be zero, in which case every share is
        // reported as 0 instead of dividing by zero.
        $totalAdded = $this->pageInfo['general']['added'];

        // Then build a new array of top 10 editors by added text,
        //   in the data structure needed for the chart
        $this->pageInfo['topTenEditorsByAdded'] = array_map(function ($editor) use ($totalAdded) {
            $added = $this->pageInfo['editors'][$editor]['added'];
            return [
                'label' => $editor,
                'value' => $added,
                'percentage' => $totalAdded
                    ? 100 * ($added / $totalAdded)
                    : 0
            ];
        }, array_keys(array_slice($topTenEditorsByAdded, 0, 10)));

        return $topTenCount;
    }
308
309
    /**
310
     * Get number of in and outgoing links and redirects to the page
311
     * @return array Associative array containing counts
312
     */
313
    private function getLinksAndRedirects()
    {
        $page = $this->pageInfo['page'];

        // Numeric values are cast before being placed in the SQL text.
        $pageId = (int) $page->getId();
        $namespace = (int) $page->getNamespace();

        // Spaces in the title are replaced with underscores for the lookups.
        $title = str_replace(' ', '_', $page->getTitle());

        $externalLinksTable = $this->projectRepo->getTableName($this->dbName, 'externallinks');
        $pageLinksTable = $this->projectRepo->getTableName($this->dbName, 'pagelinks');
        $redirectTable = $this->projectRepo->getTableName($this->dbName, 'redirect');

        // The title is free text and is therefore bound as a parameter rather
        // than interpolated. Two distinct placeholders are used because a
        // named placeholder cannot portably be repeated within one statement.
        $query = "SELECT COUNT(*) AS value, 'links_ext' AS type
                  FROM $externalLinksTable WHERE el_from = $pageId
                  UNION
                  SELECT COUNT(*) AS value, 'links_out' AS type
                  FROM $pageLinksTable WHERE pl_from = $pageId
                  UNION
                  SELECT COUNT(*) AS value, 'links_in' AS type
                  FROM $pageLinksTable WHERE pl_namespace = $namespace AND pl_title = :linkTitle
                  UNION
                  SELECT COUNT(*) AS value, 'redirects' AS type
                  FROM $redirectTable WHERE rd_namespace = $namespace AND rd_title = :redirectTitle";
        $statement = $this->conn->prepare($query);
        $statement->bindParam('linkTitle', $title);
        $statement->bindParam('redirectTitle', $title);
        $statement->execute();
        $res = $statement->fetchAll();

        $data = [];

        // Transform to associative array keyed by '<type>_count'
        foreach ($res as $row) {
            $data[$row['type'] . '_count'] = $row['value'];
        }

        return $data;
    }
347
348
    /**
349
     * Query for log events during each year of the article's history,
350
     *   and set the results in $this->pageInfo['year_count']
351
     */
352
    private function setLogsEvents()
353
    {
354
        $loggingTable = $this->projectRepo->getTableName($this->dbName, 'logging', 'logindex');
355
        $title = str_replace(' ', '_', $this->pageInfo['page']->getTitle());
356
        $query = "SELECT log_action, log_type, log_timestamp AS timestamp
357
                  FROM $loggingTable
358
                  WHERE log_namespace = '" . $this->pageInfo['page']->getNamespace() . "'
359
                  AND log_title = '$title' AND log_timestamp > 1
360
                  AND log_type IN ('delete', 'move', 'protect', 'stable')";
361
        $events = $this->conn->query($query)->fetchAll();
362
363
        foreach ($events as $event) {
364
            $time = strtotime($event['timestamp']);
365
            $year = date('Y', $time);
366
            if (isset($this->pageInfo['year_count'][$year])) {
367
                $yearEvents = $this->pageInfo['year_count'][$year]['events'];
368
369
                // Convert log type value to i18n key
370
                switch ($event['log_type']) {
371
                    case 'protect':
372
                        $action = 'protections';
373
                        break;
374
                    case 'delete':
375
                        $action = 'deletions';
376
                        break;
377
                    case 'move':
378
                        $action = 'moves';
379
                        break;
380
                    // count pending-changes protections along with normal protections
381
                    case 'stable':
382
                        $action = 'protections';
383
                        break;
384
                }
385
386
                if (empty($yearEvents[$action])) {
387
                    $yearEvents[$action] = 1;
0 ignored issues
show
Bug introduced by
The variable $action does not seem to be defined for all execution paths leading up to this point.

If you define a variable conditionally, it can happen that it is not defined for all execution paths.

Let’s take a look at an example:

function myFunction($a) {
    switch ($a) {
        case 'foo':
            $x = 1;
            break;

        case 'bar':
            $x = 2;
            break;
    }

    // $x is potentially undefined here.
    echo $x;
}

In the above example, the variable $x is defined if you pass “foo” or “bar” as argument for $a. However, since the switch statement has no default case statement, if you pass any other value, the variable $x would be undefined.

Available Fixes

  1. Check for existence of the variable explicitly:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        if (isset($x)) { // Make sure it's always set.
            echo $x;
        }
    }
    
  2. Define a default value for the variable:

    function myFunction($a) {
        $x = ''; // Set a default which gets overridden for certain paths.
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        echo $x;
    }
    
  3. Add a value for the missing path:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
    
            // We add support for the missing case.
            default:
                $x = '';
                break;
        }
    
        echo $x;
    }
    
Loading history...
388
                } else {
389
                    $yearEvents[$action]++;
390
                }
391
392
                $this->pageInfo['year_count'][$year]['events'] = $yearEvents;
393
            }
394
        }
395
    }
396
397
    /**
398
     * Get the size of the diff.
399
     * @param  int $revIndex The index of the revision within $this->pageHistory
400
     * @return int Size of the diff
401
     */
402
    private function getDiffSize($revIndex)
    {
        $current = $this->pageHistory[$revIndex];

        // The first revision has no predecessor, so its whole length is the diff.
        if ($revIndex === 0) {
            return $current['length'];
        }

        $previousLength = $this->pageHistory[$revIndex - 1]['length'];

        // TODO: Remove once T101631 is resolved
        // A missing length on the previous revision counts as no change in size.
        return $previousLength === null
            ? 0
            : $current['length'] - $previousLength;
    }
420
421
    /**
422
     * Parse the revision history, which should be at $this->pageHistory
423
     * @return array Associative "master" array of metadata about the page
424
     */
425
    private function parseHistory()
426
    {
427
        $revisionCount = $this->pageInfo['page']->getNumRevisions();
428
        if ($revisionCount == 0) {
429
            // $this->error = "no records";
1 ignored issue
show
Unused Code Comprehensibility introduced by
45% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
430
            return;
431
        }
432
433
        $firstEdit = $this->pageInfo['firstEdit'];
434
435
        // Get UNIX timestamp of the first day of the month of the first edit
436
        // This is used as a comparison when building our array of per-month stats
437
        $firstEditMonth = mktime(0, 0, 0, (int) $firstEdit->getMonth(), 1, $firstEdit->getYear());
438
439
        $lastEdit = $this->pageInfo['lastEdit'];
440
        $secondLastEdit = $revisionCount === 1 ? $lastEdit : $this->pageHistory[ $revisionCount - 2 ];
0 ignored issues
show
Unused Code introduced by
$secondLastEdit is not used, you could remove the assignment.

This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
441
442
        // Now we can start our master array. This one will be HUGE!
443
        $data = [
444
            'general' => [
445
                'max_add' => $firstEdit,
446
                'max_del' => $firstEdit,
447
                'editor_count' => 0,
448
                'anon_count' => 0,
449
                'minor_count' => 0,
450
                'count_history' => ['day' => 0, 'week' => 0, 'month' => 0, 'year' => 0],
451
                'current_size' => $this->pageHistory[$revisionCount-1]['length'],
452
                'textshares' => [],
453
                'textshare_total' => 0,
454
                'automated_count' => 0,
455
                'revert_count' => 0,
456
                'added' => 0,
457
            ],
458
            'max_edits_per_month' => 0, // for bar chart in "Month counts" section
459
            'editors' => [],
460
            'anons' => [],
461
            'year_count' => [],
462
            'tools' => [],
463
        ];
464
465
        // restore existing general data
466
        $data['general'] = array_merge($data['general'], $this->pageInfo['general']);
467
468
        // And now comes the logic for filling said master array
469
        foreach ($this->pageHistory as $i => $rev) {
470
            $edit = new Edit($this->pageInfo['page'], $rev);
0 ignored issues
show
Documentation introduced by
$rev is of type object<Xtools\Edit>, but the function expects a array<integer,string>.

It seems like the type of the argument is not accepted by the function/method which you are calling.

In some cases, in particular if PHP’s automatic type-juggling kicks in this might be fine. In other cases, however this might be a bug.

We suggest to add an explicit type cast like in the following example:

function acceptsInteger($int) { }

$x = '123'; // string "123"

// Instead of
acceptsInteger($x);

// we recommend to use
acceptsInteger((integer) $x);
Loading history...
471
            $diffSize = $this->getDiffSize($i);
0 ignored issues
show
Unused Code introduced by
$diffSize is not used, you could remove the assignment.

This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead: the first because $myVar is never used, and the second because $higher is always overwritten on every possible execution path.

Loading history...
472
            $username = htmlspecialchars($rev['username']);
473
474
            // Sometimes, with old revisions (2001 era), the revisions from 2002 come before 2001
475
            if ($edit->getTimestamp() < $firstEdit->getTimestamp()) {
476
                $firstEdit = $edit;
477
            }
478
479
            // Fill in the blank arrays for the year and 12 months
480
            if (!isset($data['year_count'][$edit->getYear()])) {
481
                $data['year_count'][$edit->getYear()] = [
482
                    'all' => 0,
483
                    'minor' => 0,
484
                    'anon' => 0,
485
                    'automated' => 0,
486
                    'size' => 0, // keep track of the size by the end of the year
487
                    'events' => [],
488
                    'months' => [],
489
                ];
490
491
                for ($i = 1; $i <= 12; $i++) {
492
                    $timeObj = mktime(0, 0, 0, $i, 1, $edit->getYear());
493
494
                    // don't show zeros for months before the first edit or after the current month
495
                    if ($timeObj < $firstEditMonth || $timeObj > strtotime('last day of this month')) {
496
                        continue;
497
                    }
498
499
                    $data['year_count'][$edit->getYear()]['months'][sprintf('%02d', $i)] = [
500
                        'all' => 0,
501
                        'minor' => 0,
502
                        'anon' => 0,
503
                        'automated' => 0,
504
                    ];
505
                }
506
            }
507
508
            // Increment year and month counts for all edits
509
            $data['year_count'][$edit->getYear()]['all']++;
510
            $data['year_count'][$edit->getYear()]['months'][$edit->getMonth()]['all']++;
511
            $data['year_count'][$edit->getYear()]['size'] = (int) $rev['length'];
512
513
            $editsThisMonth = $data['year_count'][$edit->getYear()]['months'][$edit->getMonth()]['all'];
514
            if ($editsThisMonth > $data['max_edits_per_month']) {
515
                $data['max_edits_per_month'] = $editsThisMonth;
516
            }
517
518
            // Fill in various user stats
519
            if (!isset($data['editors'][$username])) {
520
                $data['general']['editor_count']++;
521
                $data['editors'][$username] = [
522
                    'all' => 0,
523
                    'minor' => 0,
524
                    'minor_percentage' => 0,
525
                    'first' => date('Y-m-d, H:i', strtotime($rev['timestamp'])),
526
                    'first_id' => $rev['id'],
527
                    'last' => null,
528
                    'atbe' => null,
529
                    'added' => 0,
530
                    'sizes' => [],
531
                    'urlencoded' => rawurlencode($rev['username']),
532
                ];
533
            }
534
535
            // Increment user counts
536
            $data['editors'][$username]['all']++;
537
            $data['editors'][$username]['last'] = date('Y-m-d, H:i', strtotime($rev['timestamp']));
538
            $data['editors'][$username]['last_id'] = $rev['id'];
539
540
            // Store number of KB added with this edit
541
            $data['editors'][$username]['sizes'][] = $rev['length'] / 1024;
542
543
            // check if it was a revert
544
            if ($this->aeh->isRevert($rev['comment'])) {
545
                $data['general']['revert_count']++;
546
            } else {
547
                // edit was NOT a revert
548
549
                if ($edit->getSize() > 0) {
550
                    $data['general']['added'] += $edit->getSize();
551
                    $data['editors'][$username]['added'] += $edit->getSize();
552
                }
553
554
                // determine if the next revision was a revert
555
                $nextRevision = isset($this->pageHistory[$i + 1]) ? $this->pageHistory[$i + 1] : null;
556
                $nextRevisionIsRevert = $nextRevision &&
557
                    $this->getDiffSize($i + 1) === -$edit->getSize() &&
558
                    $this->aeh->isRevert($nextRevision['comment']);
559
560
                // don't count this edit as content removal if the next edit reverted it
561
                if (!$nextRevisionIsRevert && $edit->getSize() < $data['general']['max_del']->getSize()) {
562
                    $data['general']['max_del'] = $edit;
563
                }
564
565
                // FIXME: possibly remove this
566
                if ($edit->getLength() > 0) {
567
                    // keep track of added content
568
                    $data['general']['textshare_total'] += $edit->getLength();
569
                    if (!isset($data['textshares'][$username]['all'])) {
570
                        $data['textshares'][$username]['all'] = 0;
571
                    }
572
                    $data['textshares'][$username]['all'] += $edit->getLength();
573
                }
574
575
                if ($edit->getSize() > $data['general']['max_add']->getSize()) {
576
                    $data['general']['max_add'] = $edit;
577
                }
578
            }
579
580
            if ($edit->isAnon()) {
581
                if (!isset($rev['rev_user']['anons'][$username])) {
582
                    $data['general']['anon_count']++;
583
                }
584
                // Anonymous, increase counts
585
                $data['anons'][] = $username;
586
                $data['year_count'][$edit->getYear()]['anon']++;
587
                $data['year_count'][$edit->getYear()]['months'][$edit->getMonth()]['anon']++;
588
            }
589
590
            if ($edit->isMinor()) {
591
                // Logged in, increase counts
592
                $data['general']['minor_count']++;
593
                $data['year_count'][$edit->getYear()]['minor']++;
594
                $data['year_count'][$edit->getYear()]['months'][$edit->getMonth()]['minor']++;
595
                $data['editors'][$username]['minor']++;
596
            }
597
598
            $automatedTool = $this->aeh->getTool($rev['comment']);
599
            if ($automatedTool) {
600
                $data['general']['automated_count']++;
601
                $data['year_count'][$edit->getYear()]['automated']++;
602
                $data['year_count'][$edit->getYear()]['months'][$edit->getMonth()]['automated']++;
603
604
                if (!isset($data['tools'][$automatedTool])) {
605
                    $data['tools'][$automatedTool] = [
606
                        'count' => 1,
607
                        'link' => $this->aeh->getTools()[$automatedTool]['link'],
608
                    ];
609
                } else {
610
                    $data['tools'][$automatedTool]['count']++;
611
                }
612
            }
613
614
            // Increment "edits per <time>" counts
615 View Code Duplication
            if (strtotime($rev['timestamp']) > strtotime('-1 day')) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
616
                $data['general']['count_history']['day']++;
617
            }
618 View Code Duplication
            if (strtotime($rev['timestamp']) > strtotime('-1 week')) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
619
                $data['general']['count_history']['week']++;
620
            }
621 View Code Duplication
            if (strtotime($rev['timestamp']) > strtotime('-1 month')) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
622
                $data['general']['count_history']['month']++;
623
            }
624 View Code Duplication
            if (strtotime($rev['timestamp']) > strtotime('-1 year')) {
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
625
                $data['general']['count_history']['year']++;
626
            }
627
        }
628
629
        // add percentages
630
        $data['general']['minor_percentage'] = round(
631
            ($data['general']['minor_count'] / $revisionCount) * 100,
632
            1
633
        );
634
        $data['general']['anon_percentage'] = round(
635
            ($data['general']['anon_count'] / $revisionCount) * 100,
636
            1
637
        );
638
639
        // other general statistics
640
        $dateFirst = $firstEdit->getTimestamp();
641
        $dateLast = $lastEdit->getTimestamp();
642
        $data['general']['datetime_first_edit'] = $dateFirst;
643
        $data['general']['datetime_last_edit'] = $dateLast;
644
        $interval = date_diff($dateLast, $dateFirst, true);
645
646
        $data['totaldays'] = $interval->format('%a');
647
        $data['general']['average_days_per_edit'] = round($data['totaldays'] / $revisionCount, 1);
648
        $editsPerDay = $data['totaldays']
649
            ? $revisionCount / ($data['totaldays'] / (365 / 12 / 24))
650
            : 0;
651
        $data['general']['edits_per_day'] = round($editsPerDay, 1);
652
        $editsPerMonth = $data['totaldays']
653
            ? $revisionCount / ($data['totaldays'] / (365 / 12))
654
            : 0;
655
        $data['general']['edits_per_month'] = round($editsPerMonth, 1);
656
        $editsPerYear = $data['totaldays']
657
            ? $revisionCount / ($data['totaldays'] / 365)
658
            : 0;
659
        $data['general']['edits_per_year'] = round($editsPerYear, 1);
660
        $data['general']['edits_per_editor'] = round($revisionCount / count($data['editors']), 1);
661
662
        // If after processing max_del is positive, no edit actually removed text, so unset this value
663
        if ($data['general']['max_del']->getSize() > 0) {
664
            unset($data['general']['max_del']);
665
        }
666
667
        // Various sorts
668
        arsort($data['editors']);
669
        arsort($data['textshares']);
670
        arsort($data['tools']);
671
        ksort($data['year_count']);
672
673
        return $data;
674
    }
675
}
676