Passed
Push — master ( 0e30e3...7b249e )
by MusikAnimal
07:20
created

ArticleInfoRepository::getTransclusionData()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 31
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
eloc 15
c 1
b 0
f 0
nc 3
nop 1
dl 0
loc 31
rs 9.7666
1
<?php
2
/**
3
 * This file contains only the ArticleInfoRepository class.
4
 */
5
6
declare(strict_types = 1);
7
8
namespace AppBundle\Repository;
9
10
use AppBundle\Model\Page;
11
use Doctrine\DBAL\Driver\Statement;
12
13
/**
14
 * ArticleInfoRepository is responsible for retrieving data about a single
15
 * article on a given wiki.
16
 * @codeCoverageIgnore
17
 */
18
class ArticleInfoRepository extends Repository
19
{
20
    /**
21
     * Get the number of edits made to the page by bots or former bots.
22
     * @param Page $page
23
     * @param false|int $start
24
     * @param false|int $end
25
     * @return Statement resolving with keys 'count', 'username' and 'current'.
26
     */
27
    public function getBotData(Page $page, $start, $end, $count = false): Statement
28
    {
29
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_botdata');
30
        if ($this->cache->hasItem($cacheKey)) {
31
            return $this->cache->getItem($cacheKey)->get();
32
        }
33
34
        $project = $page->getProject();
35
        $revTable = $project->getTableName('revision');
36
        $userGroupsTable = $project->getTableName('user_groups');
37
        $userFormerGroupsTable = $project->getTableName('user_former_groups');
38
        $actorTable = $project->getTableName('actor');
39
40
        $datesConditions = $this->getDateConditions($start, $end);
41
42
        if ($count) {
43
            $actorSelect = '';
44
            $groupBy = '';
45
        } else {
46
            $actorSelect = 'actor_name AS username, ';
47
            $groupBy = 'GROUP BY actor_user';
48
        }
49
50
        $sql = "SELECT COUNT(DISTINCT(rev_id)) AS count, $actorSelect '1' AS current
51
                FROM $revTable
52
                JOIN $actorTable ON actor_id = rev_actor
53
                LEFT JOIN $userGroupsTable ON actor_user = ug_user
54
                WHERE rev_page = :pageId AND ug_group = 'bot' $datesConditions
55
                $groupBy
56
                UNION
57
                SELECT COUNT(DISTINCT(rev_id)) AS count, $actorSelect '0' AS current
58
                FROM $revTable
59
                JOIN $actorTable ON actor_id = rev_actor
60
                LEFT JOIN $userFormerGroupsTable ON actor_user = ufg_user
61
                WHERE rev_page = :pageId AND ufg_group = 'bot' $datesConditions
62
                $groupBy";
63
64
        $result = $this->executeProjectsQuery($sql, ['pageId' => $page->getId()]);
65
        return $this->setCache($cacheKey, $result);
66
    }
67
68
    /**
69
     * Get prior deletions, page moves, and protections to the page.
70
     * @param Page $page
71
     * @param false|int $start
72
     * @param false|int $end
73
     * @return string[] each entry with keys 'log_action', 'log_type' and 'timestamp'.
74
     */
75
    public function getLogEvents(Page $page, $start, $end): array
76
    {
77
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_logevents');
78
        if ($this->cache->hasItem($cacheKey)) {
79
            return $this->cache->getItem($cacheKey)->get();
80
        }
81
        $loggingTable = $page->getProject()->getTableName('logging', 'logindex');
82
83
        $datesConditions = $this->getDateConditions($start, $end, '', 'log_timestamp');
84
85
        $sql = "SELECT log_action, log_type, log_timestamp AS 'timestamp'
86
                FROM $loggingTable
87
                WHERE log_namespace = '" . $page->getNamespace() . "'
88
                AND log_title = :title AND log_timestamp > 1 $datesConditions
89
                AND log_type IN ('delete', 'move', 'protect', 'stable')";
90
        $title = str_replace(' ', '_', $page->getTitle());
91
92
        $result = $this->executeProjectsQuery($sql, ['title' => $title])->fetchAll();
93
        return $this->setCache($cacheKey, $result);
94
    }
95
96
    /**
97
     * Get the number of categories, templates, and files that are on the page.
98
     * @param Page $page
99
     * @return array With keys 'categories', 'templates' and 'files'.
100
     */
101
    public function getTransclusionData(Page $page): array
102
    {
103
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_transclusions');
104
        if ($this->cache->hasItem($cacheKey)) {
105
            return $this->cache->getItem($cacheKey)->get();
106
        }
107
108
        $categorylinksTable = $page->getProject()->getTableName('categorylinks');
109
        $templatelinksTable = $page->getProject()->getTableName('templatelinks');
110
        $imagelinksTable = $page->getProject()->getTableName('imagelinks');
111
        $sql = "(
112
                    SELECT 'categories' AS `key`, COUNT(*) AS val
113
                    FROM $categorylinksTable
114
                    WHERE cl_from = :pageId
115
                ) UNION (
116
                    SELECT 'templates' AS `key`, COUNT(*) AS val
117
                    FROM $templatelinksTable
118
                    WHERE tl_from = :pageId
119
                ) UNION (
120
                    SELECT 'files' AS `key`, COUNT(*) AS val
121
                    FROM $imagelinksTable
122
                    WHERE il_from = :pageId
123
                )";
124
        $resultQuery = $this->executeProjectsQuery($sql, ['pageId' => $page->getId()]);
125
        $transclusionCounts = [];
126
127
        while ($result = $resultQuery->fetch()) {
128
            $transclusionCounts[$result['key']] = (int)$result['val'];
129
        }
130
131
        return $this->setCache($cacheKey, $transclusionCounts);
132
    }
133
134
    /**
135
     * Get the top editors to the page by edit count.
136
     * @param Page $page
137
     * @param false|int $start
138
     * @param false|int $end
139
     * @param int $limit
140
     * @param bool $noBots
141
     * @return array
142
     */
143
    public function getTopEditorsByEditCount(
144
        Page $page,
145
        $start = false,
146
        $end = false,
147
        int $limit = 20,
148
        bool $noBots = false
149
    ): array {
150
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_topeditors');
151
        if ($this->cache->hasItem($cacheKey)) {
152
            return $this->cache->getItem($cacheKey)->get();
153
        }
154
155
        $project = $page->getProject();
156
        // Faster to use revision instead of revision_userindex in this case.
157
        $revTable = $project->getTableName('revision', '');
158
        $actorTable = $project->getTableName('actor');
159
160
        $dateConditions = $this->getDateConditions($start, $end);
161
162
        $sql = "SELECT actor_name AS username,
163
                    COUNT(rev_id) AS count,
164
                    SUM(rev_minor_edit) AS minor,
165
                    MIN(rev_timestamp) AS first_timestamp,
166
                    MIN(rev_id) AS first_revid,
167
                    MAX(rev_timestamp) AS latest_timestamp,
168
                    MAX(rev_id) AS latest_revid
169
                FROM $revTable
170
                JOIN $actorTable ON rev_actor = actor_id
171
                WHERE rev_page = :pageId $dateConditions";
172
173
        if ($noBots) {
174
            $userGroupsTable = $project->getTableName('user_groups');
175
            $sql .= "AND NOT EXISTS (
176
                         SELECT 1
177
                         FROM $userGroupsTable
178
                         WHERE ug_user = actor_user
179
                         AND ug_group = 'bot'
180
                     )";
181
        }
182
183
        $sql .= "GROUP BY actor_id
184
                 ORDER BY count DESC
185
                 LIMIT $limit";
186
187
        $result = $this->executeProjectsQuery($sql, [
188
            'pageId' => $page->getId(),
189
        ])->fetchAll();
190
191
        return $this->setCache($cacheKey, $result);
192
    }
193
194
    /**
195
     * Get various basic info used in the API, including the number of revisions, unique authors, initial author
196
     * and edit count of the initial author. This is combined into one query for better performance. Caching is only
197
     * applied if it took considerable time to process, because using the gadget, this will get hit for a different page
198
     * constantly, where the likelihood of cache benefiting us is slim.
199
     * @param Page $page The page.
200
     * @return string[]|false false if the page was not found.
201
     */
202
    public function getBasicEditingInfo(Page $page)
203
    {
204
        $cacheKey = $this->getCacheKey(func_get_args(), 'page_basicinfo');
205
        if ($this->cache->hasItem($cacheKey)) {
206
            return $this->cache->getItem($cacheKey)->get();
207
        }
208
209
        $revTable = $page->getProject()->getTableName('revision');
210
        $userTable = $page->getProject()->getTableName('user');
211
        $pageTable = $page->getProject()->getTableName('page');
212
        $actorTable = $page->getProject()->getTableName('actor');
213
214
        $sql = "SELECT *, (
215
                    SELECT user_editcount
216
                    FROM $userTable
217
                    WHERE user_id = author_user_id
218
                ) AS author_editcount
219
                FROM (
220
                    (
221
                        SELECT COUNT(rev_id) AS num_edits,
222
                            COUNT(DISTINCT(rev_actor)) AS num_editors,
223
                            SUM(rev_minor_edit) AS minor_edits
224
                        FROM $revTable
225
                        WHERE rev_page = :pageid
226
                        AND rev_timestamp > 0 # Use rev_timestamp index
227
                    ) a,
228
                    (
229
                        # With really old pages, the rev_timestamp may need to be sorted ASC,
230
                        #   and the lowest rev_id may not be the first revision.
231
                        SELECT actor_name AS author,
232
                               actor_user AS author_user_id,
233
                               rev_timestamp AS created_at,
234
                               rev_id AS created_rev_id
235
                        FROM $revTable
236
                        JOIN $actorTable ON actor_id = rev_actor
237
                        WHERE rev_page = :pageid
238
                        ORDER BY rev_timestamp ASC
239
                        LIMIT 1
240
                    ) b,
241
                    (
242
                        SELECT rev_timestamp AS modified_at,
243
                               rev_id AS modified_rev_id
244
                        FROM $revTable
245
                        JOIN $pageTable ON page_id = rev_page
246
                        WHERE rev_page = :pageid
247
                        AND rev_id = page_latest
248
                    ) c
249
                )";
250
        $params = ['pageid' => $page->getId()];
251
252
        // Get current time so we can compare timestamps
253
        // and decide whether or to cache the result.
254
        $time1 = time();
255
256
        /**
257
         * This query can sometimes take too long to run for pages with tens of thousands
258
         * of revisions. This query is used by the ArticleInfo gadget, which shows basic
259
         * data in real-time, so if it takes too long than the user probably didn't even
260
         * wait to see the result. We'll pass 60 as the last parameter to executeProjectsQuery,
261
         * which will set the max_statement_time to 60 seconds.
262
         */
263
        $result = $this->executeProjectsQuery($sql, $params, 60)->fetch();
264
265
        $time2 = time();
266
267
        // If it took over 5 seconds, cache the result for 20 minutes.
268
        if ($time2 - $time1 > 5) {
269
            $this->setCache($cacheKey, $result, 'PT20M');
270
        }
271
272
        return $result ?? false;
273
    }
274
}
275