Passed
Push — master ( 6fe0c7...cd5575 )
by Simon
14:57 queued 11:13
created

AutoFlagCommentsTask   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 82
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 3
eloc 71
c 1
b 0
f 0
dl 0
loc 82
rs 10

1 Method

Rating   Name   Duplication   Size   Complexity  
B execute() 0 80 3
1
<?php
2
/******************************************************************************
3
 * Wikipedia Account Creation Assistance tool                                 *
4
 * ACC Development Team. Please see team.json for a list of contributors.     *
5
 *                                                                            *
6
 * This is free and unencumbered software released into the public domain.    *
7
 * Please see LICENSE.md for the full licencing statement.                    *
8
 ******************************************************************************/
9
10
namespace Waca\ConsoleTasks;
11
12
use Exception;
13
use PDO;
14
use Waca\DataObjects\Comment;
15
use Waca\Helpers\Logger;
16
use Waca\Tasks\ConsoleTaskBase;
17
18
/**
 * Console task which automatically flags comments whose text looks like it
 * contains sensitive data.
 *
 * The patterns in the query cover e-mail addresses, IPv4/IPv6 addresses,
 * card numbers, long digit runs, phone numbers, requested passwords and
 * bank account references.
 */
class AutoFlagCommentsTask extends ConsoleTaskBase
{
    /**
     * Finds candidate comments, writes a "flagged comment" log entry for each
     * one, then marks the comment as flagged and saves it.
     *
     * A comment is only considered when all of the following hold:
     *  - it matches at least one of the patterns in the query;
     *  - the log table has no 'UnflaggedComment' entry for it;
     *  - its request has the status 'Closed';
     *  - it is not already flagged;
     *  - its text does not contain '[redacted]'.
     *
     * @throws Exception if the candidate query could not be executed
     */
    public function execute()
    {
        $db = $this->getDatabase();

        // Nowdoc: the SQL below is passed through verbatim, with no PHP
        // interpolation or escape processing applied to it.
        $sql = <<<'SQL'
select c.id, r.domain
from comment c
inner join request r on r.id = c.request
where (
    1 = 0
    /* emails */
    or c.comment rlike '[^ @]+(?<!accounts-enwiki-l|unblock|functionaries-en|checkuser-l|info-en|enwiki-acc-admins|/|\\()@(?!lists.wikimedia.org|wikimedia.org|wikipedia.org|[a-z][a-z]wiki)[a-z\\.]+'
    -- or c.comment rlike 'gmail|yahoo' --  to many FPs
    -- ipv4
    OR c.comment rlike '[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]'
    -- ipv6
    OR (lower(c.comment) rlike '[0-9a-f]{1,4}:[0-9a-f]{1,4}:[0-9a-f]{1,4}' and c.comment not rlike '[0-2]?[0-9]:[0-5][0-9]:[0-5][0-9]')
    -- card pan
    OR c.comment rlike '[0-9]{4} [0-9]{4} [0-9]{4} [0-9]{4}'
    OR c.comment rlike '(?<!ticket|ticket#|OTRS|OTRS #) \\+?(?!20[0-2][0-9][01][0-9][0-3][0-9]100[0-9]{5})[0-9]{9,}'
    -- phone numbers
    OR c.comment like '%mobile no%'
    OR c.comment like '%contact no%'
    OR c.comment like '%phone no%'
    OR c.comment like '%cell no%'
    OR c.comment rlike '\\+[0-9]{1}[0-9 .-]{5}'
    OR c.comment rlike '(?:phone(?: )?:|mobile(?: )?:|cell(?: )?:)[ 0-9+]'
    OR c.comment rlike '(^|\\s)(contact|phone|cell|mobile)( no| number| nbr)?( is)? ?:? ?[0-9+][0-9]+'
    OR c.comment rlike '[0-9]{3,} ?(ext|x)\\.? ?[0-9]{3,}'
    -- OR c.comment like '%telephone%' -- too many FP

    -- requested passwords
    OR c.comment like '%my password to be %'
    OR c.comment like '% password be %'
    OR c.comment rlike '(my )password (to |should )?(be|as)(?! soon| quickly|ap|\\?)'
    OR c.comment rlike '(as )(my )?password(?! reset)'
    OR c.comment rlike 'password(?: )?:'

    -- holy FP craziness, but full of matches.
    -- OR (c.comment rlike 'password' and c.user is null)

    -- banking
    OR c.comment rlike ' (a/c|acct) (no|number|nbr)( |\\.)'
    -- OR c.comment rlike '(?<!requested|conflicting|similar) acct'

    -- OR c.comment rlike ' card ' -- too many FP
    -- OR c.comment like '% bank %' -- too many FP

    -- all of these have too many FPs
    -- or c.comment rlike '(?<!ip )(?<!email )(?<!e-mail )(?<!this )address(?!ed)'
    -- OR c.comment rlike ' (ave|st(?!\\w)|road|rd(?!\\w))'
    -- or c.comment rlike ' (road|street|avenue) '
    -- or (c.comment rlike '(^|\\s)[0-9]{5,}\\s' and c.user is null)
    -- or (c.comment rlike ' (?:Alabama|AL|Kentucky|KY|Ohio|Alaska|AK|Louisiana|LA|Oklahoma|Arizona|AZ|Maine|Oregon|Arkansas|AR|Maryland|MD|Pennsylvania|PA|Massachusetts|MA|California|CA|Michigan|MI|Rhode Island|RI|Colorado|Minnesota|MN|South Carolina|SC|Connecticut|CT|Mississippi|MS|South Dakota|SD|Delaware|DE|Missouri|MO|Tennessee|TN|DC|Montana|MT|Texas|TX|Florida|FL|Nebraska|NE|Georgia|GA|Nevada|NV|Utah|UT|New Hampshire|NH|Vermont|VT|Hawaii|New Jersey|NJ|Virginia|VA|Idaho|New Mexico|NM|Illinois|IL|New York|NY|Washington|WA|Indiana|North Carolina|NC|West Virginia|WV|Iowa|IA|North Dakota|ND|Wisconsin|WI|Kansas|KS|Wyoming|WY)(?: |\\.)' and c.user is null)
)
-- only find comments which haven't previously been flagged
and not exists (select 1 from log l where l.objectid = c.id and action = 'UnflaggedComment')
-- only comments on closed requests (give humans a chance to flag these)
and exists (select 1 from request r where r.id = c.request and r.status = 'Closed')
and c.flagged <> 1
-- not all edited comments have log entries (yay historical reasons!)
and c.comment not like '%[redacted]%'
;
SQL;

        $statement = $db->prepare($sql);

        // Bail out before touching any comment if the lookup itself failed.
        if (!$statement->execute()) {
            throw new Exception('Error in transaction: Could not load data.');
        }

        // Read the complete result set up front, then process it row by row.
        $candidates = $statement->fetchAll(PDO::FETCH_ASSOC);

        foreach ($candidates as $candidate) {
            /** @var Comment $comment */
            $comment = Comment::getById($candidate['id'], $db);

            // Write the log entry first, then persist the flag on the comment.
            Logger::flaggedComment($db, $comment, $candidate['domain']);
            $comment->setFlagged(true);
            $comment->save();
        }
    }
}