Passed
Push — autoflagcomment ( 5dfc59 )
by Simon
10:14
created

AutoFlagCommentsTask   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 84
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 3
eloc 71
c 1
b 0
f 0
dl 0
loc 84
rs 10

1 Method

Rating   Name   Duplication   Size   Complexity  
B execute() 0 82 3
1
<?php
2
/******************************************************************************
3
 * Wikipedia Account Creation Assistance tool                                 *
4
 * ACC Development Team. Please see team.json for a list of contributors.     *
5
 *                                                                            *
6
 * This is free and unencumbered software released into the public domain.    *
7
 * Please see LICENSE.md for the full licencing statement.                    *
8
 ******************************************************************************/
9
10
namespace Waca\ConsoleTasks;
11
12
use Exception;
13
use PDO;
14
use Waca\DataObjects\Comment;
15
use Waca\Helpers\Logger;
16
use Waca\Tasks\ConsoleTaskBase;
17
18
/**
 * Console task which scans stored comments for text that looks like private
 * data and marks every match as flagged.
 *
 * The selection is done entirely in SQL. The active patterns cover email
 * addresses, IPv4/IPv6 addresses, card numbers, phone numbers, requested
 * passwords and bank account references; the commented-out alternatives are
 * retained in the query as a record of patterns that were rejected for
 * producing too many false positives.
 */
class AutoFlagCommentsTask extends ConsoleTaskBase
{
    /**
     * Flags every comment selected by the detection query.
     *
     * For each selected comment the flagging is logged first (together with
     * the domain of the owning request), then the comment is marked as
     * flagged and saved.
     *
     * @throws Exception if the detection query reports failure on execution
     */
    public function execute()
    {
        $db = $this->getDatabase();

        foreach ($this->findCommentsToFlag($db) as $candidate) {
            /** @var Comment $comment */
            $comment = Comment::getById($candidate['id'], $db);

            // The log entry is written before the flag itself is persisted.
            Logger::flaggedComment($db, $comment, $candidate['domain']);
            $comment->setFlagged(true);
            $comment->save();
        }
    }

    /**
     * Runs the detection query and returns its result set.
     *
     * Only comments satisfying all of the following are returned:
     *  - at least one of the active patterns matches the comment text;
     *  - no log entry with action 'UnflaggedComment' exists for the comment;
     *  - the owning request has status 'Closed';
     *  - the comment is not already flagged;
     *  - the comment text does not contain '[redacted]'.
     *
     * @param mixed $db database handle as returned by getDatabase()
     *
     * @return array rows fetched with PDO::FETCH_ASSOC, each holding the
     *               selected 'id' and 'domain' columns
     * @throws Exception if executing the statement returns a falsy result
     */
    private function findCommentsToFlag($db)
    {
        // Nowdoc on purpose: backslashes and quotes must reach the database
        // server exactly as written, with no PHP escape processing.
        $sql = <<<'SQL'
select c.id, r.domain
from comment c
inner join request r on r.id = c.request
where (
    1 = 0
    /* emails */
    or c.comment rlike '[^ @]+(?<!accounts-enwiki-l|unblock|functionaries-en|checkuser-l|info-en|enwiki-acc-admins|/|\\()@(?!lists.wikimedia.org|wikimedia.org|wikipedia.org|[a-z][a-z]wiki)[a-z\\.]+'
    -- or c.comment rlike 'gmail|yahoo' --  to many FPs
    -- ipv4
    OR c.comment rlike '[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]'
    -- ipv6
    OR (lower(c.comment) rlike '[0-9a-f]{1,4}:[0-9a-f]{1,4}:[0-9a-f]{1,4}' and c.comment not rlike '[0-2]?[0-9]:[0-5][0-9]:[0-5][0-9]')
    -- card pan
    OR c.comment rlike '[0-9]{4} [0-9]{4} [0-9]{4} [0-9]{4}'
    OR c.comment rlike '(?<!ticket|ticket#|OTRS|OTRS #) \\+?(?!20[0-2][0-9][01][0-9][0-3][0-9]100[0-9]{5})[0-9]{9,}'
    -- phone numbers
    OR c.comment like '%mobile no%'
    OR c.comment like '%contact no%'
    OR c.comment like '%phone no%'
    OR c.comment like '%cell no%'
    OR c.comment rlike '\\+[0-9]{1}[0-9 .-]{5}'
    OR c.comment rlike '(?:phone(?: )?:|mobile(?: )?:|cell(?: )?:)[ 0-9+]'
    OR c.comment rlike '(^|\\s)(contact|phone|cell|mobile)( no| number| nbr)?( is)? ?:? ?[0-9+][0-9]+'
    OR c.comment rlike '[0-9]{3,} ?(ext|x)\\.? ?[0-9]{3,}'
    -- OR c.comment like '%telephone%' -- too many FP

    -- requested passwords
    OR c.comment like '%my password to be %'
    OR c.comment like '% password be %'
    OR c.comment rlike '(my )password (to |should )?(be|as)(?! soon| quickly|ap|\\?)'
    OR c.comment rlike '(as )(my )?password(?! reset)'
    OR c.comment rlike 'password(?: )?:'

    -- holy FP craziness, but full of matches.
    -- OR (c.comment rlike 'password' and c.user is null)

    -- banking
    OR c.comment rlike ' (a/c|acct) (no|number|nbr)( |\\.)'
    -- OR c.comment rlike '(?<!requested|conflicting|similar) acct'

    -- OR c.comment rlike ' card ' -- too many FP
    -- OR c.comment like '% bank %' -- too many FP

    -- all of these have too many FPs
    -- or c.comment rlike '(?<!ip )(?<!email )(?<!e-mail )(?<!this )address(?!ed)'
    -- OR c.comment rlike ' (ave|st(?!\\w)|road|rd(?!\\w))'
    -- or c.comment rlike ' (road|street|avenue) '
    -- or (c.comment rlike '(^|\\s)[0-9]{5,}\\s' and c.user is null)
    -- or (c.comment rlike ' (?:Alabama|AL|Kentucky|KY|Ohio|Alaska|AK|Louisiana|LA|Oklahoma|Arizona|AZ|Maine|Oregon|Arkansas|AR|Maryland|MD|Pennsylvania|PA|Massachusetts|MA|California|CA|Michigan|MI|Rhode Island|RI|Colorado|Minnesota|MN|South Carolina|SC|Connecticut|CT|Mississippi|MS|South Dakota|SD|Delaware|DE|Missouri|MO|Tennessee|TN|DC|Montana|MT|Texas|TX|Florida|FL|Nebraska|NE|Georgia|GA|Nevada|NV|Utah|UT|New Hampshire|NH|Vermont|VT|Hawaii|New Jersey|NJ|Virginia|VA|Idaho|New Mexico|NM|Illinois|IL|New York|NY|Washington|WA|Indiana|North Carolina|NC|West Virginia|WV|Iowa|IA|North Dakota|ND|Wisconsin|WI|Kansas|KS|Wyoming|WY)(?: |\\.)' and c.user is null)
)
-- only find comments which haven't previously been flagged
and not exists (select 1 from log l where l.objectid = c.id and action = 'UnflaggedComment')
-- only comments on closed requests (give humans a chance to flag these)
and exists (select 1 from request r where r.id = c.request and r.status = 'Closed')
and c.flagged <> 1
-- not all edited comments have log entries (yay historical reasons!)
and c.comment not like '%[redacted]%'
;
SQL;

        $statement = $db->prepare($sql);

        if (!$statement->execute()) {
            throw new Exception('Error in transaction: Could not load data.');
        }

        return $statement->fetchAll(PDO::FETCH_ASSOC);
    }
}