1
|
|
|
<?php |
2
|
|
|
/****************************************************************************** |
3
|
|
|
* Wikipedia Account Creation Assistance tool * |
4
|
|
|
* ACC Development Team. Please see team.json for a list of contributors. * |
5
|
|
|
* * |
6
|
|
|
* This is free and unencumbered software released into the public domain. * |
7
|
|
|
* Please see LICENSE.md for the full licencing statement. * |
8
|
|
|
******************************************************************************/ |
9
|
|
|
|
10
|
|
|
namespace Waca\ConsoleTasks; |
11
|
|
|
|
12
|
|
|
use Exception; |
13
|
|
|
use PDO; |
14
|
|
|
use Waca\DataObjects\Comment; |
15
|
|
|
use Waca\Helpers\Logger; |
16
|
|
|
use Waca\Tasks\ConsoleTaskBase; |
17
|
|
|
|
18
|
|
|
/**
 * Console task which automatically flags comments that look like they
 * contain sensitive data.
 *
 * Candidate comments are found with a single SQL query that pattern-matches
 * the comment text; every match gets a log entry and is then marked as
 * flagged.
 */
class AutoFlagCommentsTask extends ConsoleTaskBase
{
    /**
     * Finds comments matching the sensitive-data patterns and flags them.
     *
     * The query only returns comments that:
     *  - match at least one enabled pattern (email addresses, IPv4/IPv6
     *    addresses, card numbers and other long digit runs, phone numbers,
     *    requested passwords, bank account references);
     *  - have no 'UnflaggedComment' entry in the log table;
     *  - belong to a request whose status is 'Closed';
     *  - are not already flagged; and
     *  - do not contain the literal text '[redacted]'.
     *
     * @throws Exception if executing the search query reports failure
     */
    public function execute()
    {
        $db = $this->getDatabase();

        // Nowdoc: no interpolation and no escape processing happens in PHP,
        // so the doubled backslashes below reach the SQL server verbatim.
        $sql = <<<'SQL'
select c.id, r.domain
from comment c
inner join request r on r.id = c.request
where (
1 = 0
/* emails */
or c.comment rlike '[^ @]+(?<!accounts-enwiki-l|unblock|functionaries-en|checkuser-l|info-en|enwiki-acc-admins|/|\\()@(?!lists.wikimedia.org|wikimedia.org|wikipedia.org|[a-z][a-z]wiki)[a-z\\.]+'
-- or c.comment rlike 'gmail|yahoo' -- to many FPs
-- ipv4
OR c.comment rlike '[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]'
-- ipv6
OR (lower(c.comment) rlike '[0-9a-f]{1,4}:[0-9a-f]{1,4}:[0-9a-f]{1,4}' and c.comment not rlike '[0-2]?[0-9]:[0-5][0-9]:[0-5][0-9]')
-- card pan
OR c.comment rlike '[0-9]{4} [0-9]{4} [0-9]{4} [0-9]{4}'
OR c.comment rlike '(?<!ticket|ticket#|OTRS|OTRS #) \\+?(?!20[0-2][0-9][01][0-9][0-3][0-9]100[0-9]{5})[0-9]{9,}'
-- phone numbers
OR c.comment like '%mobile no%'
OR c.comment like '%contact no%'
OR c.comment like '%phone no%'
OR c.comment like '%cell no%'
OR c.comment rlike '\\+[0-9]{1}[0-9 .-]{5}'
OR c.comment rlike '(?:phone(?: )?:|mobile(?: )?:|cell(?: )?:)[ 0-9+]'
OR c.comment rlike '(^|\\s)(contact|phone|cell|mobile)( no| number| nbr)?( is)? ?:? ?[0-9+][0-9]+'
OR c.comment rlike '[0-9]{3,} ?(ext|x)\\.? ?[0-9]{3,}'
-- OR c.comment like '%telephone%' -- too many FP

-- requested passwords
OR c.comment like '%my password to be %'
OR c.comment like '% password be %'
OR c.comment rlike '(my )password (to |should )?(be|as)(?! soon| quickly|ap|\\?)'
OR c.comment rlike '(as )(my )?password(?! reset)'
OR c.comment rlike 'password(?: )?:'

-- holy FP craziness, but full of matches.
-- OR (c.comment rlike 'password' and c.user is null)

-- banking
OR c.comment rlike ' (a/c|acct) (no|number|nbr)( |\\.)'
-- OR c.comment rlike '(?<!requested|conflicting|similar) acct'

-- OR c.comment rlike ' card ' -- too many FP
-- OR c.comment like '% bank %' -- too many FP

-- all of these have too many FPs
-- or c.comment rlike '(?<!ip )(?<!email )(?<!e-mail )(?<!this )address(?!ed)'
-- OR c.comment rlike ' (ave|st(?!\\w)|road|rd(?!\\w))'
-- or c.comment rlike ' (road|street|avenue) '
-- or (c.comment rlike '(^|\\s)[0-9]{5,}\\s' and c.user is null)
-- or (c.comment rlike ' (?:Alabama|AL|Kentucky|KY|Ohio|Alaska|AK|Louisiana|LA|Oklahoma|Arizona|AZ|Maine|Oregon|Arkansas|AR|Maryland|MD|Pennsylvania|PA|Massachusetts|MA|California|CA|Michigan|MI|Rhode Island|RI|Colorado|Minnesota|MN|South Carolina|SC|Connecticut|CT|Mississippi|MS|South Dakota|SD|Delaware|DE|Missouri|MO|Tennessee|TN|DC|Montana|MT|Texas|TX|Florida|FL|Nebraska|NE|Georgia|GA|Nevada|NV|Utah|UT|New Hampshire|NH|Vermont|VT|Hawaii|New Jersey|NJ|Virginia|VA|Idaho|New Mexico|NM|Illinois|IL|New York|NY|Washington|WA|Indiana|North Carolina|NC|West Virginia|WV|Iowa|IA|North Dakota|ND|Wisconsin|WI|Kansas|KS|Wyoming|WY)(?: |\\.)' and c.user is null)
)
-- only find comments which haven't previously been flagged
and not exists (select 1 from log l where l.objectid = c.id and action = 'UnflaggedComment')
-- only comments on closed requests (give humans a chance to flag these)
and exists (select 1 from request r where r.id = c.request and r.status = 'Closed')
and c.flagged <> 1
-- not all edited comments have log entries (yay historical reasons!)
and c.comment not like '%[redacted]%'
;
SQL;

        $statement = $db->prepare($sql);

        // Bail out before touching any comment if the search itself failed.
        if (!$statement->execute()) {
            throw new Exception('Error in transaction: Could not load data.');
        }

        // The full result set is read up front; the same connection is used
        // again inside the loop for the per-comment load, log and save.
        foreach ($statement->fetchAll(PDO::FETCH_ASSOC) as $match) {
            /** @var Comment $flaggedComment */
            $flaggedComment = Comment::getById($match['id'], $db);

            // Write the log entry first, then mark and persist the comment.
            Logger::flaggedComment($db, $flaggedComment, $match['domain']);
            $flaggedComment->setFlagged(true);
            $flaggedComment->save();
        }
    }
}