1
|
|
|
<?php |
2
|
|
|
/****************************************************************************** |
3
|
|
|
* Wikipedia Account Creation Assistance tool * |
4
|
|
|
* ACC Development Team. Please see team.json for a list of contributors. * |
5
|
|
|
* * |
6
|
|
|
* This is free and unencumbered software released into the public domain. * |
7
|
|
|
* Please see LICENSE.md for the full licencing statement. * |
8
|
|
|
******************************************************************************/ |
9
|
|
|
|
10
|
|
|
namespace Waca\ConsoleTasks; |
11
|
|
|
|
12
|
|
|
use Exception; |
13
|
|
|
use PDO; |
14
|
|
|
use Waca\DataObjects\Comment; |
15
|
|
|
use Waca\Helpers\Logger; |
16
|
|
|
use Waca\Tasks\ConsoleTaskBase; |
17
|
|
|
|
18
|
|
|
/**
 * Console task which flags existing comments that look like they contain
 * sensitive data.
 *
 * A single SQL query selects candidate comments using a set of heuristics
 * (email addresses, IPv4/IPv6 addresses, card numbers, long digit runs, phone
 * numbers, requested passwords and bank account references). Each match is
 * then logged and marked as flagged.
 */
class AutoFlagCommentsTask extends ConsoleTaskBase
{
    /**
     * Finds unflagged comments matching the heuristics and flags them.
     *
     * The query only returns comments which:
     *  - match at least one of the enabled patterns,
     *  - have no 'UnflaggedComment' log entry against their ID,
     *  - belong to a request whose status is 'Closed',
     *  - are not already flagged, and
     *  - do not contain the literal text '[redacted]'.
     *
     * For every returned row the comment is loaded, a log entry is written via
     * Logger::flaggedComment() using the request's domain, and the comment is
     * saved with its flagged state set to true.
     *
     * @throws Exception if executing the candidate query reports failure
     */
    public function execute()
    {
        $database = $this->getDatabase();

        // Nowdoc: backslashes below are passed through to the database
        // verbatim, so '\\.' reaches the SQL parser as written here.
        $query = $database->prepare(<<<'SQL'
            select c.id, r.domain
            from comment c
            inner join request r on r.id = c.request
            where (
                1 = 0
                /* emails */
                or c.comment rlike '[^ @]+(?<!accounts-enwiki-l|unblock|functionaries-en|checkuser-l|info-en|enwiki-acc-admins|/|\\()@(?!lists.wikimedia.org|wikimedia.org|wikipedia.org|[a-z][a-z]wiki)[a-z\\.]+'
                -- or c.comment rlike 'gmail|yahoo' -- to many FPs
                -- ipv4
                OR c.comment rlike '[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]'
                -- ipv6
                OR (lower(c.comment) rlike '[0-9a-f]{1,4}:[0-9a-f]{1,4}:[0-9a-f]{1,4}' and c.comment not rlike '[0-2]?[0-9]:[0-5][0-9]:[0-5][0-9]')
                -- card pan
                OR c.comment rlike '[0-9]{4} [0-9]{4} [0-9]{4} [0-9]{4}'
                OR c.comment rlike '(?<!ticket|ticket#|OTRS|OTRS #) \\+?(?!20[0-2][0-9][01][0-9][0-3][0-9]100[0-9]{5})[0-9]{9,}'
                -- phone numbers
                OR c.comment like '%mobile no%'
                OR c.comment like '%contact no%'
                OR c.comment like '%phone no%'
                OR c.comment like '%cell no%'
                OR c.comment rlike '\\+[0-9]{1}[0-9 .-]{5}'
                OR c.comment rlike '(?:phone(?: )?:|mobile(?: )?:|cell(?: )?:)[ 0-9+]'
                OR c.comment rlike '(^|\\s)(contact|phone|cell|mobile)( no| number| nbr)?( is)? ?:? ?[0-9+][0-9]+'
                OR c.comment rlike '[0-9]{3,} ?(ext|x)\\.? ?[0-9]{3,}'
                -- OR c.comment like '%telephone%' -- too many FP

                -- requested passwords
                OR c.comment like '%my password to be %'
                OR c.comment like '% password be %'
                OR c.comment rlike '(my )password (to |should )?(be|as)(?! soon| quickly|ap|\\?)'
                OR c.comment rlike '(as )(my )?password(?! reset)'
                OR c.comment rlike 'password(?: )?:'

                -- holy FP craziness, but full of matches.
                -- OR (c.comment rlike 'password' and c.user is null)

                -- banking
                OR c.comment rlike ' (a/c|acct) (no|number|nbr)( |\\.)'
                -- OR c.comment rlike '(?<!requested|conflicting|similar) acct'

                -- OR c.comment rlike ' card ' -- too many FP
                -- OR c.comment like '% bank %' -- too many FP

                -- all of these have too many FPs
                -- or c.comment rlike '(?<!ip )(?<!email )(?<!e-mail )(?<!this )address(?!ed)'
                -- OR c.comment rlike ' (ave|st(?!\\w)|road|rd(?!\\w))'
                -- or c.comment rlike ' (road|street|avenue) '
                -- or (c.comment rlike '(^|\\s)[0-9]{5,}\\s' and c.user is null)
                -- or (c.comment rlike ' (?:Alabama|AL|Kentucky|KY|Ohio|Alaska|AK|Louisiana|LA|Oklahoma|Arizona|AZ|Maine|Oregon|Arkansas|AR|Maryland|MD|Pennsylvania|PA|Massachusetts|MA|California|CA|Michigan|MI|Rhode Island|RI|Colorado|Minnesota|MN|South Carolina|SC|Connecticut|CT|Mississippi|MS|South Dakota|SD|Delaware|DE|Missouri|MO|Tennessee|TN|DC|Montana|MT|Texas|TX|Florida|FL|Nebraska|NE|Georgia|GA|Nevada|NV|Utah|UT|New Hampshire|NH|Vermont|VT|Hawaii|New Jersey|NJ|Virginia|VA|Idaho|New Mexico|NM|Illinois|IL|New York|NY|Washington|WA|Indiana|North Carolina|NC|West Virginia|WV|Iowa|IA|North Dakota|ND|Wisconsin|WI|Kansas|KS|Wyoming|WY)(?: |\\.)' and c.user is null)
            )
            -- only find comments which haven't previously been flagged
            and not exists (select 1 from log l where l.objectid = c.id and action = 'UnflaggedComment')
            -- only comments on closed requests (give humans a chance to flag these)
            and exists (select 1 from request r where r.id = c.request and r.status = 'Closed')
            and c.flagged <> 1
            -- not all edited comments have log entries (yay historical reasons!)
            and c.comment not like '%[redacted]%'
            ;
SQL
        );

        if (!$query->execute()) {
            throw new Exception('Error in transaction: Could not load data.');
        }

        $data = $query->fetchAll(PDO::FETCH_ASSOC);

        foreach ($data as $row) {
            $dataObject = Comment::getById($row['id'], $database);

            // NOTE(review): getById() is defined outside this file; presumably
            // it yields a non-Comment value (e.g. false) when the row cannot
            // be loaded - confirm. Skip such rows so that one missing comment
            // does not abort flagging of the remaining candidates.
            if (!$dataObject instanceof Comment) {
                continue;
            }

            // Write the log entry first, then persist the flagged state.
            Logger::flaggedComment($database, $dataObject, $row['domain']);
            $dataObject->setFlagged(true);
            $dataObject->save();
        }
    }
}