|
1
|
|
|
<?php |
|
2
|
|
|
/****************************************************************************** |
|
3
|
|
|
* Wikipedia Account Creation Assistance tool * |
|
4
|
|
|
* ACC Development Team. Please see team.json for a list of contributors. * |
|
5
|
|
|
* * |
|
6
|
|
|
* This is free and unencumbered software released into the public domain. * |
|
7
|
|
|
* Please see LICENSE.md for the full licencing statement. * |
|
8
|
|
|
******************************************************************************/ |
|
9
|
|
|
|
|
10
|
|
|
namespace Waca\ConsoleTasks; |
|
11
|
|
|
|
|
12
|
|
|
use Exception; |
|
13
|
|
|
use PDO; |
|
14
|
|
|
use Waca\DataObjects\Comment; |
|
15
|
|
|
use Waca\Helpers\Logger; |
|
16
|
|
|
use Waca\Tasks\ConsoleTaskBase; |
|
17
|
|
|
|
|
18
|
|
|
/**
 * Console task that automatically flags comments whose text matches patterns
 * suggesting private data (email addresses, IP addresses, long digit runs,
 * phone numbers, requested passwords, bank account references).
 */
class AutoFlagCommentsTask extends ConsoleTaskBase
{
    /**
     * Finds candidate comments with a single SELECT and flags each of them.
     *
     * The query only returns comments that:
     *  - match at least one of the active text patterns,
     *  - have no log row with action 'UnflaggedComment' for their id,
     *  - belong to a request whose status is 'Closed',
     *  - are not already flagged, and
     *  - do not contain the text '[redacted]'.
     *
     * For every returned row the comment is loaded, a log entry is written via
     * Logger::flaggedComment() (using the request's domain from the same row),
     * and the comment is saved with its flagged state set to true.
     *
     * @throws Exception when executing the prepared statement reports failure
     */
    public function execute()
    {
        $db = $this->getDatabase();

        // Nowdoc: nothing in here is interpolated or unescaped by PHP, so the
        // doubled backslashes reach the database server exactly as written.
        $sql = <<<'SQL'
select c.id, r.domain
from comment c
inner join request r on r.id = c.request
where (
1 = 0
/* emails */
or c.comment rlike '[^ @]+(?<!accounts-enwiki-l|unblock|functionaries-en|checkuser-l|info-en|enwiki-acc-admins|/|\\()@(?!lists.wikimedia.org|wikimedia.org|wikipedia.org|[a-z][a-z]wiki)[a-z\\.]+'
-- or c.comment rlike 'gmail|yahoo' -- to many FPs
-- ipv4
OR c.comment rlike '[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]\\.[0-2]?[0-9]?[0-9]'
-- ipv6
OR (lower(c.comment) rlike '[0-9a-f]{1,4}:[0-9a-f]{1,4}:[0-9a-f]{1,4}' and c.comment not rlike '[0-2]?[0-9]:[0-5][0-9]:[0-5][0-9]')
-- card pan
OR c.comment rlike '[0-9]{4} [0-9]{4} [0-9]{4} [0-9]{4}'
OR c.comment rlike '(?<!ticket|ticket#|OTRS|OTRS #) \\+?(?!20[0-2][0-9][01][0-9][0-3][0-9]100[0-9]{5})[0-9]{9,}'
-- phone numbers
OR c.comment like '%mobile no%'
OR c.comment like '%contact no%'
OR c.comment like '%phone no%'
OR c.comment like '%cell no%'
OR c.comment rlike '\\+[0-9]{1}[0-9 .-]{5}'
OR c.comment rlike '(?:phone(?: )?:|mobile(?: )?:|cell(?: )?:)[ 0-9+]'
OR c.comment rlike '(^|\\s)(contact|phone|cell|mobile)( no| number| nbr)?( is)? ?:? ?[0-9+][0-9]+'
OR c.comment rlike '[0-9]{3,} ?(ext|x)\\.? ?[0-9]{3,}'
-- OR c.comment like '%telephone%' -- too many FP

-- requested passwords
OR c.comment like '%my password to be %'
OR c.comment like '% password be %'
OR c.comment rlike '(my )password (to |should )?(be|as)(?! soon| quickly|ap|\\?)'
OR c.comment rlike '(as )(my )?password(?! reset)'
OR c.comment rlike 'password(?: )?:'

-- holy FP craziness, but full of matches.
-- OR (c.comment rlike 'password' and c.user is null)

-- banking
OR c.comment rlike ' (a/c|acct) (no|number|nbr)( |\\.)'
-- OR c.comment rlike '(?<!requested|conflicting|similar) acct'

-- OR c.comment rlike ' card ' -- too many FP
-- OR c.comment like '% bank %' -- too many FP

-- all of these have too many FPs
-- or c.comment rlike '(?<!ip )(?<!email )(?<!e-mail )(?<!this )address(?!ed)'
-- OR c.comment rlike ' (ave|st(?!\\w)|road|rd(?!\\w))'
-- or c.comment rlike ' (road|street|avenue) '
-- or (c.comment rlike '(^|\\s)[0-9]{5,}\\s' and c.user is null)
-- or (c.comment rlike ' (?:Alabama|AL|Kentucky|KY|Ohio|Alaska|AK|Louisiana|LA|Oklahoma|Arizona|AZ|Maine|Oregon|Arkansas|AR|Maryland|MD|Pennsylvania|PA|Massachusetts|MA|California|CA|Michigan|MI|Rhode Island|RI|Colorado|Minnesota|MN|South Carolina|SC|Connecticut|CT|Mississippi|MS|South Dakota|SD|Delaware|DE|Missouri|MO|Tennessee|TN|DC|Montana|MT|Texas|TX|Florida|FL|Nebraska|NE|Georgia|GA|Nevada|NV|Utah|UT|New Hampshire|NH|Vermont|VT|Hawaii|New Jersey|NJ|Virginia|VA|Idaho|New Mexico|NM|Illinois|IL|New York|NY|Washington|WA|Indiana|North Carolina|NC|West Virginia|WV|Iowa|IA|North Dakota|ND|Wisconsin|WI|Kansas|KS|Wyoming|WY)(?: |\\.)' and c.user is null)
)
-- only find comments which haven't previously been flagged
and not exists (select 1 from log l where l.objectid = c.id and action = 'UnflaggedComment')
-- only comments on closed requests (give humans a chance to flag these)
and exists (select 1 from request r where r.id = c.request and r.status = 'Closed')
and c.flagged <> 1
-- not all edited comments have log entries (yay historical reasons!)
and c.comment not like '%[redacted]%'
;
SQL;

        $statement = $db->prepare($sql);

        // Bail out before touching any comment if the lookup itself failed.
        if (!$statement->execute()) {
            throw new Exception('Error in transaction: Could not load data.');
        }

        foreach ($statement->fetchAll(PDO::FETCH_ASSOC) as $match) {
            /** @var Comment $comment */
            $comment = Comment::getById($match['id'], $db);

            // Write the log entry first, then persist the flag on the comment.
            Logger::flaggedComment($db, $comment, $match['domain']);
            $comment->setFlagged(true);
            $comment->save();
        }
    }
}