Issues (1963)

html/ops/delete_spammers.php (6 issues)

1
#!/usr/bin/env php
2
0 ignored issues
show
PHP files must only contain PHP code
Loading history...
3
<?php
0 ignored issues
show
The opening PHP tag must be the first content in the file
Loading history...
4
// This file is part of BOINC.
5
// http://boinc.berkeley.edu
6
// Copyright (C) 2024 University of California
7
//
8
// BOINC is free software; you can redistribute it and/or modify it
9
// under the terms of the GNU Lesser General Public License
10
// as published by the Free Software Foundation,
11
// either version 3 of the License, or (at your option) any later version.
12
//
13
// BOINC is distributed in the hope that it will be useful,
14
// but WITHOUT ANY WARRANTY; without even the implied warranty of
15
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16
// See the GNU Lesser General Public License for more details.
17
//
18
// You should have received a copy of the GNU Lesser General Public License
19
// along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
20
// -----------------------------------------------
21
22
// delete_spammers.php [--test] [--min_days n] [--max_days n] command
23
//
24
// script to delete spammer accounts, profiles, forum posts, and/or teams.
25
// The various options delete different categories of spammers.
26
//
27
// USE WITH CARE.  You don't want to delete legit accounts.
28
// Run with --test and examine the results first.
29
30
// In this context, 'spam' is text that advertises something
31
// (e.g. viagra, porn sites etc.) and contains hyperlinks.
32
// Spammers can put such text in
33
// - profiles
34
// - team URLs or descriptions
35
// - account URLs
36
// - forum posts
37
//
38
// All the above can legitimately contain links,
39
// so to decide what's spam we look at attributes of the user:
40
// - whether they've attached a client to the project
41
//      Most spammers haven't, so they have no hosts.
42
//      Note: legit users might create an account just to participate
43
//      in the project forums (e.g. Science United users).
44
//      So we generally need to check for forum activity.
45
// - whether they've been granted any credit
46
//      This is more stringent.
47
//      But we need to take into account that it may take a month or two
48
//      to get credit because of validation
49
//
50
// When we identify spam, we delete everything associated with that user:
51
// - profile
52
// - forum stuff: post, thread, subscriptions etc.
53
// - private messages
54
// - friend links
55
// - badges
56
57
// options:
58
// --min_days N
59
//    Only delete accounts created at least N days ago
60
// --max_days N
61
//    Only delete accounts created at most N days ago
62
// --test
63
//    Show what accounts would be deleted, but don't delete them
64
//
65
// commands:
66
//
67
// --profiles
68
//   delete accounts that
69
//   - have a profile containing a link.
70
//   - have no hosts
71
//   - have no message-board posts
72
//
73
// --user_url
74
//   delete accounts that
75
//   - have a nonempty URL
76
//   - have no hosts
77
//   - have no message-board posts
78
//   Use for spammers who create accounts with commercial URLs.
79
//
80
// --user_null
81
//   delete accounts that
82
//   - have no hosts
83
//   - have no message-board posts
84
//   - don't belong to a team
85
//   Spammers may create accounts and attempt to create a profile but fail.
86
//   This cleans up those accounts.
87
//
88
// --forums
89
//   delete accounts that
90
//   - have no hosts
91
//   - have message-board posts containing links or URLs
92
//   - don't belong to a team (avoid deleting BOINC-wide team owners)
93
//   Use this for spammers who create accounts and post spam.
94
//   Don't use this for non-computing projects (like BOINC message boards).
95
//   In fact, don't use this in general:
96
//   it will delete users who join to participate in forums.
97
//
98
// --profiles_strict
99
//  delete accounts that have a profile and no message-board posts.
100
//  For the BOINC message boards.
101
//
102
// --list filename
103
//   "filename" contains a list of user IDs, one per line.
104
//
105
// --id_range N M
106
//   delete users with ID N to M inclusive
107
//
108
// --teams
109
//   delete teams (and their owners and members) where the team
110
//   - has no total credit
111
//   - has description containing a link, or a URL
112
//   - is not a BOINC-Wide team
113
//   and the owner and members
114
//   - have no posts
115
//   - have no hosts
116
//
117
// --all (recommended for BOINC projects)
118
//   Does: --teams --user_url --profiles
119
//   Doesn't do --forums (see comments above).
120
//   Can use moderators for that.
121
122
error_reporting(E_ALL);
123
ini_set('display_errors', true);
0 ignored issues
show
true of type true is incompatible with the type string expected by parameter $value of ini_set(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

123
ini_set('display_errors', /** @scrutinizer ignore-type */ true);
Loading history...
124
ini_set('display_startup_errors', true);
125
ini_set('memory_limit', '4G');
126
127
require_once("../inc/db.inc");
128
require_once("../inc/profile.inc");
129
require_once("../inc/forum.inc");
130
require_once("../inc/user_util.inc");
131
db_init();
132
133
$min_days = 0;
134
$max_days = 0;
135
$test = false;
136
137
// Delete a spammer account and everything associated with it.
// Always prints a summary of the account (ID, email, name, URL, age in days).
// If the global $test flag is set, the user's host and post counts are
// printed as well and nothing is deleted; otherwise delete_user() is called.
//
function do_delete_user($user) {
    global $test;

    $age_days = (time() - $user->create_time) / 86400;
    echo "deleting user\n";
    echo "   ID: {$user->id}\n";
    echo "   email: {$user->email_addr}\n";
    echo "   name: {$user->name}\n";
    echo "   URL: {$user->url}\n";
    echo "   age:{$age_days} days\n";

    if (!$test) {
        delete_user($user);
        return;
    }

    // test mode: report what the account owns, but leave it alone
    $host_count = count(BoincHost::enum("userid={$user->id}"));
    $post_count = count(BoincPost::enum("user={$user->id}"));
    echo "   {$host_count} hosts\n";
    echo "   {$post_count} posts\n";
    echo "   (test mode - nothing deleted)\n";
}
158
159
// Delete the users whose IDs are listed in file $fname, one ID per line.
// Blank lines are skipped.
// Aborts the script if the file can't be opened or a line is not numeric.
// IDs that match no user are reported and skipped.
//
function delete_list($fname) {
    $f = fopen($fname, "r");
    if (!$f) die("no such file $fname\n");

    // compare against false explicitly: a final line consisting of "0"
    // with no trailing newline is a falsy string and would otherwise
    // end the loop early
    //
    while (($s = fgets($f)) !== false) {
        $s = trim($s);
        if ($s === '') continue;
        if (!is_numeric($s)) {
            fclose($f);
            die("bad ID $s\n");
        }
        $user = BoincUser::lookup_id((int)$s);
        if ($user) {
            do_delete_user($user);
        } else {
            echo "no user ID $s\n";
        }
    }
    fclose($f);
}
173
174
// Return true if text $x contains something that looks like a link:
// an HTML anchor, a BBCode [url] tag, an http(s) URL, or a "www." host.
// Matching is case-insensitive so that variants such as "<A HREF",
// "HTTP://" or "WWW." are detected too.
// A null or empty $x yields false.
//
function has_link($x) {
    // cast so that a null field is treated as empty text
    $text = (string)$x;
    $markers = ["<a ", "[url", "http://", "https://", "www."];
    foreach ($markers as $marker) {
        if (stripos($text, $marker) !== false) return true;
    }
    return false;
}
182
183
// delete users with
// - no hosts
// - no team
// - posts contain links and/or URLs
// Honors the global $min_days / $max_days account-age limits
// (0 means no limit).
// Prints the number of users passed to do_delete_user().
//
function delete_forums() {
    global $min_days, $max_days;

    // if they've posted, they'll have forum prefs.
    // This is faster than enumerating all users
    //
    $prefs = BoincForumPrefs::enum("posts>0");
    $ndeleted = 0;
    foreach ($prefs as $p) {
        $user = BoincUser::lookup_id($p->userid);
        if (!$user) {
            echo "missing user $p->userid\n";
            continue;
        }
        if ($min_days) {
            if ($user->create_time > time() - $min_days*86400) continue;
        }
        if ($max_days) {
            if ($user->create_time < time() - $max_days*86400) continue;
        }
        if ($user->teamid) {
            continue;
        }
        $h = BoincHost::count("userid=$p->userid");
        if ($h) continue;

        // keep the per-user spam-post count in its own variable;
        // it must not overwrite the running total of deleted users
        //
        $nspam = BoincPost::count("user=$user->id and (content like '%<a %' or content like '%[url%' or content like '%http://%' or content like '%https://%')");
        if (!$nspam) continue;
        do_delete_user($user);
        $ndeleted++;
    }
    echo "deleted $ndeleted users\n";
}
221
222
// Delete accounts that
// - have a profile whose response1 or response2 contains a link
// - fall within the global $min_days / $max_days age limits (0 = no limit)
// - have no hosts
// - have no message-board posts
// In test mode the offending profile text is printed after each account.
// Prints the number of users passed to do_delete_user().
//
function delete_profiles() {
    global $test, $min_days, $max_days;

    $ndeleted = 0;
    foreach (BoincProfile::enum("") as $profile) {
        if (!has_link($profile->response1) && !has_link($profile->response2)) {
            continue;
        }

        $user = BoincUser::lookup_id($profile->userid);
        if (!$user) {
            echo "profile has missing user: {$profile->userid}\n";
            continue;
        }

        // account too young
        if ($min_days && $user->create_time > time() - $min_days*86400) {
            continue;
        }
        // account too old
        if ($max_days && $user->create_time < time() - $max_days*86400) {
            continue;
        }

        // any host or post counts as a sign of a legit user
        if (BoincHost::count("userid={$profile->userid}")) continue;
        if (BoincPost::count("user={$profile->userid}")) continue;

        do_delete_user($user);
        if ($test) {
            echo "\n{$profile->userid}\n{$profile->response1}\n{$profile->response2}\n";
        }
        $ndeleted++;
    }
    echo "deleted $ndeleted users\n";
}
255
256
// Delete every account that has a profile and no message-board posts.
// The profile text is not inspected, and the $min_days / $max_days
// limits are not applied.
// In test mode the profile text is printed after each account.
//
function delete_profiles_strict() {
    global $test;

    foreach (BoincProfile::enum("") as $profile) {
        $user = BoincUser::lookup_id($profile->userid);
        if (!$user) {
            echo "profile has missing user: {$profile->userid}\n";
            continue;
        }

        $post_count = BoincPost::count("user={$profile->userid}");
        if (!$post_count) {
            do_delete_user($user);
            if ($test) {
                echo "\n{$profile->userid}\n{$profile->response1}\n{$profile->response2}\n";
            }
        }
    }
}
273
274
// Delete users selected by a single SQL query over the user table.
//   $no_hosts: only users with no row in host
//   $no_posts: only users with no row in post
//   $no_teams: only users who are not the 'userid' of any team row
//   $have_url: only users whose url field is nonempty (checked in PHP)
// The global $min_days / $max_days account-age limits are applied
// when nonzero.
// Prints the number of users passed to do_delete_user().
//
function delete_users($no_hosts, $no_posts, $no_teams, $have_url) {
    global $test, $min_days, $max_days;
    $db = BoincDb::get();

    // collect the join clauses and the matching "no such row" conditions
    // side by side, then assemble the query
    //
    $joins = "";
    $conds = "";
    if ($no_hosts) {
        $joins .= " left join host c on c.userid=a.id ";
        $conds .= " and c.userid is null ";
    }
    if ($no_posts) {
        $joins .= " left join post b on a.id=b.user ";
        $conds .= " and b.user is null ";
    }
    if ($no_teams) {
        $joins .= " left join team d on a.id=d.userid ";
        $conds .= " and d.userid is null ";
    }
    if ($min_days) {
        $cutoff = time() - $min_days*86400;
        $conds .= " and a.create_time < $cutoff ";
    }
    if ($max_days) {
        $cutoff = time() - $max_days*86400;
        $conds .= " and a.create_time > $cutoff ";
    }
    $query = "select a.* from user a " . $joins . " where true " . $conds;

    $result = $db->do_query($query);
    $ndeleted = 0;
    while ($row = $result->fetch_object()) {
        $user = BoincUser::lookup_id($row->id);
        if (!$user) {
            continue;
        }
        if ($have_url && !strlen($user->url)) {
            continue;
        }
        do_delete_user($user);
        $ndeleted++;
    }
    echo "deleted $ndeleted users\n";
}
321
322
// Delete users whose forum preferences have banished_until > 0.
// The global $min_days / $max_days account-age limits are applied
// only when nonzero, as in the other delete functions.
// (Unguarded, the default $max_days of 0 excludes every account
// created before "now", i.e. all of them.)
//
function delete_banished() {
    global $min_days, $max_days;
    $fps = BoincForumPrefs::enum("banished_until>0");
    foreach ($fps as $fp) {
        $user = BoincUser::lookup_id($fp->userid);
        if (!$user) continue;
        if ($min_days) {
            if ($user->create_time > time() - $min_days*86400) continue;
        }
        if ($max_days) {
            if ($user->create_time < time() - $max_days*86400) continue;
        }
        do_delete_user($user);
    }
}
333
334
// Delete spam teams, together with their founder and members.
// Candidates are teams with nusers < 2, seti_id = 0 and no total credit,
// within the global $min_days / $max_days age limits (0 = no limit).
// A candidate with no founder is deleted outright.
// Otherwise it is deleted only if
// - it has at most one member
// - its description contains a link, or it has a URL
// - none of its members (or its founder) has SETI@home results,
//   posts, or hosts
// Prints the number of teams passed to delete_team().
//
function delete_teams() {
    global $min_days, $max_days, $test;

    $where = "nusers < 2 and seti_id=0 and total_credit=0";
    if ($min_days) {
        $cutoff = time() - $min_days*86400;
        $where .= " and create_time < $cutoff";
    }
    if ($max_days) {
        $cutoff = time() - $max_days*86400;
        $where .= " and create_time > $cutoff";
    }

    $ndeleted = 0;
    foreach (BoincTeam::enum($where) as $team) {
        $founder = $team->userid ? BoincUser::lookup_id($team->userid) : null;

        // delete teams with no founder
        if (!$founder) {
            delete_team($team, []);
            $ndeleted++;
            continue;
        }

        if (team_count_members($team->id) > 1) continue;
        if (!(has_link($team->description) || $team->url)) continue;

        // team members, plus the founder if not a member
        //
        $members = BoincUser::enum("teamid = {$team->id}");
        if ($founder->teamid != $team->id) {
            $members[] = $founder;
        }

        // if any of these users has signs of life, skip team
        // (seti_nresults is for SETI@home)
        //
        $alive = false;
        foreach ($members as $member) {
            if ($member->seti_nresults
                || BoincPost::count("user={$member->id}")
                || BoincHost::count("userid={$member->id}")
            ) {
                $alive = true;
                break;
            }
        }
        if ($alive) continue;

        $ndeleted++;
        delete_team($team, $members);
    }
    echo "deleted $ndeleted teams\n";
}
403
404
// Delete team $team and each user in the array $users.
// If the global $test flag is set, only print what would be deleted.
//
function delete_team($team, $users) {
    global $test;

    if (!$test) {
        $team->delete();
        echo "deleted team ID {$team->id} name {$team->name}\n";
        foreach ($users as $member) {
            do_delete_user($member);
        }
        return;
    }

    // test mode: describe the team and its users, delete nothing
    echo "would delete team:\n";
    echo "   ID: {$team->id}\n";
    echo "   name: {$team->name}\n";
    echo "   description: {$team->description}\n";
    echo "   URL: {$team->url}\n";
    foreach ($users as $member) {
        echo "would delete user {$member->id}: {$member->email_addr}:\n";
    }
}
423
424
// Delete the user with the given ID, or report that there is none.
//
function delete_user_id($id) {
    $user = BoincUser::lookup_id($id);
    if (!$user) {
        echo "no such user\n";
        return;
    }
    echo "deleting user {$id}\n";
    do_delete_user($user);
}
433
434
// Delete all users with IDs from $id1 to $id2 inclusive.
// IDs that match no user are skipped silently.
//
function delete_user_id_range($id1, $id2) {
    for ($uid = $id1; $uid <= $id2; $uid++) {
        $user = BoincUser::lookup_id($uid);
        if (!$user) {
            continue;
        }
        echo "deleting user {$uid}\n";
        do_delete_user($user);
    }
}
443
444
// this is for cleaning up BOINC-wide teams
//
// Delete the teams with IDs from $id1 to $id2 inclusive,
// along with the user record of each team's founder (team->userid).
// IDs that match no team are skipped.
// If the global $test flag is set, only report the teams that
// would be deleted.
//
function delete_team_id_range($id1, $id2) {
    global $test;
    for ($i=$id1; $i <= $id2; $i++) {
        $team = BoincTeam::lookup_id($i);
        if (!$team) continue;
        if ($test) {
            echo "would delete team $i (test mode - nothing deleted)\n";
            continue;
        }
        echo "deleting team $i\n";
        $team->delete();
        $user = BoincUser::lookup_id($team->userid);
        if ($user) $user->delete();
    }
}
457
458
echo "Starting: ".date(DATE_RFC2822)."\n";

// get settings first
//
// --min_days, --max_days and --days (deprecated alias for --max_days)
// each require a non-negative numeric value.
// A missing or malformed value aborts the run: otherwise the age limit
// would be silently dropped and more accounts deleted than intended.
//
for ($i=1; $i<$argc; $i++) {
    $opt = $argv[$i];
    if ($opt == "--test") {
        $test = true;
    } else if ($opt == "--min_days" || $opt == "--max_days" || $opt == "--days") {
        $val = isset($argv[$i+1]) ? $argv[++$i] : null;
        if (!is_numeric($val) || $val < 0) {
            die("bad value for $opt\n");
        }
        if ($opt == "--min_days") {
            $min_days = $val + 0;
        } else {
            $max_days = $val + 0;
        }
    }
}

// then do actions
//
for ($i=1; $i<$argc; $i++) {
    if ($argv[$i] == "--list") {
        if (!isset($argv[$i+1])) {
            die("--list requires a filename\n");
        }
        delete_list($argv[++$i]);
    } else if ($argv[$i] == "--profiles") {
        delete_profiles();
    } else if ($argv[$i] == "--profiles_strict") {
        delete_profiles_strict();
    } else if ($argv[$i] == "--forums") {
        delete_forums();
    } else if ($argv[$i] == "--id_range") {
        $id1 = isset($argv[$i+1]) ? $argv[++$i] : null;
        $id2 = isset($argv[$i+1]) ? $argv[++$i] : null;
        if (!is_numeric($id1) || !is_numeric($id2)) {
            die ("bad args\n");
        }
        if ($id2 < $id1) {
            die("bad args\n");
        }
        delete_user_id_range($id1, $id2);
    } else if ($argv[$i] == "--id") {
        $id = isset($argv[$i+1]) ? $argv[++$i] : null;
        if (!is_numeric($id)) {
            die ("bad arg\n");
        }
        delete_user_id($id);
    } else if ($argv[$i] == "--team_id_range") {
        $id1 = isset($argv[$i+1]) ? $argv[++$i] : null;
        $id2 = isset($argv[$i+1]) ? $argv[++$i] : null;
        if (!is_numeric($id1) || !is_numeric($id2)) {
            die ("bad args\n");
        }
        if ($id2 < $id1) {
            die("bad args\n");
        }
        delete_team_id_range($id1, $id2);
    } else if ($argv[$i] == "--banished") {
        delete_banished();
    } else if ($argv[$i] == "--teams") {
        delete_teams();
    } else if ($argv[$i] == "--user_url") {
        delete_users(true, true, false, true);
    } else if ($argv[$i] == "--user_null") {
        delete_users(true, true, true, false);
    } else if ($argv[$i] == "--all") {
        delete_profiles();
        delete_teams();
        delete_users(true, true, false, true);
    }
}
echo "Finished: ".date(DATE_RFC2822)."\n";
526
527
?>
528