1 | #!/usr/bin/env php |
||||
2 | |||||
0 ignored issues
–
show
Coding Style
introduced
by
![]() |
|||||
3 | <?php |
||||
0 ignored issues
–
show
|
|||||
4 | // This file is part of BOINC. |
||||
5 | // http://boinc.berkeley.edu |
||||
6 | // Copyright (C) 2024 University of California |
||||
7 | // |
||||
8 | // BOINC is free software; you can redistribute it and/or modify it |
||||
9 | // under the terms of the GNU Lesser General Public License |
||||
10 | // as published by the Free Software Foundation, |
||||
11 | // either version 3 of the License, or (at your option) any later version. |
||||
12 | // |
||||
13 | // BOINC is distributed in the hope that it will be useful, |
||||
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
||||
16 | // See the GNU Lesser General Public License for more details. |
||||
17 | // |
||||
18 | // You should have received a copy of the GNU Lesser General Public License |
||||
19 | // along with BOINC. If not, see <http://www.gnu.org/licenses/>. |
||||
20 | // ----------------------------------------------- |
||||
21 | |||||
22 | // delete_spammers.php [--test] [--min_days n] [--max_days n] command |
||||
23 | // |
||||
24 | // script to delete spammer accounts, profiles, forum posts, and/or teams. |
||||
25 | // The various options delete different categories of spammers. |
||||
26 | // |
||||
27 | // USE WITH CARE. You don't want to delete legit accounts. |
||||
28 | // Run with --test and examine the results first. |
||||
29 | |||||
30 | // In this context, 'spam' is text that advertises something |
||||
31 | // (e.g. viagra, porn sites etc.) and contains hyperlinks. |
||||
32 | // Spammers can put such text in |
||||
33 | // - profiles |
||||
34 | // - team URLs or descriptions |
||||
35 | // - account URLs |
||||
36 | // - forum posts |
||||
37 | // |
||||
38 | // All the above can legitimately contain links, |
||||
39 | // so to decide what's spam we look at attributes of the user: |
||||
40 | // - whether they've attached a client to the project |
||||
41 | // Most spammers haven't, so they have no hosts. |
||||
42 | // Note: legit users might create an account just to participate |
||||
43 | // in the project forums (e.g. Science United users). |
||||
44 | // So we generally need to check for forum activity. |
||||
45 | // - whether they've been granted any credit |
||||
46 | // This is more stringent. |
||||
47 | // But we need to take into account that it may take a month or two |
||||
48 | // to get credit because of validation |
||||
49 | // |
||||
50 | // When we identify spam, we delete everything associated with that user: |
||||
51 | // - profile |
||||
52 | // - forum stuff: post, thread, subscriptions etc. |
||||
53 | // - private messages |
||||
54 | // - friend links |
||||
55 | // - badges |
||||
56 | |||||
57 | // options: |
||||
58 | // --min_days N |
||||
59 | // Only delete accounts created at least N days ago |
||||
60 | // --max_days N |
||||
61 | // Only delete accounts created at most N days ago |
||||
62 | // --test |
||||
63 | // Show what accounts would be deleted, but don't delete them |
||||
64 | // |
||||
65 | // commands: |
||||
66 | // |
||||
67 | // --profiles |
||||
68 | // delete accounts that |
||||
69 | // - have a profile containing a link. |
||||
70 | // - have no hosts |
||||
71 | // - have no message-board posts |
||||
72 | // |
||||
73 | // --user_url |
||||
74 | // delete accounts that |
||||
75 | // - have a nonempty URL |
||||
76 | // - have no hosts |
||||
77 | // - have no message-board posts |
||||
78 | // Use for spammers who create accounts with commercial URLs. |
||||
79 | // |
||||
80 | // --user_null |
||||
81 | // delete accounts that |
||||
82 | // - have no hosts |
||||
83 | // - have no message-board posts |
||||
84 | // - don't belong to a team |
||||
85 | // Spammers may create accounts and attempt to create a profile but fail. |
||||
86 | // This cleans up those accounts. |
||||
87 | // |
||||
88 | // --forums |
||||
89 | // delete accounts that |
||||
90 | // - have no hosts |
||||
91 | // - have message-board posts containing links or URLs |
||||
92 | // - don't belong to a team (avoid deleting BOINC-wide team owners) |
||||
93 | // Use this for spammers who create accounts and post spam. |
||||
94 | // Don't use this for non-computing projects (like BOINC message boards). |
||||
95 | // In fact, don't use this in general: |
||||
96 | // it will delete users who join to participate in forums. |
||||
97 | // |
||||
98 | // --profiles_strict |
||||
99 | // delete accounts that have a profile and no message-board posts. |
||||
100 | // For the BOINC message boards. |
||||
101 | // |
||||
102 | // --list filename |
||||
103 | // delete the users listed in "filename", which contains user IDs, one per line. |
||||
104 | // |
||||
105 | // --id_range N M |
||||
106 | // delete users with ID N to M inclusive |
||||
107 | // |
||||
108 | // --teams |
||||
109 | // delete teams (and their owners and members) where the team |
||||
110 | // - has no total credit |
||||
111 | // - has description containing a link, or a URL |
||||
112 | // - is not a BOINC-Wide team |
||||
113 | // and the owner and members |
||||
114 | // - have no posts |
||||
115 | // - have no hosts |
||||
116 | // |
||||
117 | // --all (recommended for BOINC projects) |
||||
118 | // Does: --teams --user_url --profiles |
||||
119 | // Doesn't do --forums (see comments above). |
||||
120 | // Can use moderators for that. |
||||
121 | |||||
// Report every error and print it: this is an admin script run by hand,
// so problems should be visible immediately.
// ini_set() values are given as strings, the documented type of its
// $value parameter ('1' is what the boolean true was being converted to).
error_reporting(E_ALL);
ini_set('display_errors', '1');
ini_set('display_startup_errors', '1');

// Some commands below enumerate a whole table at once
// (e.g. every profile), so allow a large heap.
ini_set('memory_limit', '4G');
||||
126 | |||||
127 | require_once("../inc/db.inc"); |
||||
128 | require_once("../inc/profile.inc"); |
||||
129 | require_once("../inc/forum.inc"); |
||||
130 | require_once("../inc/user_util.inc"); |
||||
// Initialize the database layer
// (db_init() is provided by the files included above).
db_init();

// Settings read by the functions below through `global`.
// They are overwritten from the command line at the bottom of this file.
//   $test:     if true, report what would be deleted but delete nothing
//   $min_days: only touch accounts created at least this many days ago
//   $max_days: only touch accounts created at most this many days ago
$test = false;
$min_days = 0;
$max_days = 0;
||||
136 | |||||
// Delete a spammer account, and everything associated with it.
//
// $user: user record; id, create_time, email_addr, name and url are read.
//
// Always prints a summary of the account (including its age in days).
// In test mode (global $test) it also prints how many hosts and posts
// the user has, then returns without deleting anything.
// Otherwise the deletion itself is delegated to delete_user().
//
function do_delete_user($user) {
    global $test;
    $age = (time() - $user->create_time) / 86400;
    echo "deleting user\n";
    echo " ID: $user->id\n";
    echo " email: $user->email_addr\n";
    echo " name: $user->name\n";
    echo " URL: $user->url\n";
    echo " age:$age days\n";
    if ($test) {
        // Ask the DB for the counts (as the rest of this file does)
        // instead of loading every row only to count() the array.
        $n = BoincHost::count("userid=$user->id");
        $m = BoincPost::count("user=$user->id");
        echo " $n hosts\n";
        echo " $m posts\n";
        echo " (test mode - nothing deleted)\n";
        return;
    }
    delete_user($user);
}
||||
158 | |||||
// Delete the users whose IDs are listed in a file.
//
// $fname: path of a text file with one numeric user ID per line.
//
// Blank lines are skipped.
// The script exits if the file can't be opened or a line is not numeric.
// IDs that match no user are reported and skipped.
//
function delete_list($fname) {
    $f = fopen($fname, "r");
    if (!$f) die("no such file $fname\n");

    // Compare with false explicitly: fgets() returns false at EOF,
    // but a last line of "0" without a newline is also a falsy string.
    while (($s = fgets($f)) !== false) {
        $s = trim($s);

        // a stray empty line (typically at the end of the file)
        // shouldn't abort a partially-completed run
        if ($s === '') continue;

        if (!is_numeric($s)) {
            fclose($f);
            die("bad ID $s\n");
        }
        $user = BoincUser::lookup_id((int)$s);
        if ($user) {
            do_delete_user($user);
        } else {
            echo "no user ID $s\n";
        }
    }
    fclose($f);
}
||||
173 | |||||
// Tell whether a piece of text appears to contain a hyperlink.
//
// $x: text to examine
// Returns true if $x contains any of: the start of an HTML anchor tag,
// the start of a BBCode [url tag, "http://", "https://" or "www.".
// Returns false otherwise. Matching is case-sensitive.
//
function has_link($x) {
    $markers = ["<a ", "[url", "http://", "https://", "www."];
    foreach ($markers as $marker) {
        if (strpos($x, $marker) !== false) {
            return true;
        }
    }
    return false;
}
||||
182 | |||||
// Delete users with
// - no hosts
// - no team
// - posts containing links and/or URLs
//
// Honors the global $min_days / $max_days limits on account age
// (a value of 0 disables the corresponding limit).
// Prints the number of users passed to do_delete_user().
//
function delete_forums() {
    global $min_days, $max_days;

    // if they've posted, they'll have forum prefs.
    // This is faster than enumerating all users
    //
    $prefs = BoincForumPrefs::enum("posts>0");
    $ndeleted = 0;
    foreach ($prefs as $p) {
        $user = BoincUser::lookup_id($p->userid);
        if (!$user) {
            echo "missing user $p->userid\n";
            continue;
        }
        if ($min_days) {
            if ($user->create_time > time() - $min_days*86400) continue;
        }
        if ($max_days) {
            if ($user->create_time < time() - $max_days*86400) continue;
        }
        if ($user->teamid) {
            continue;
        }
        $h = BoincHost::count("userid=$p->userid");
        if ($h) continue;

        // Number of this user's posts that contain a link.
        // Kept in its own variable: it used to share a variable with the
        // deleted-user counter, which made the final total meaningless.
        //
        $nspam = BoincPost::count("user=$user->id and (content like '%<a %' or content like '%[url%' or content like '%http://%' or content like '%https://%')");
        if (!$nspam) continue;
        do_delete_user($user);
        $ndeleted++;
    }
    echo "deleted $ndeleted users\n";
}
||||
221 | |||||
// Delete users whose profile contains a link
// and who have neither hosts nor forum posts.
//
// Honors the global $min_days / $max_days limits on account age
// (a value of 0 disables the corresponding limit).
// In test mode the profile text is printed after the account summary.
// Prints the number of users passed to do_delete_user().
//
function delete_profiles() {
    global $test, $min_days, $max_days;
    $ndeleted = 0;
    foreach (BoincProfile::enum("") as $profile) {
        $linked = has_link($profile->response1) || has_link($profile->response2);
        if (!$linked) {
            continue;
        }

        $user = BoincUser::lookup_id($profile->userid);
        if (!$user) {
            echo "profile has missing user: $profile->userid\n";
            continue;
        }

        // account too new / too old for the requested window
        if ($min_days && $user->create_time > time() - $min_days*86400) {
            continue;
        }
        if ($max_days && $user->create_time < time() - $max_days*86400) {
            continue;
        }

        // any host or post counts as a sign of a real user
        if (BoincHost::count("userid=$profile->userid")) {
            continue;
        }
        if (BoincPost::count("user=$profile->userid")) {
            continue;
        }

        do_delete_user($user);
        if ($test) {
            echo "\n$profile->userid\n$profile->response1\n$profile->response2\n";
        }
        $ndeleted++;
    }
    echo "deleted $ndeleted users\n";
}
||||
255 | |||||
// Delete users who have a profile and no forum posts
// (whether or not the profile contains a link).
//
// Honors the global $min_days / $max_days limits on account age, like the
// other bulk commands (a value of 0 disables the corresponding limit).
// In test mode the profile text is printed after the account summary.
//
function delete_profiles_strict() {
    global $test, $min_days, $max_days;
    $profiles = BoincProfile::enum("");
    foreach ($profiles as $p) {
        $user = BoincUser::lookup_id($p->userid);
        if (!$user) {
            echo "profile has missing user: $p->userid\n";
            continue;
        }

        // The age limits are documented as applying to every deletion;
        // without these checks this command ignored them.
        //
        if ($min_days) {
            if ($user->create_time > time() - $min_days*86400) continue;
        }
        if ($max_days) {
            if ($user->create_time < time() - $max_days*86400) continue;
        }

        $n = BoincPost::count("user=$p->userid");
        if ($n) continue;
        do_delete_user($user);
        if ($test) {
            echo "\n$p->userid\n$p->response1\n$p->response2\n";
        }
    }
}
||||
273 | |||||
// Delete users selected by a combination of criteria.
//
// $no_hosts: if true, only users with no row in the host table
// $no_posts: if true, only users with no row in the post table
// $no_teams: if true, only users for whom no team row has userid = their ID
//            (delete_teams() treats team.userid as the team founder)
// $have_url: if true, only users whose url field is non-empty
//
// Honors the global $min_days / $max_days limits on account age
// (a value of 0 disables the corresponding limit).
// Prints the number of users passed to do_delete_user().
//
function delete_users($no_hosts, $no_posts, $no_teams, $have_url) {
    global $min_days, $max_days;
    $db = BoincDb::get();

    // Each "no X" criterion is an anti-join:
    // left join the table, then require the joined column to be null.
    //
    $joins = "";
    $conds = "";
    if ($no_hosts) {
        $joins .= " left join host c on c.userid=a.id ";
        $conds .= " and c.userid is null ";
    }
    if ($no_posts) {
        $joins .= " left join post b on a.id=b.user ";
        $conds .= " and b.user is null ";
    }
    if ($no_teams) {
        $joins .= " left join team d on a.id=d.userid ";
        $conds .= " and d.userid is null ";
    }
    if ($min_days) {
        $cutoff = time() - $min_days*86400;
        $conds .= " and a.create_time < $cutoff ";
    }
    if ($max_days) {
        $cutoff = time() - $max_days*86400;
        $conds .= " and a.create_time > $cutoff ";
    }
    $query = "select a.* from user a " . $joins . " where true " . $conds;

    $result = $db->do_query($query);
    $ndeleted = 0;
    while ($row = $result->fetch_object()) {
        // re-read the user through BoincUser before acting on it
        $user = BoincUser::lookup_id($row->id);
        if (!$user) {
            continue;
        }
        if ($have_url && !strlen($user->url)) {
            continue;
        }
        do_delete_user($user);
        $ndeleted++;
    }
    echo "deleted $ndeleted users\n";
}
||||
321 | |||||
// Delete users whose forum preferences have banished_until > 0.
//
// Honors the global $min_days / $max_days limits on account age.
// As everywhere else in this file, a value of 0 disables the limit.
//
function delete_banished() {
    global $min_days, $max_days;
    $fps = BoincForumPrefs::enum("banished_until>0");
    foreach ($fps as $fp) {
        $user = BoincUser::lookup_id($fp->userid);
        if (!$user) continue;

        // Only apply a limit that was actually requested.
        // Unguarded, the default $max_days = 0 made the second test
        // (create_time < now) true for every account, so nothing
        // was ever deleted.
        //
        if ($min_days) {
            if ($user->create_time > time() - $min_days*86400) continue;
        }
        if ($max_days) {
            if ($user->create_time < time() - $max_days*86400) continue;
        }
        do_delete_user($user);
    }
}
||||
333 | |||||
// Delete spam teams, together with their members and founder.
//
// Candidate teams have fewer than 2 users, seti_id = 0, no total credit,
// and a creation time within the global $min_days / $max_days limits
// (a value of 0 disables the corresponding limit).
// A candidate with no founder is deleted outright.
// Otherwise it is deleted only if
// - it has at most one member,
// - its description contains a link, or it has a URL, and
// - none of its members (nor its founder) shows signs of life:
//   seti_nresults, forum posts or hosts.
//
// Prints the number of teams passed to delete_team().
//
function delete_teams() {
    global $min_days, $max_days;
    $clause = "nusers < 2 and seti_id=0 and total_credit=0";
    if ($min_days) {
        $cutoff = time() - $min_days*86400;
        $clause .= " and create_time < $cutoff";
    }
    if ($max_days) {
        $cutoff = time() - $max_days*86400;
        $clause .= " and create_time > $cutoff";
    }

    $ndeleted = 0;
    foreach (BoincTeam::enum($clause) as $team) {
        $founder = $team->userid ? BoincUser::lookup_id($team->userid) : null;

        // delete teams with no founder
        if (!$founder) {
            delete_team($team, []);
            $ndeleted++;
            continue;
        }

        if (team_count_members($team->id) > 1) {
            continue;
        }

        // only teams that advertise something are considered spam
        $advertises = has_link($team->description) || $team->url;
        if (!$advertises) {
            continue;
        }

        // team members, plus the founder if not a member
        //
        $users = BoincUser::enum("teamid = $team->id");
        if ($founder->teamid != $team->id) {
            $users[] = $founder;
        }

        // if any of these users has signs of life, skip team
        // (seti_nresults is for SETI@home)
        //
        $alive = false;
        foreach ($users as $user) {
            if ($user->seti_nresults
                || BoincPost::count("user=$user->id")
                || BoincHost::count("userid=$user->id")
            ) {
                $alive = true;
                break;
            }
        }
        if ($alive) {
            continue;
        }

        $ndeleted++;
        delete_team($team, $users);
    }
    echo "deleted $ndeleted teams\n";
}
||||
403 | |||||
// Delete a team and the given users.
//
// $team:  team record; delete() is called on it
// $users: array of user records to delete along with the team
//
// In test mode (global $test) nothing is deleted;
// the team and the users are only listed.
//
function delete_team($team, $users) {
    global $test;
    if (!$test) {
        $team->delete();
        echo "deleted team ID $team->id name $team->name\n";
        foreach ($users as $member) {
            do_delete_user($member);
        }
        return;
    }

    // dry run: describe what would go away
    echo "would delete team:\n";
    echo " ID: $team->id\n";
    echo " name: $team->name\n";
    echo " description: $team->description\n";
    echo " URL: $team->url\n";
    foreach ($users as $member) {
        echo "would delete user $member->id: $member->email_addr:\n";
    }
}
||||
423 | |||||
// Delete the user with the given ID.
//
// $id: user ID
// Prints "no such user" if the lookup finds nothing.
//
function delete_user_id($id) {
    $user = BoincUser::lookup_id($id);
    if (!$user) {
        echo "no such user\n";
        return;
    }
    echo "deleting user $id\n";
    do_delete_user($user);
}
||||
433 | |||||
// Delete every existing user with an ID from $id1 to $id2 inclusive.
//
// $id1: first ID of the range
// $id2: last ID of the range
// IDs that match no user are skipped silently.
//
function delete_user_id_range($id1, $id2) {
    $id = $id1;
    while ($id <= $id2) {
        $user = BoincUser::lookup_id($id);
        if ($user) {
            echo "deleting user $id\n";
            do_delete_user($user);
        }
        $id++;
    }
}
||||
443 | |||||
// this is for cleaning up BOINC-wide teams
//
// For each team ID from $id1 to $id2 inclusive:
// delete the team (if it exists) and the user its userid field refers to.
// Only the team and user records themselves are deleted (via delete()).
//
// In test mode (global $test) nothing is deleted;
// previously this function ignored --test and deleted anyway.
//
function delete_team_id_range($id1, $id2) {
    global $test;
    for ($i=$id1; $i <= $id2; $i++) {
        echo "deleting team $i\n";
        $team = BoincTeam::lookup_id($i);
        if (!$team) {
            continue;
        }
        if ($test) {
            echo " (test mode - nothing deleted)\n";
            continue;
        }
        $team->delete();
        $user = BoincUser::lookup_id($team->userid);
        if ($user) $user->delete();
    }
}
||||
457 | |||||
echo "Starting: ".date(DATE_RFC2822)."\n";

// get settings first
//
// First pass over the command line: pick up --test, --min_days N,
// --max_days N and the deprecated --days N (same as --max_days).
// Anything else is left for the second pass.
//
// The day counts feed into age arithmetic that decides which accounts
// get deleted, so a missing or non-numeric value stops the script here
// rather than being used as-is.
//
for ($i=1; $i<$argc; $i++) {
    $opt = $argv[$i];
    if ($opt == "--test") {
        $test = true;
    } else if ($opt == "--min_days" || $opt == "--max_days" || $opt == "--days") {
        $val = ($i+1 < $argc) ? $argv[$i+1] : null;
        if (!is_numeric($val) || $val < 0) {
            die("$opt requires a non-negative number of days\n");
        }
        $i++;

        // "+ 0" turns the numeric string into an int or float
        if ($opt == "--min_days") {
            $min_days = $val + 0;
        } else {
            $max_days = $val + 0;
        }
    }
}
||||
473 | |||||
// then do actions
//
// Second pass over the command line: run each command in the order given.
// Arguments that are not commands (settings and their values) match no
// case and are skipped.
//
for ($i=1; $i<$argc; $i++) {
    switch ($argv[$i]) {
    case "--list":
        delete_list($argv[++$i]);
        break;
    case "--profiles":
        delete_profiles();
        break;
    case "--profiles_strict":
        delete_profiles_strict();
        break;
    case "--forums":
        delete_forums();
        break;
    case "--id_range":
        $id1 = $argv[++$i];
        $id2 = $argv[++$i];
        if (!is_numeric($id1) || !is_numeric($id2)) {
            die ("bad args\n");
        }
        if ($id2 < $id1) {
            die("bad args\n");
        }
        delete_user_id_range($id1, $id2);
        break;
    case "--id":
        $id = $argv[++$i];
        if (!is_numeric($id)) {
            die ("bad arg\n");
        }
        delete_user_id($id);
        break;
    case "--team_id_range":
        $id1 = $argv[++$i];
        $id2 = $argv[++$i];
        if (!is_numeric($id1) || !is_numeric($id2)) {
            die ("bad args\n");
        }
        if ($id2 < $id1) {
            die("bad args\n");
        }
        delete_team_id_range($id1, $id2);
        break;
    case "--banished":
        delete_banished();
        break;
    case "--teams":
        delete_teams();
        break;
    case "--user_url":
        // no hosts, no posts, nonempty URL
        delete_users(true, true, false, true);
        break;
    case "--user_null":
        // no hosts, no posts, no team
        delete_users(true, true, true, false);
        break;
    case "--all":
        delete_profiles();
        delete_teams();
        delete_users(true, true, false, true);
        break;
    }
}
echo "Finished: ".date(DATE_RFC2822)."\n";
||||
526 | |||||
527 | ?> |
||||
528 |