1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace detox\dataset; |
4
|
|
|
|
5
|
|
|
|
6
|
|
|
/**
 * English dataset of offensive words and phrases.
 *
 * Both lists are grouped under numeric-string keys ('1.0' down to '0.06').
 * The keys appear to be toxicity weights, with higher meaning more offensive
 * (the inline comment below calls values <= 0.1 "almost noise"); how they are
 * combined is decided by the consumer of this set, which is not defined here.
 *
 * The keys are deliberately strings: PHP keeps '1.0' and '0.9' as string keys,
 * whereas float keys would be truncated to integers and collide.
 */
class EnglishSet implements SetContract
{
    /**
     * Single words, grouped by score key.
     *
     * @var array<string, string[]>
     */
    private $words = [
        '1.0' => [
            // simple bad words
            'fuck',
            'slut',
            'dick',
            'faggot',
            'bitch',
            'douchebag',
            'dickhead',
            'jerk',
            'cunt',
            'shit',
            'piss',
            'crap',
            'cock',
            'twat',
            'arse',
            'arsehole',
            'tosser',
            'wanker',
            'bastard',
            'honkey',
            'nigger',
            'flikker',
            'scumbag',
            // porn industry
            'bukkake',
            'dildo',
            'strapon',
            'shag',
            'sex',
            'blowjob',
            'bdsm',
            'bbd',
            'milf',
            'anal',
            'vagina',
        ],
        '0.9' => ['ugly', 'stupid', 'dumb', 'boobs', 'pish', 'fanny', 'slag', 'squirt', 'torture', 'ass', 'nitwit', 'whiffet'],
        '0.8' => ['silly', 'pussy', 'sick', 'git', 'poop', 'slaughter', 'sperm'],
        // NOTE(review): 'nonentity' is listed here and again under '0.5';
        // which score should win depends on the consumer - confirm and drop one.
        '0.7' => ['shallow', 'tit', 'tits', 'foolish', 'nonce', 'bugger', 'naught', 'prick', 'schmuck', 'nonentity', 'idiot'],
        '0.6' => ['redneck', 'mindless', 'fat', 'nude', 'wtf', 'snot', 'bloodbath', 'massacre', 'massacrer'],
        '0.5' => ['bully', 'sneaky', 'greedy', 'creep', 'kill', 'revenge', 'catfight', 'die', 'death', 'nought', 'nonentity'],
        '0.4' => ['superficial', 'numb', 'clown', 'villager', 'flatter', 'murder', 'nothingness'],
        '0.3' => ['fake', 'strange', 'ignorant', 'critical', 'nuts', 'cum', 'genitals', 'retaliation', 'freak', 'kick'],
        '0.2' => ['useless', 'thoughtless', 'crazy', 'bollocks', 'bit', 'hit', 'exterminate', 'gangster'],
        // <= 0.1 is almost noise for detox
        '0.1' => ['punch', 'insect', 'annihilate', 'steal', 'kidnap'],
        '0.06' => ['dude', 'pal', 'yo'],
    ];

    /**
     * Multi-word phrases, grouped by score key in the same way as $words.
     *
     * @var array<string, string[]>
     */
    private $phrases = [
        '1.0' => ['dirty sanchez', 'gang bang', 'piss off', 'blow job', 'kick ass'],
        '0.9' => ['swinger party', 'bloody hell', 'bugger off', 'black on white', 'double penetration'],
        '0.8' => ['get stuffed', 'get lost'],
        '0.7' => ['screw you', 'screw u', 'get off'],
        '0.3' => ['white supremacy', 'black supremacy', 'ku klux klan'],
        '0.2' => [
            'black people',
            'white people',
            'asian people',
            'indian people',
            'spanish people',
            'mexican people',
            'black ppl',
            'white ppl',
            'asian ppl',
            'indian ppl',
            'spanish ppl',
            'mexican ppl',
        ],
    ];

    /**
     * Returns the single-word list exactly as declared above.
     *
     * @return array<string, string[]> score key => list of words
     */
    public function getWords(): array
    {
        return $this->words;
    }

    /**
     * Returns the phrase list exactly as declared above.
     *
     * @return array<string, string[]> score key => list of phrases
     */
    public function getPhrases(): array
    {
        return $this->phrases;
    }
}