EnglishSet::getWords()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 1
c 1
b 0
f 0
dl 0
loc 3
rs 10
cc 1
nc 1
nop 0
1
<?php
2
3
namespace detox\dataset;
4
5
6
/**
 * English data set of offensive words and phrases.
 *
 * Both tables are keyed by a numeric string (e.g. '1.0', '0.06') and hold a
 * list of lower-case terms. The key is presumably the weight a term
 * contributes to a text's score, with higher meaning more offensive; verify
 * against the code that consumes SetContract.
 *
 * The keys are deliberately strings: PHP would truncate float array keys to
 * integers, while non-integer numeric strings such as '0.9' are kept as-is.
 */
class EnglishSet implements SetContract
{
    /**
     * Single-word terms, grouped by weight.
     *
     * @var array<string, string[]>
     */
    private $words = [
        '1.0'  => [
            // simple bad words
            'fuck',
            'slut',
            'dick',
            'faggot',
            'bitch',
            'douchebag',
            'dickhead',
            'jerk',
            'cunt',
            'shit',
            'piss',
            'crap',
            'cock',
            'twat',
            'arse',
            'arsehole',
            'tosser',
            'wanker',
            'bastard',
            'honkey',
            'nigger',
            'flikker',
            'scumbag',
            // porn industry
            'bukkake',
            'dildo',
            'strapon',
            'shag',
            'sex',
            'blowjob',
            'bdsm',
            // NOTE(review): 'bbd' looks like a typo; intended term unclear, left unchanged.
            'bbd',
            'milf',
            'anal',
            'vagina',
        ],
        '0.9'  => ['ugly', 'stupid', 'dumb', 'boobs', 'pish', 'fanny', 'slag', 'squirt', 'torture', 'ass', 'nitwit', 'whiffet'],
        '0.8'  => ['silly', 'pussy', 'sick', 'git', 'poop', 'slaughter', 'sperm'],
        '0.7'  => ['shallow', 'tit', 'tits', 'foolish', 'nonce', 'bugger', 'naught', 'prick', 'schmuck', 'nonentity', 'idiot'],
        '0.6'  => ['redneck', 'mindless', 'fat', 'nude', 'wtf', 'snot', 'bloodbath', 'massacre', 'massacrer'],
        // NOTE(review): 'nonentity' is also listed under '0.7'; confirm which weight is intended.
        '0.5'  => ['bully', 'sneaky', 'greedy', 'creep', 'kill', 'revenge', 'catfight', 'die', 'death', 'nought', 'nonentity'],
        '0.4'  => ['superficial', 'numb', 'clown', 'villager', 'flatter', 'murder', 'nothingness'],
        '0.3'  => ['fake', 'strange', 'ignorant', 'critical', 'nuts', 'cum', 'genitals', 'retaliation', 'freak', 'kick'],
        '0.2'  => ['useless', 'thoughtless', 'crazy', 'bollocks', 'bit', 'hit', 'exterminate', 'gangster'],
        // <= 0.1 is almost noise for detox
        '0.1'  => ['punch', 'insect', 'annihilate', 'steal', 'kidnap'],
        '0.06' => ['dude', 'pal', 'yo'],
    ];

    /**
     * Multi-word phrases, grouped by weight.
     *
     * @var array<string, string[]>
     */
    private $phrases = [
        '1.0' => ['dirty sanchez', 'gang bang', 'piss off', 'blow job', 'kick ass'],
        '0.9' => ['swinger party', 'bloody hell', 'bugger off', 'black on white', 'double penetration'],
        '0.8' => ['get stuffed', 'get lost'],
        '0.7' => ['screw you', 'screw u', 'get off'],
        '0.3' => ['white supremacy', 'black supremacy', 'ku klux klan'],
        '0.2' => [
            'black people',
            'white people',
            'asian people',
            'indian people',
            'spanish people',
            'mexican people',
            'black ppl',
            'white ppl',
            'asian ppl',
            'indian ppl',
            'spanish ppl',
            'mexican ppl',
        ],
    ];

    /**
     * Returns the single-word table.
     *
     * @return array<string, string[]> weight string => list of words
     */
    public function getWords(): array
    {
        return $this->words;
    }

    /**
     * Returns the multi-word phrase table.
     *
     * @return array<string, string[]> weight string => list of phrases
     */
    public function getPhrases(): array
    {
        return $this->phrases;
    }
}