EnglishSet   A
last analyzed

Complexity

Total Complexity 2

Size/Duplication

Total Lines 91
Duplicated Lines 0 %

Importance

Changes 5
Bugs 0 Features 0
Metric Value
wmc 2
eloc 68
c 5
b 0
f 0
dl 0
loc 91
rs 10

2 Methods

Rating   Name   Duplication   Size   Complexity  
A getWords() 0 3 1
A getPhrases() 0 3 1
1
<?php
2
3
namespace detox\dataset;
4
5
6
/**
 * English word and phrase dataset.
 *
 * Both lists are grouped into tiers keyed by a numeric string ('1.0' down to
 * '0.06'). The keys are deliberately strings: PHP truncates float array keys
 * to integers, which would collapse every tier below 1.0 into key 0.
 *
 * NOTE(review): the tier key is presumably the weight a consumer assigns to a
 * match -- confirm against the code that reads SetContract datasets.
 */
class EnglishSet implements SetContract
{
    /**
     * Single words, grouped by tier.
     *
     * @var array<string, string[]>
     */
    private $words = [
        '1.0'  => [
            // simple bad words
            'fuck',
            'slut',
            'dick',
            'faggot',
            'bitch',
            'douchebag',
            'dickhead',
            'jerk',
            'cunt',
            'shit',
            'piss',
            'crap',
            'cock',
            'twat',
            'arse',
            'arsehole',
            'tosser',
            'wanker',
            'bastard',
            'honkey',
            'nigger',
            'flikker',
            'scumbag',
            // porn industry
            'bukkake',
            'dildo',
            'strapon',
            'shag',
            'sex',
            'blowjob',
            'bdsm',
            'bbd',
            'milf',
            'anal',
            'vagina',
        ],
        '0.9'  => [
            'ugly', 'stupid', 'dumb', 'boobs', 'pish', 'fanny',
            'slag', 'squirt', 'torture', 'ass', 'nitwit', 'whiffet',
        ],
        '0.8'  => ['silly', 'pussy', 'sick', 'git', 'poop', 'slaughter', 'sperm'],
        // NOTE(review): 'nonentity' is also listed under '0.5'; decide which
        // tier is intended and drop the other entry.
        '0.7'  => [
            'shallow', 'tit', 'tits', 'foolish', 'nonce', 'bugger',
            'naught', 'prick', 'schmuck', 'nonentity', 'idiot',
        ],
        '0.6'  => [
            'redneck', 'mindless', 'fat', 'nude', 'wtf',
            'snot', 'bloodbath', 'massacre', 'massacrer',
        ],
        '0.5'  => [
            'bully', 'sneaky', 'greedy', 'creep', 'kill', 'revenge',
            'catfight', 'die', 'death', 'nought', 'nonentity',
        ],
        '0.4'  => ['superficial', 'numb', 'clown', 'villager', 'flatter', 'murder', 'nothingness'],
        '0.3'  => [
            'fake', 'strange', 'ignorant', 'critical', 'nuts',
            'cum', 'genitals', 'retaliation', 'freak', 'kick',
        ],
        '0.2'  => ['useless', 'thoughtless', 'crazy', 'bollocks', 'bit', 'hit', 'exterminate', 'gangster'],
        // <= 0.1 is almost noise for detox
        '0.1'  => ['punch', 'insect', 'annihilate', 'steal', 'kidnap'],
        '0.06' => ['dude', 'pal', 'yo'],
    ];

    /**
     * Multi-word phrases, grouped by tier in the same way as $words.
     *
     * @var array<string, string[]>
     */
    private $phrases = [
        '1.0' => ['dirty sanchez', 'gang bang', 'piss off', 'blow job', 'kick ass'],
        '0.9' => ['swinger party', 'bloody hell', 'bugger off', 'black on white', 'double penetration'],
        '0.8' => ['get stuffed', 'get lost'],
        '0.7' => ['screw you', 'screw u', 'get off'],
        '0.3' => ['white supremacy', 'black supremacy', 'ku klux klan'],
        '0.2' => [
            'black people',
            'white people',
            'asian people',
            'indian people',
            'spanish people',
            'mexican people',
            'black ppl',
            'white ppl',
            'asian ppl',
            'indian ppl',
            'spanish ppl',
            'mexican ppl',
        ],
    ];

    /**
     * Returns the single-word list exactly as declared in $words.
     *
     * @return array<string, string[]> tier key => words in that tier
     */
    public function getWords(): array
    {
        return $this->words;
    }

    /**
     * Returns the phrase list exactly as declared in $phrases.
     *
     * @return array<string, string[]> tier key => phrases in that tier
     */
    public function getPhrases(): array
    {
        return $this->phrases;
    }
}