GitHub Access Token became invalid

It seems that the GitHub access token used for retrieving details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

NSFWImageDetectorPlugin   B
last analyzed

Complexity

Total Complexity 36

Size/Duplication

Total Lines 161
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 1
Metric Value
dl 0
loc 161
rs 8.8
c 3
b 0
f 1
wmc 36

12 Methods

Rating   Name   Duplication   Size   Complexity  
A handle_message() 0 17 4
B _process_images() 0 20 5
A _is_gif() 0 4 1
D _get_skin_ratio_percentage() 0 23 8
A _is_png() 0 4 1
A _is_image_url() 0 7 1
A _is_image() 0 3 1
C _download_image() 0 29 7
A __init__() 0 3 1
A _is_jpg() 0 4 1
A _is_nsfw_image() 0 9 1
B _get_image_urls() 0 20 5
1
"""
Plugin which tries to detect NSFW image URLs.

Requirements:

* Pillow
* requests
* JPEG decoder for PIL (libjpeg-dev package on Ubuntu)
"""
10
11
import os
12
import uuid
13
import tempfile
14
import regex
15
from os.path import join as pjoin
16
17
import requests
18
from PIL import Image
19
20
from base import BotPlugin
21
22
# The trailing comma matters: without it the parentheses are just grouping and
# ``__all__`` is a plain string, so ``from <module> import *`` would try to
# look up every single character as an attribute name.
__all__ = ('NSFWImageDetectorPlugin',)
23
24
# Lower-case file extensions which mark a URL as pointing to an image.
IMAGE_EXTENSIONS = ['.png', '.jpg', '.gif']

# Chunk size handed to ``iter_content`` while streaming a download.
CHUNK_SIZE = 1024

# An image is reported as NSFW when its skin-tone percentage exceeds this.
SKIN_PERCENTAGE_THRESHOLD = 30
33
34
35
class NSFWImageDetectorPlugin(BotPlugin):
    """
    Bot plugin which warns a channel about image links that look NSFW.

    Image URLs found in a message are downloaded into a temporary directory,
    scored with a simple skin-tone heuristic and deleted again afterwards.
    """

    name = 'NSFW Image Detector'
    description = ('Scans image URLs for potential NSFW images and warns '
                   'users about them')

    # Seconds passed to ``requests.get`` as ``timeout`` so an unresponsive
    # host cannot block message handling indefinitely.
    download_timeout = 10

    def __init__(self, bot):
        """Create the plugin and the temporary directory used for downloads."""
        super(NSFWImageDetectorPlugin, self).__init__(bot=bot)
        self._images_dir = tempfile.mkdtemp(suffix='nsfw-images')

    def handle_message(self, channel, nick, msg, line=None):
        """
        Look for image URLs in ``msg`` and warn ``channel`` about NSFW ones.

        :param channel: Channel the message was seen in; warnings are sent
                        back to it.
        :param nick: Nick of the sender, interpolated into the warning.
        :param msg: Text of the message to scan for URLs.
        :param line: Unused by this plugin.
        """
        urls = regex.WEB_URL.findall(msg)
        if not urls:
            return

        image_urls = self._get_image_urls(urls)
        if not image_urls:
            return

        nsfw_image_urls = self._process_images(urls=image_urls)
        if not nsfw_image_urls:
            return

        # Kept as a function-level import like the original code, but done
        # once per message instead of once per offending URL.
        from response import NSFW_LINKS, random_response

        for url in nsfw_image_urls:
            warning = random_response(NSFW_LINKS) % {'url': url, 'nick': nick}
            self.bot.say(warning, channel)

    def _process_images(self, urls):
        """
        Download all the images and return links which include potentially
        NSFW content.

        Every downloaded file is removed again, whether or not the check
        succeeds.

        :param urls: Iterable of image URLs.
        :return: List with the subset of ``urls`` classified as NSFW.
        """
        nsfw_urls = []

        for url in urls:
            file_path = self._download_image(url=url)

            if not file_path or not os.path.isfile(file_path):
                continue

            try:
                if self._is_nsfw_image(file_path=file_path):
                    nsfw_urls.append(url)
            finally:
                os.remove(file_path)

        return nsfw_urls

    def _is_nsfw_image(self, file_path):
        """
        Detect if the provided image file is NSFW.

        Current version of this function is very simple and only detects very
        basic nudity by measuring skin tone percentage in the image.

        :param file_path: Path of the image file on disk.
        :return: ``True`` if the skin tone percentage is above
                 ``SKIN_PERCENTAGE_THRESHOLD``.
        """
        skin_percent = self._get_skin_ratio_percentage(file_path)
        return skin_percent > SKIN_PERCENTAGE_THRESHOLD

    def _get_skin_ratio_percentage(self, file_path):
        """
        Return the percentage (0-100) of skin-toned pixels in an image.

        Only the central part of the picture is examined: 20% is cut off on
        every side before counting. Files which cannot be opened or decoded
        are logged and scored as ``0.0``.

        :param file_path: Path of the image file on disk.
        :return: Skin tone percentage as a float.
        """
        try:
            # ``Image.open`` is lazy, so ``convert`` is kept inside the ``try``
            # to also catch decode errors; ``with`` releases the file handle
            # before the caller deletes the file.
            with Image.open(file_path) as source:
                im = source.convert('RGB')
        except Exception:
            self.bot.log_error('Could not open NSFW image: "'
                               + file_path + '"')
            return 0.0

        width, height = im.size
        margin_x = int(width * 0.2)
        margin_y = int(height * 0.2)
        im = im.crop((margin_x, margin_y,
                      width - margin_x, height - margin_y))

        total_pixels = im.size[0] * im.size[1]
        if not total_pixels:
            # Nothing to count and nothing to divide by.
            return 0.0

        # With ``maxcolors`` equal to the pixel count the result is never None.
        colors = im.getcolors(total_pixels)

        # A pixel counts as skin when red is bright enough and green and blue
        # both sit inside a band relative to red. The original code tested
        # green four times (making two bounds redundant) and never looked at
        # blue; the 0.70 / 0.20 bounds are applied to blue here.
        skin = sum(count for count, (red, green, blue) in colors
                   if red > 60
                   and red * 0.40 < green < red * 0.85
                   and red * 0.20 < blue < red * 0.70)

        percentage = float(skin) / float(total_pixels)
        return percentage * 100

    def _get_image_urls(self, urls):
        """
        Filter urls to return only image urls.

        Contains url transformers for a few common image sharers.

        :param urls: Iterable of URL strings (may be empty or ``None``).
        :return: List of image URLs; empty when there are none.
        """
        image_urls = []

        for url in urls or []:
            # Rewrite imgur urls
            imgur_res = regex.IMGUR.search(url)
            if imgur_res:
                url = "https://i.imgur.com/" + imgur_res.group('id') + ".jpg"

            if self._is_image_url(url=url):
                image_urls.append(url)

        return image_urls

    @staticmethod
    def _get_url_extension(url):
        """
        Return the file extension of ``url`` (including the leading dot).

        A query string or fragment is dropped first so that
        ``http://host/a.png?size=large`` still yields ``.png``.
        """
        path = url.split('#', 1)[0].split('?', 1)[0]
        return os.path.splitext(path)[1]

    @staticmethod
    def _is_image_url(url):
        """Return True if the extension of ``url`` is a known image type."""
        # Very simple logic, doesn't support urls which don't have an extension
        extension = NSFWImageDetectorPlugin._get_url_extension(url.lower())
        return extension in IMAGE_EXTENSIONS

    def _download_image(self, url):
        """
        Download image in a temporary directory and return its path.

        :param url: URL of the image to fetch.
        :return: Path of the downloaded file, or ``None`` when the request
                 fails, the status code is not 200 or the payload does not
                 start with a known image signature.
        """
        extension = self._get_url_extension(url)

        try:
            response = requests.get(url, stream=True,
                                    timeout=self.download_timeout)
        except Exception:
            self.bot.log_error('Failed to download NSFW image: "'
                               + url + '"')
            return

        name = str(uuid.uuid4()) + extension
        file_path = pjoin(self._images_dir, name)

        try:
            if response.status_code != 200:
                return

            chunks = response.iter_content(CHUNK_SIZE)

            # Check the signature before creating the file so a rejected
            # payload leaves nothing behind in the temporary directory.
            first_chunk = next(chunks, b'')
            if not self._is_image(first_chunk):
                self.bot.log_error('NSFW image was not an image: "'
                                   + url + '"')
                return

            with open(file_path, 'wb') as fp:
                fp.write(first_chunk)
                for chunk in chunks:
                    fp.write(chunk)
        except (requests.RequestException, IOError):
            # The connection broke mid-stream or the file could not be
            # written: treat it like any other failed download.
            self.bot.log_error('Failed to download NSFW image: "'
                               + url + '"')
            if os.path.isfile(file_path):
                os.remove(file_path)
            return
        finally:
            response.close()

        return file_path

    # From http://people.iola.dk/olau/python/imagedetect.py by Ole Laursen
    # The signatures are bytes literals because ``iter_content`` yields bytes;
    # plain string literals never compare equal to them on Python 3.
    @staticmethod
    def _is_jpg(data):
        """Return True if data starts with the 2 byte JPEG signature."""
        return data[:2] == b'\xff\xd8'

    @staticmethod
    def _is_png(data):
        """Return True if data starts with the 8 byte PNG signature."""
        return data[:8] == b'\x89PNG\x0d\x0a\x1a\x0a'

    @staticmethod
    def _is_gif(data):
        """Return True if data starts with the first 4 bytes of a GIF file."""
        return data[:4] == b'GIF8'

    def _is_image(self, data):
        """Return True if data conforms to a magic number of an image file."""
        return self._is_jpg(data) or self._is_png(data) or self._is_gif(data)
196