NSFWImageDetectorPlugin._get_image_urls() - Code Metrics - Psywerx/botko - Measure and Improve Code Quality continuously with Scrutinizer

NSFWImageDetectorPlugin._get_image_urls() B
last analyzed 2017-06-07 14:46 UTC

↳ Parent: NSFWImageDetectorPlugin

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	5
dl	0
loc	20
rs	8.5454
c	0
b	0
f	0

"""
Plugin which tries to detect NSFW image URLs.

Requirements:

* Pillow
* requests
* jpeg decoder for PIL (libjpeg-dev package on Ubuntu)
"""

import os
import uuid
import tempfile
import regex
from os.path import join as pjoin

import requests
from PIL import Image

from base import BotPlugin

# Public API of this module. The trailing comma matters: without it the
# parentheses are just grouping and ``__all__`` would be a plain string, which
# ``from <module> import *`` would iterate character by character.
__all__ = ('NSFWImageDetectorPlugin',)

# Lower-case file extensions (including the leading dot) that mark a URL as
# pointing to an image worth downloading and scanning.
IMAGE_EXTENSIONS = [
    '.png',
    '.jpg',
    '.gif',
    # Same format as '.jpg'; many hosts serve JPEG files under this suffix.
    '.jpeg',
]

# Number of bytes requested per chunk while streaming a download to disk.
CHUNK_SIZE = 1024

# An image is reported as NSFW when more than this percentage of its pixels
# fall within the skin-tone heuristic.
SKIN_PERCENTAGE_THRESHOLD = 30


class NSFWImageDetectorPlugin(BotPlugin):
    """
    Bot plugin which warns a channel about potentially NSFW image links.

    For every message the plugin extracts the URLs, keeps those that look like
    image links, downloads each one into a private temporary directory and
    applies a simple skin-tone heuristic to the pixels. Downloaded files are
    removed again as soon as they have been inspected.
    """

    name = 'NSFW Image Detector'
    description = ('Scans image URLs for potential NSFW images and warns '
                   'users about them')

    # Seconds to wait for the remote server (connecting and between received
    # bytes) before giving up, so that a stalled host cannot block the bot.
    DOWNLOAD_TIMEOUT = 10

    def __init__(self, bot):
        """Initialise the plugin and create its temporary download directory."""
        super(NSFWImageDetectorPlugin, self).__init__(bot=bot)
        self._images_dir = tempfile.mkdtemp(suffix='nsfw-images')

    def handle_message(self, channel, nick, msg, line=None):
        """
        Scan a message for image URLs and warn about the NSFW looking ones.

        :param channel: Channel the warning is sent to.
        :param nick: Nick of the message author, interpolated into the
                     warning.
        :param msg: Message text which is searched for URLs.
        :param line: Unused by this plugin.
        """
        urls = regex.WEB_URL.findall(msg)

        if not urls:
            return

        image_urls = self._get_image_urls(urls)

        if not image_urls:
            return

        nsfw_image_urls = self._process_images(urls=image_urls)

        if not nsfw_image_urls:
            return

        # Kept as a function level import like in the original code
        # (presumably to avoid an import cycle - TODO confirm), but executed
        # once per message instead of once per reported URL.
        from response import NSFW_LINKS, random_response

        for url in nsfw_image_urls:
            warning = random_response(NSFW_LINKS) % {'url': url, 'nick': nick}
            self.bot.say(warning, channel)

    def _process_images(self, urls):
        """
        Download all the images and return links which include potentially NSFW
        content.

        :param urls: Iterable of image URLs.
        :return: List of the URLs whose image was classified as NSFW.
        """
        nsfw_urls = []

        for url in urls:
            file_path = self._download_image(url=url)

            if not file_path or not os.path.isfile(file_path):
                continue

            try:
                if self._is_nsfw_image(file_path=file_path):
                    nsfw_urls.append(url)
            finally:
                # The download is only needed for the analysis above.
                os.remove(file_path)

        return nsfw_urls

    def _is_nsfw_image(self, file_path):
        """
        Detect if the provided image file is NSFW.

        Current version of this function is very simple and only detects very
        basic nudity by measuring skin tone percentage in the image.

        :param file_path: Path to the image file on disk.
        :return: True if the skin tone percentage is above
                 SKIN_PERCENTAGE_THRESHOLD.
        """
        skin_percent = self._get_skin_ratio_percentage(file_path)
        return skin_percent > SKIN_PERCENTAGE_THRESHOLD

    def _get_skin_ratio_percentage(self, file_path):
        """
        Return the percentage of skin coloured pixels in the image centre.

        Only the central part of the image is inspected: 20% is cropped away
        on every side before the pixels are counted.

        :param file_path: Path to the image file on disk.
        :return: Percentage as a float, or 0.0 if the file can not be read as
                 an image.
        """
        try:
            # Image.open() is lazy, the pixel data is only decoded by
            # convert(). Both calls are guarded so that a truncated or corrupt
            # file is logged and skipped instead of raising.
            im = Image.open(file_path).convert('RGB')
        except Exception:
            self.bot.log_error('Could not open NSFW image: "'
                               + file_path + '"')
            return 0.0

        width, height = im.size
        margin_x = int(width * 0.2)
        margin_y = int(height * 0.2)
        im = im.crop((margin_x, margin_y,
                      width - margin_x, height - margin_y))

        pixel_count = im.size[0] * im.size[1]
        if not pixel_count:
            # Nothing to measure, avoid dividing by zero below.
            return 0.0

        # With maxcolors equal to the pixel count every colour is reported as
        # a (count, (r, g, b)) tuple.
        colors = im.getcolors(pixel_count)

        # Skin tone heuristic: red is reasonably bright and green lies between
        # 40% and 70% of red. The original check also tested green against
        # 85% and 20% of red; those bounds were implied by the tighter ones.
        skin = sum(count for count, rgb in colors
                   if rgb[0] > 60
                   and (rgb[0] * 0.40) < rgb[1] < (rgb[0] * 0.70))

        return float(skin) / float(pixel_count) * 100

    def _get_image_urls(self, urls):
        """
        Filter urls to return only image urls.

        Contains url transformers for a few common image sharers.

        :param urls: Iterable of URLs, may be empty or None.
        :return: List of image URLs, empty if there are none.
        """
        if not urls:
            return []

        image_urls = []
        for url in urls:
            # Rewrite imgur urls
            imgur_res = regex.IMGUR.search(url)
            if imgur_res:
                url = "https://i.imgur.com/" + imgur_res.group('id') + ".jpg"

            if self._is_image_url(url=url):
                image_urls.append(url)

        return image_urls

    @staticmethod
    def _get_url_extension(url):
        """
        Return the lower-cased file extension of a URL, dot included.

        The query string and the fragment are ignored, so
        "http://host/a.JPG?size=large" yields ".jpg".
        """
        path = url.split('#', 1)[0].split('?', 1)[0]
        return os.path.splitext(path)[1].lower()

    @staticmethod
    def _is_image_url(url):
        """Return True if the extension of the URL is in IMAGE_EXTENSIONS."""
        # Very simple logic, doesn't support urls which don't have an extension
        extension = NSFWImageDetectorPlugin._get_url_extension(url)

        return extension in IMAGE_EXTENSIONS

    def _download_image(self, url):
        """
        Download image in a temporary directory and return its path.

        :param url: URL of the image to download.
        :return: Path of the downloaded file, or None if the download failed,
                 the server did not answer with status 200 or the data does
                 not start with a known image signature. No file is left
                 behind when None is returned.
        """
        extension = self._get_url_extension(url)

        try:
            response = requests.get(url, stream=True,
                                    timeout=self.DOWNLOAD_TIMEOUT)
        except Exception:
            self.bot.log_error('Failed to download NSFW image: "'
                               + url + '"')
            return None

        if response.status_code != 200:
            response.close()
            return None

        name = str(uuid.uuid4()) + extension
        file_path = pjoin(self._images_dir, name)

        completed = False
        try:
            with open(file_path, 'wb') as fp:
                first_chunk = True
                for chunk in response.iter_content(CHUNK_SIZE):
                    if first_chunk:
                        first_chunk = False
                        # The file signature is at the very start of the
                        # data, so a non-image can be rejected without
                        # downloading the rest.
                        if not self._is_image(chunk):
                            self.bot.log_error(
                                'NSFW image was not an image: "' + url + '"')
                            return None

                    fp.write(chunk)

            completed = True
            return file_path
        except Exception:
            # Network errors can still occur while the body is streamed.
            self.bot.log_error('Failed to download NSFW image: "'
                               + url + '"')
            return None
        finally:
            response.close()
            # Do not leave rejected or partial downloads in the temporary
            # directory.
            if not completed and os.path.isfile(file_path):
                os.remove(file_path)

    # From http://people.iola.dk/olau/python/imagedetect.py by Ole Laursen
    #
    # The signatures are bytes literals so that they compare equal to the raw
    # download data on Python 3 as well; on Python 2 a bytes literal is a
    # plain str, so the comparison is unchanged there.
    @staticmethod
    def _is_jpg(data):
        """Return True if data is the first 2 bytes of a JPEG file."""
        return data[:2] == b'\xff\xd8'

    @staticmethod
    def _is_png(data):
        """Return True if data is the first 8 bytes of a PNG file."""
        return data[:8] == b'\x89PNG\x0d\x0a\x1a\x0a'

    @staticmethod
    def _is_gif(data):
        """Return True if data is the first 4 bytes of a GIF file."""
        return data[:4] == b'GIF8'

    def _is_image(self, data):
        """Return True if data conforms to a magic number of an image file."""
        return self._is_jpg(data) or self._is_png(data) or self._is_gif(data)


1			"""
2			Plugin which tries to detect NSFW image URLs.
3
4			Requirements:
5
6			* Pillow
7			* requests
8			* jpeg decoder for PIL (libjpeg-dev package on Ubuntu)
9			"""
10
11			import os
12			import uuid
13			import tempfile
14			import regex
15			from os.path import join as pjoin
16
17			import requests
18			from PIL import Image
19
20			from base import BotPlugin
21
22			__all__ = ('NSFWImageDetectorPlugin')
23
24			IMAGE_EXTENSIONS = [
25			'.png',
26			'.jpg',
27			'.gif',
28			]
29
30			CHUNK_SIZE = 1024
31
32			SKIN_PERCENTAGE_THRESHOLD = 30
33
34
35			class NSFWImageDetectorPlugin(BotPlugin):
36
37			name = 'NSFW Image Detector'
38			description = ('Scans image URLs for potential NSFW images and warns '
39			'users about them')
40
41			def __init__(self, bot):
42			super(NSFWImageDetectorPlugin, self).__init__(bot=bot)
43			self._images_dir = tempfile.mkdtemp(suffix='nsfw-images')
44
45			def handle_message(self, channel, nick, msg, line=None):
46			urls = regex.WEB_URL.findall(msg)
47
48			if not urls:
49			return
50
51			image_urls = self._get_image_urls(urls)
52
53			if not image_urls:
54			return
55
56			nsfw_image_urls = self._process_images(urls=image_urls)
57
58			for url in nsfw_image_urls:
59			from response import NSFW_LINKS, random_response
60			msg = random_response(NSFW_LINKS) % {'url': url, 'nick': nick}
61			self.bot.say(msg, channel)
62
63			def _process_images(self, urls):
64			"""
65			Download all the images and return links which include potentially NSFW
66			content.
67			"""
68			nsfw_urls = []
69
70			for url in urls:
71			file_path = self._download_image(url=url)
72
73			if file_path and os.path.isfile(file_path):
74			try:
75			is_nsfw = self._is_nsfw_image(file_path=file_path)
76
77			if is_nsfw:
78			nsfw_urls.append(url)
79			finally:
80			os.remove(file_path)
81
82			return nsfw_urls
83
84			def _is_nsfw_image(self, file_path):
85			"""
86			Detect if the provided image file is NSFW.
87
88			Current version of this function is very simple and only detects very
89			basic nudity by measuring skin tone percentage in the image.
90			"""
91			skin_percent = self._get_skin_ratio_percentage(file_path)
92			return skin_percent > SKIN_PERCENTAGE_THRESHOLD
93
94			def _get_skin_ratio_percentage(self, file_path):
95			try:
96			im = Image.open(file_path)
97			except Exception:
98			self.bot.log_error('Could not open NSFW image: "'
99			+ file_path + '"')
100			return 0.0
101
102			im = im.convert('RGB')
103
104			im = im.crop((int(im.size[0] * 0.2), int(im.size[1] * 0.2),
105			im.size[0] - int(im.size[0] * 0.2),
106			im.size[1] - int(im.size[1] * 0.2)))
107
108			colors = im.getcolors(im.size[0] * im.size[1])
109
110			skin = sum(count for count, rgb in colors if rgb[0] > 60
111			and rgb[1] < (rgb[0] * 0.85) and rgb[1] < (rgb[0] * 0.70)
112			and rgb[1] > (rgb[0] * 0.40) and rgb[1] > (rgb[0] * 0.20))
113
114			percentage = float(skin) / float(im.size[0] * im.size[1])
115			percentage = percentage * 100
116			return percentage
117
118			def _get_image_urls(self, urls):
119			"""
120			Filter urls to return only image urls.
121
122			Contains url transformers for a few common image sharers.
123			"""
124			if not urls:
125			return
126
127			image_urls = []
128			for url in urls:
129			# Rewrite imgur urls
130			imgur_res = regex.IMGUR.search(url)
131			if imgur_res:
132			url = "https://i.imgur.com/" + imgur_res.group('id') + ".jpg"
133
134			if self._is_image_url(url=url):
135			image_urls.append(url)
136
137			return image_urls
138
139			@staticmethod
140			def _is_image_url(url):
141			# Very simple logic, doesn't support urls which don't have an extension
142			url = url.lower()
143			extension = os.path.splitext(url)[1]
144
145			return extension in IMAGE_EXTENSIONS
146
147			def _download_image(self, url):
148			"""Download image in a temporary directory and return its path."""
149			try:
150			extension = os.path.splitext(url)[1]
151			response = requests.get(url, stream=True)
152			except Exception:
153			self.bot.log_error('Failed to download NSFW image: "'
154			+ url + '"')
155			return
156
157			if not response.status_code == 200:
158			return
159
160			name = str(uuid.uuid4()) + extension
161			file_path = pjoin(self._images_dir, name)
162
163			first_chunk = True
164			with open(file_path, 'wb') as fp:
165			for chunk in response.iter_content(CHUNK_SIZE):
166			if first_chunk:
167			first_chunk = False
168			if not self._is_image(chunk):
169			self.bot.log_error('NSFW image was not an image: "'
170			+ url + '"')
171			return
172
173			fp.write(chunk)
174
175			return file_path
176
177			# From http://people.iola.dk/olau/python/imagedetect.py by Ole Laursen
178			@staticmethod
179			def _is_jpg(data):
180			"""Return True if data is the first 2 bytes of a JPEG file."""
181			return data[:2] == '\xff\xd8'
182
183			@staticmethod
184			def _is_png(data):
185			"""Return True if data is the first 8 bytes of a PNG file."""
186			return data[:8] == '\x89PNG\x0d\x0a\x1a\x0a'
187
188			@staticmethod
189			def _is_gif(data):
190			"""Return True if data is the first 4 bytes of a GIF file."""
191			return data[:4] == 'GIF8'
192
193			def _is_image(self, data):
194			"""Return True if data conforms to a magic number of an image file."""
195			return self._is_jpg(data) or self._is_png(data) or self._is_gif(data)
196

Psywerx / botko

GitHub Access Token became invalid

NSFWImageDetectorPlugin._get_image_urls() B last analyzed 2017-06-07 14:46 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

NSFWImageDetectorPlugin._get_image_urls() B
last analyzed 2017-06-07 14:46 UTC