# coding=utf-8
"""
url.py - Sopel URL Title Module

Copyright 2010-2011, Michael Yanovich (yanovich.net) & Kenneth Sham
Copyright 2012-2013, Elsie Powell
Copyright 2013, Lior Ramati <[email protected]>
Copyright 2014, Elad Alfassa <[email protected]>
Licensed under the Eiffel Forum License 2.

https://sopel.chat
"""
|
12
|
|
|
from __future__ import unicode_literals, absolute_import, print_function, division |
|
13
|
|
|
|
|
14
|
|
|
import re |
|
15
|
|
|
|
|
16
|
|
|
import dns.resolver |
|
17
|
|
|
import ipaddress |
|
18
|
|
|
import requests |
|
19
|
|
|
|
|
20
|
|
|
from sopel import __version__, module, tools |
|
21
|
|
|
from sopel.config.types import ListAttribute, StaticSection, ValidatedAttribute |
|
22
|
|
|
from sopel.tools import web |
|
23
|
|
|
|
|
24
|
|
|
# Python3 vs Python2 |
|
25
|
|
|
try: |
|
26
|
|
|
from urllib.parse import urlparse |
|
27
|
|
|
except ImportError: |
|
28
|
|
|
from urlparse import urlparse |
|
29
|
|
|
|
|
30
|
|
|
# Identify the bot to remote servers when fetching pages.
USER_AGENT = 'Sopel/{} (https://sopel.chat)'.format(__version__)
default_headers = {'User-Agent': USER_AGENT}
# These are used to clean up the title tag before actually parsing it. Not the
# world's best way to do this, but it'll do for now.
title_tag_data = re.compile('<(/?)title( [^>]+)?>', re.IGNORECASE)
quoted_title = re.compile('[\'"]<title>[\'"]', re.IGNORECASE)
# Matches "DCC SEND" (case-insensitive); stripped from fetched titles before
# they are relayed -- presumably to block CTCP/DCC injection via crafted page
# titles. TODO confirm original intent.
re_dcc = re.compile(r'(?i)dcc\ssend')
# This sets the maximum number of bytes that should be read in order to find
# the title. We don't want it too high, or a link to a big file/stream will
# just keep downloading until there's no more memory. 640k ought to be enough
# for anybody.
max_bytes = 655360
|
43
|
|
|
|
|
44
|
|
|
|
|
45
|
|
|
class UrlSection(StaticSection):
    """Configuration section (``[url]``) for the URL title module."""

    # TODO some validation rules maybe?
    exclude = ListAttribute('exclude')
    """A list of regular expressions to match URLs for which the title should not be shown."""
    exclusion_char = ValidatedAttribute('exclusion_char', default='!')
    """A character (or string) which, when immediately preceding a URL, will stop that URL's title from being shown."""
    shorten_url_length = ValidatedAttribute(
        'shorten_url_length', int, default=0)
    """If greater than 0, the title fetcher will include a TinyURL version of links longer than this many characters."""
    enable_private_resolution = ValidatedAttribute(
        'enable_private_resolution', bool, default=False)
    """Enable URL lookups for RFC1918 addresses"""
    enable_dns_resolution = ValidatedAttribute(
        'enable_dns_resolution', bool, default=False)
    """Enable DNS resolution for all domains to validate if there are RFC1918 resolutions"""
|
60
|
|
|
|
|
61
|
|
|
|
|
62
|
|
|
def configure(config):
    """
    | name | example | purpose |
    | ---- | ------- | ------- |
    | exclude | https?://git\\\\.io/.* | A list of regular expressions for URLs for which the title should not be shown. |
    | exclusion\\_char | ! | A character (or string) which, when immediately preceding a URL, will stop the URL's title from being shown. |
    | shorten\\_url\\_length | 72 | If greater than 0, the title fetcher will include a TinyURL version of links longer than this many characters. |
    | enable\\_private\\_resolution | False | Enable URL lookups for RFC1918 addresses. |
    | enable\\_dns\\_resolution | False | Enable DNS resolution for all domains to validate if there are RFC1918 resolutions. |
    """
    config.define_section('url', UrlSection)

    # Prompt for each setting in turn; order matches the table above.
    prompts = (
        ('exclude',
         'Enter regular expressions for each URL you would like to exclude.'),
        ('exclusion_char',
         'Enter a character which can be prefixed to suppress URL titling'),
        ('shorten_url_length',
         'Enter how many characters a URL should be before the bot puts a'
         ' shorter version of the URL in the title as a TinyURL link'
         ' (0 to disable)'),
        ('enable_private_resolution',
         'Enable URL lookups for RFC1918 addresses?'),
        ('enable_dns_resolution',
         'Enable DNS resolution for all domains to validate if there are RFC1918 resolutions?'),
    )
    for setting, prompt in prompts:
        config.url.configure_setting(setting, prompt)
|
95
|
|
|
|
|
96
|
|
|
|
|
97
|
|
|
def setup(bot):
    """Define the plugin's config section and seed shared memory structures."""
    bot.config.define_section('url', UrlSection)

    configured = bot.config.url.exclude
    regexes = [re.compile(pattern) for pattern in configured] if configured else []

    # We're keeping these in their own list, rather than putting then in the
    # callbacks list because 1, it's easier to deal with modules that are still
    # using this list, and not the newer callbacks list and 2, having a lambda
    # just to pass is kinda ugly.
    if 'url_exclude' in bot.memory:
        existing = bot.memory['url_exclude']
        if regexes:
            existing.extend(regexes)
        bot.memory['url_exclude'] = existing
    else:
        bot.memory['url_exclude'] = regexes

    # Ensure last_seen_url is in memory
    if 'last_seen_url' not in bot.memory:
        bot.memory['last_seen_url'] = tools.SopelMemory()

    # Initialize shortened_urls as a dict if it doesn't exist.
    if 'shortened_urls' not in bot.memory:
        bot.memory['shortened_urls'] = tools.SopelMemory()
|
124
|
|
|
|
|
125
|
|
|
|
|
126
|
|
|
def shutdown(bot):
    """Drop this plugin's transient memory keys.

    Unsets ``url_exclude`` and ``last_seen_url``, but not ``shortened_urls``;
    clearing ``shortened_urls`` would increase API calls, and leaving it in
    memory should not lead to unexpected behavior.
    """
    for key in ('url_exclude', 'last_seen_url'):
        # pop() with a default tolerates keys that were never set
        bot.memory.pop(key, None)
|
135
|
|
|
|
|
136
|
|
|
|
|
137
|
|
|
@module.commands('title')
@module.example(
    '.title https://www.google.com',
    '[ Google ] - www.google.com',
    online=True)
def title_command(bot, trigger):
    """
    Show the title or URL information for the given URL, or the last URL seen
    in this channel.
    """
    if trigger.group(2):
        urls = web.search_urls(
            trigger,
            exclusion_char=bot.config.url.exclusion_char)
    else:
        # No URL given: fall back to the channel's last seen URL, if any
        if trigger.sender not in bot.memory['last_seen_url']:
            return
        last_url = bot.memory['last_seen_url'][trigger.sender]
        if check_callbacks(bot, last_url):
            # Another module owns this URL; don't double-report it
            return
        urls = [last_url]

    for url, title, domain, tinyurl in process_urls(bot, trigger, urls):
        message = '[ %s ] - %s' % (title, domain)
        if tinyurl:
            message += ' ( %s )' % tinyurl
        bot.reply(message)
        bot.memory['last_seen_url'][trigger.sender] = url
|
167
|
|
|
|
|
168
|
|
|
|
|
169
|
|
|
@module.rule(r'(?u).*(https?://\S+).*')
def title_auto(bot, trigger):
    """
    Automatically show titles for URLs. For shortened URLs/redirects, find
    where the URL redirects to and show the title for that (or call a function
    from another module to give more information).
    """
    # Don't double-handle a line that is an explicit title command
    if re.match(bot.config.core.prefix + 'title', trigger):
        return

    # Avoid fetching known malicious links
    if 'safety_cache' in bot.memory:
        safety_cache = bot.memory['safety_cache']
        if trigger in safety_cache and safety_cache[trigger]['positives'] > 1:
            return

    urls = web.search_urls(
        trigger, exclusion_char=bot.config.url.exclusion_char, clean=True)

    for url, title, domain, tinyurl in process_urls(bot, trigger, urls):
        parts = ['[ %s ] - %s' % (title, domain)]
        if tinyurl:
            parts.append('( %s )' % tinyurl)
        message = ' '.join(parts)
        # Guard against responding to other instances of this bot.
        if message != trigger:
            bot.say(message)
            bot.memory['last_seen_url'][trigger.sender] = url
|
195
|
|
|
|
|
196
|
|
|
|
|
197
|
|
|
def process_urls(bot, trigger, urls):
    """Filter a list of URLs and fetch titles for the survivors.

    :param bot: Sopel instance
    :param trigger: the line that contained the URLs
    :param list urls: URLs found in the triggering message
    :return: a generator of ``(url, title, hostname, tinyurl)`` tuples, one
             for each URL that is not excluded or handled by another module

    For each URL in the list, ensure that it isn't handled by another module.
    If ``enable_private_resolution`` is off, URLs pointing at private or
    loopback addresses (literal IPs, and optionally any DNS result when
    ``enable_dns_resolution`` is on) are skipped. ``tinyurl`` is ``None``
    unless the URL exceeds ``shorten_url_length``.
    """
    shorten_url_length = bot.config.url.shorten_url_length
    for url in urls:
        # Exclude URLs that start with the exclusion char
        if url.startswith(bot.config.url.exclusion_char):
            continue

        # Check the URL does not match an existing URL callback
        if check_callbacks(bot, url):
            continue

        # Prevent private addresses from being queried if
        # enable_private_resolution is False
        if not bot.config.url.enable_private_resolution:
            parsed = urlparse(url)

            # Check if it's a literal address like http://192.168.1.1
            try:
                ip = ipaddress.ip_address(parsed.hostname)
            except ValueError:
                # Hostname is a domain name, not a literal IP address
                ip = None
            if ip is not None and (ip.is_private or ip.is_loopback):
                continue

            # If enable_dns_resolution is set, resolve domain names too, to
            # catch records that point into private/loopback space. Literal
            # IPs were already checked above and are not looked up.
            if ip is None and bot.config.url.enable_dns_resolution:
                try:
                    answers = dns.resolver.query(parsed.hostname)
                except dns.exception.DNSException:
                    # Unresolvable hostname: fetching it would fail anyway
                    continue
                private = False
                for result in answers:
                    # to_text() converts the rdata object into a string that
                    # ipaddress accepts; check each result, not the hostname
                    result_ip = ipaddress.ip_address(result.to_text())
                    if result_ip.is_private or result_ip.is_loopback:
                        private = True
                        break
                if private:
                    continue

        # Call the URL to get a title, if possible
        title = find_title(url, verify=bot.config.core.verify_ssl)
        if not title:
            # No title found: don't handle this URL
            continue

        # If the URL is over bot.config.url.shorten_url_length, shorten the URL
        tinyurl = None
        if (shorten_url_length > 0) and (len(url) > shorten_url_length):
            tinyurl = get_or_create_shorturl(bot, url)

        yield (url, title, get_hostname(url), tinyurl)
|
247
|
|
|
|
|
248
|
|
|
|
|
249
|
|
|
def check_callbacks(bot, url):
    """Check if ``url`` is excluded or matches any URL callback patterns.

    :param bot: Sopel instance
    :param str url: URL to check
    :return: ``True`` if ``url`` is excluded or matches any URL Callback
             pattern, ``False`` otherwise

    The ``url_exclude`` patterns stored in ``bot.memory`` are consulted
    first; if none of them match, the ``bot``'s registered URL callback
    patterns are checked.

    .. seealso::

        The :func:`~sopel.modules.url.setup` function that defines the
        ``url_exclude`` in ``bot.memory``.

    .. versionchanged:: 7.0

        This function **does not** trigger URL callbacks anymore when ``url``
        matches a pattern.

    """
    # The exclusion list always wins
    for regex in bot.memory['url_exclude']:
        if regex.search(url):
            return True
    # Otherwise defer to the registered URL callback patterns
    return any(bot.search_url_callbacks(url))
|
275
|
|
|
|
|
276
|
|
|
|
|
277
|
|
|
def find_title(url, verify=True):
    """Return the title for the given URL, or ``None`` if none can be found.

    :param str url: the URL to fetch
    :param bool verify: whether to verify the server's TLS certificate
    :return: the cleaned-up contents of the page's ``<title>`` tag, truncated
             to 200 characters, or ``None`` on any fetch failure
    """
    try:
        # Stream the response so reading can stop early; the timeout keeps a
        # slow or unresponsive server from hanging the bot indefinitely.
        response = requests.get(url, stream=True, verify=verify,
                                timeout=(10.0, 5.0),
                                headers=default_headers)
        content = b''
        for byte in response.iter_content(chunk_size=512):
            content += byte
            # Stop once the closing title tag appears, or once more than
            # max_bytes have been read (don't slurp huge files/streams).
            if b'</title>' in content or len(content) > max_bytes:
                break
        content = content.decode('utf-8', errors='ignore')
        # Need to close the connection because we have not read all
        # the data
        response.close()
    except requests.exceptions.RequestException:
        # Covers connection errors, timeouts, invalid URLs, redirect loops,
        # etc. -- any of which simply means "no title available".
        return None

    # Some cleanup that I don't really grok, but was in the original, so
    # we'll keep it (with the compiled regexes made global) for now.
    content = title_tag_data.sub(r'<\1title>', content)
    content = quoted_title.sub('', content)

    start = content.rfind('<title>')
    end = content.rfind('</title>')
    if start == -1 or end == -1:
        return
    title = web.decode(content[start + 7:end])
    title = title.strip()[:200]

    title = ' '.join(title.split())  # cleanly remove multiple spaces

    # Strip "DCC SEND" -- presumably to block DCC injection via crafted
    # titles; kept from the earliest versions of this module.
    title = re_dcc.sub('', title)

    return title or None
|
312
|
|
|
|
|
313
|
|
|
|
|
314
|
|
|
def get_hostname(url):
    """Return the host portion of ``url`` (everything between the scheme
    prefix and the first slash, including any port or userinfo).
    """
    # Strip the scheme; anything other than https/ftp is assumed to carry a
    # 7-character 'http://' prefix.
    if url.startswith('https://'):
        remainder = url[8:]
    elif url.startswith('ftp://'):
        remainder = url[6:]
    else:
        remainder = url[7:]
    # Everything before the first '/' (or the whole string if there is none)
    return remainder.partition('/')[0]
|
325
|
|
|
|
|
326
|
|
|
|
|
327
|
|
|
def get_or_create_shorturl(bot, url):
    """Get or create a short URL for ``url``

    :param bot: Sopel instance
    :param str url: URL to get or create a short URL for
    :return: A short URL
    :rtype: str

    The short URL is served from the bot's memory when it is already known;
    otherwise one is created (see :func:`get_tinyurl`), cached in memory, and
    returned.
    """
    cache = bot.memory['shortened_urls']
    if url not in cache:
        cache[url] = get_tinyurl(url)
    return cache[url]
|
347
|
|
|
|
|
348
|
|
|
|
|
349
|
|
|
def get_tinyurl(url):
    """Returns a shortened tinyURL link of the URL"""
    base_url = "https://tinyurl.com/api-create.php"
    request_url = "%s?%s" % (base_url, web.urlencode({'url': url}))
    try:
        response = requests.get(request_url)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        return None
    # Replace text output with https instead of http to make the
    # result an HTTPS link.
    return response.text.replace("http://", "https://")
|
361
|
|
|
|
|
362
|
|
|
|
|
363
|
|
|
if __name__ == "__main__":
    # When run directly, execute this module's @example-decorated tests
    from sopel.test_tools import run_example_tests
    run_example_tests(__file__)
|
366
|
|
|
|