tumdlr.sanitize_filename() - Code Metrics - FujiMakoto/tumdlr - Measure and Improve Code Quality continuously with Scrutinizer

tumdlr.sanitize_filename() D
last analyzed 2016-03-14 02:42 UTC

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
dl	0
loc	49
rs	4.7619
cc	8

import html
import os
import re
import unicodedata

import click
from humanize import naturalsize
from requests import Session


def download(url, filename, progress_data=None, session=None, silent=False):
    """
    Initiate a file download and display the progress

    Args:
        url(str):               Download URL
        filename(str):          Path to save the file to
        progress_data(dict):    Static information to display above the progress bar
        session(Session):       An optional download session to use
        silent(bool):           Download the file, but don't print any output

    Returns:

    """
    # Set up our requests session and make sure the filepath exists
    session = session or Session()
    os.makedirs(os.path.dirname(filename), 0o755, True)

    # Test the connection
    response = session.head(url, allow_redirects=True)  # type: Response
    response.raise_for_status()

    # Get some information about the file we are downloading
    filesize    = naturalsize(response.headers.get('content-length', 0))
    filetype    = response.headers.get('content-type', 'Unknown')

    # Format the information output
    info_lines = [
        click.style('Saving to: ', bold=True) + filename,
        click.style('File type: ', bold=True) + filetype,
        click.style('File size: ', bold=True) + filesize
    ]

    if progress_data:
        for key, value in progress_data.items():
            info_lines.append('{key} {value}'.format(key=click.style(key + ':', bold=True), value=value))

    # Print the static information now
    click.echo()
    for line in info_lines:
        click.echo(line)

    # Now let's make the real download request
    response = session.get(url, allow_redirects=True)  # type: Response

    # Process the download
    with open(filename, 'wb') as file:
        length = int(response.headers.get('content-length', 0))

        with click.progressbar(response.iter_content(1024), (length / 1024)) as progress:
            for chunk in progress:
                if chunk:
                    file.write(chunk)
                    file.flush()


def sanitize_filename(name):
    """
    Replace reserved characters/names with underscores (windows)

    Args:
        name(str)

    Returns:
        str
    """
    if isinstance(name, int):
        return str(name)

    if os.sep == '/':
        bad_chars = re.compile(r'^\.|\.$|^ | $|^$|\?|:|<|>|\||\*|\"|/')
    else:
        bad_chars = re.compile(r'^\.|\.$|^ | $|^$|\?|:|<|>|/|\||\*|\"|\\')

    bad_names = re.compile(r'(aux|com[1-9]|con|lpt[1-9]|prn)(\.|$)')

    # Unescape '&amp;', '&lt;', and '&gt;'
    name = html.unescape(name)

    # Replace bad characters with an underscore
    name = bad_chars.sub('_', name)
    if bad_names.match(name):
        name = '_' + name

    # Replace newlines with spaces
    name = name.replace("\r", '')
    name = name.replace("\n", ' ')

    # Yavos (?)
    while name.find('.\\') != -1:
        name = name.replace('.\\', '\\')

    name = name.replace('\\', os.sep)

    # Replace tab characters with spaces
    name = name.replace('\t', ' ')

    # Cut to 125 characters
    if len(name) > 125:
        name = name[:125]

    # Remove unicode control characters
    name = ''.join(char for char in name if unicodedata.category(char)[0] != "C")

    return name.strip()


1			import html
2			import os
3			import re
4			import unicodedata
5
6			import click
7			from humanize import naturalsize
8			from requests import Session
9
10
11			def download(url, filename, progress_data=None, session=None, silent=False):
12			"""
13			Initiate a file download and display the progress
14
15			Args:
16			url(str): Download URL
17			filename(str): Path to save the file to
18			progress_data(dict): Static information to display above the progress bar
19			session(Session): An optional download session to use
20			silent(bool): Download the file, but don't print any output
21
22			Returns:
23
24			"""
25			# Set up our requests session and make sure the filepath exists
26			session = session or Session()
27			os.makedirs(os.path.dirname(filename), 0o755, True)
28
29			# Test the connection
30			response = session.head(url, allow_redirects=True) # type: Response
31			response.raise_for_status()
32
33			# Get some information about the file we are downloading
34			filesize = naturalsize(response.headers.get('content-length', 0))
35			filetype = response.headers.get('content-type', 'Unknown')
36
37			# Format the information output
38			info_lines = [
39			click.style('Saving to: ', bold=True) + filename,
40			click.style('File type: ', bold=True) + filetype,
41			click.style('File size: ', bold=True) + filesize
42			]
43
44			if progress_data:
45			for key, value in progress_data.items():
46			info_lines.append('{key} {value}'.format(key=click.style(key + ':', bold=True), value=value))
47
48			# Print the static information now
49			click.echo()
50			for line in info_lines:
51			click.echo(line)
52
53			# Now let's make the real download request
54			response = session.get(url, allow_redirects=True) # type: Response
55
56			# Process the download
57			with open(filename, 'wb') as file:
58			length = int(response.headers.get('content-length', 0))
59
60			with click.progressbar(response.iter_content(1024), (length / 1024)) as progress:
61			for chunk in progress:
62			if chunk:
63			file.write(chunk)
64			file.flush()
65
66
67			def sanitize_filename(name):
68			"""
69			Replace reserved characters/names with underscores (windows)
70
71			Args:
72			name(str)
73
74			Returns:
75			str
76			"""
77			if isinstance(name, int):
78			return str(name)
79
80			if os.sep == '/':
81			bad_chars = re.compile(r'^\.\|\.$\|^ \| $\|^$\|\?\|:\|<\|>\|\\|\|\*\|\"\|/')
82			else:
83			bad_chars = re.compile(r'^\.\|\.$\|^ \| $\|^$\|\?\|:\|<\|>\|/\|\\|\|\*\|\"\|\\')
84
85			bad_names = re.compile(r'(aux\|com[1-9]\|con\|lpt[1-9]\|prn)(\.\|$)')
86
87			# Unescape '&', '<', and '>'
88			name = html.unescape(name)
89
90			# Replace bad characters with an underscore
91			name = bad_chars.sub('_', name)
92			if bad_names.match(name):
93			name = '_' + name
94
95			# Replace newlines with spaces
96			name = name.replace("\r", '')
97			name = name.replace("\n", ' ')
98
99			# Yavos (?)
100			while name.find('.\\') != -1:
101			name = name.replace('.\\', '\\')
102
103			name = name.replace('\\', os.sep)
104
105			# Replace tab characters with spaces
106			name = name.replace('\t', ' ')
107
108			# Cut to 125 characters
109			if len(name) > 125:
110			name = name[:125]
111
112			# Remove unicode control characters
113			name = ''.join(char for char in name if unicodedata.category(char)[0] != "C")
114
115			return name.strip()
116

FujiMakoto / tumdlr

tumdlr.sanitize_filename() D last analyzed 2016-03-14 02:42 UTC

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like

tumdlr.sanitize_filename() D
last analyzed 2016-03-14 02:42 UTC