Completed
Push — master ( d6e75b...42acea )
by Felipe A.
01:05
created

TarFileStream.fill()   A

Complexity

Conditions 2

Size

Total Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 6
Bugs 0 Features 1
Metric Value
cc 2
dl 0
loc 15
rs 9.4285
c 6
b 0
f 1
1
#!/usr/bin/env python
2
# -*- coding: UTF-8 -*-
3
4
import os
5
import os.path
6
import re
7
import shutil
8
import codecs
9
import threading
10
import string
11
import tarfile
12
import random
13
import datetime
14
import logging
15
16
from flask import current_app, send_from_directory
17
from werkzeug.utils import cached_property
18
19
from . import compat
20
from .compat import range, deprecated
21
22
23
logger = logging.getLogger(__name__)

# Text-typed underscore used as replacement character by the codec error
# handler below (on legacy Python the literal is bytes and must be decoded).
unicode_underscore = '_'.decode('utf-8') if compat.PY_LEGACY else '_'

# Name of the codec error handler, prefixed with this module's name.
underscore_replace = '%s:underscore' % __name__

# The handler substitutes a single underscore and resumes right after the
# position reported in error.start.
codecs.register_error(
    underscore_replace,
    lambda error: (unicode_underscore, error.start + 1)
    )

# Unit suffixes consumed by fmt_size.
binary_units = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")
standard_units = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")

# Characters treated as path separators by generic_filename.
common_path_separators = '\\/'

# Characters replaced with underscores by clean_restricted_chars.
restricted_chars = '\\/\0'

# Filenames rejected by check_forbidden_filename regardless of OS.
restricted_names = ('.', '..', '::', os.sep)

# Reserved device names on Windows ('nt'): CON, PRN, AUX, NUL, COM1..COM9 and
# LPT1..LPT9. The previous tuple stopped at COM4/LPT3 and let the remaining
# reserved names through.
nt_device_names = (
    ('CON', 'PRN', 'AUX', 'NUL') +
    tuple('COM%d' % number for number in range(1, 10)) +
    tuple('LPT%d' % number for number in range(1, 10))
    )

# Alphabet used for random filename suffixes in alternative_filename.
fs_safe_characters = string.ascii_uppercase + string.digits
37
38
39
class Node(object):
    '''
    Abstract filesystem node class.

    This represents an unspecified entity with a filesystem's path suitable for
    being inherited by plugins.

    When inheriting, the following attributes should be overwritten in order
    to specify :meth:`from_urlpath` classmethod behavior:

    * :attr:`generic`, if true, an instance of directory_class or file_class
      will be created instead of an instance of this class itself.
    * :attr:`directory_class`, class will be used for directory nodes,
    * :attr:`file_class`, class will be used for file nodes.
    '''
    generic = True
    directory_class = None  # set later at import time
    file_class = None  # set later at import time

    # Extracts the charset parameter out of a full mimetype string.
    re_charset = re.compile('; charset=(?P<charset>[^;]+)')
    can_download = False

    @cached_property
    def plugin_manager(self):
        '''
        Get current app's plugin manager.

        :returns: plugin manager instance
        '''
        return self.app.extensions['plugin_manager']

    @cached_property
    def widgets(self):
        '''
        List widgets with filter return True for this node (or without filter).

        Remove button is prepended if :attr:`can_remove` is true.

        :returns: list of widgets
        :rtype: list of namedtuple instances
        '''
        widgets = []
        if self.can_remove:
            widgets.append(
                self.plugin_manager.create_widget(
                    'entry-actions',
                    'button',
                    file=self,
                    css='remove',
                    endpoint='remove'
                    )
                )
        return widgets + self.plugin_manager.get_widgets(file=self)

    @cached_property
    def link(self):
        '''
        Get last widget with place "entry-link".

        :returns: widget on entry-link (ideally a link one), None if absent
        :rtype: namedtuple instance or None
        '''
        link = None
        for widget in self.widgets:
            if widget.place == 'entry-link':
                link = widget  # keep iterating: the last match wins
        return link

    @cached_property
    def can_remove(self):
        '''
        Get if current node can be removed based on app config's
        directory_remove.

        :returns: True if current node can be removed, False otherwise.
        :rtype: bool
        '''
        dirbase = self.app.config["directory_remove"]
        # Coerce to bool so a disabled (None or empty) setting yields False
        # instead of leaking the raw config value to callers.
        return bool(dirbase) and self.path.startswith(dirbase + os.sep)

    @cached_property
    def stats(self):
        '''
        Get current stats object as returned by os.stat function.

        :returns: stats object
        :rtype: posix.stat_result or nt.stat_result
        '''
        return os.stat(self.path)

    @cached_property
    def parent(self):
        '''
        Get parent node if available based on app config's directory_base.

        :returns: parent object if available
        :rtype: Node instance or None
        '''
        if self.path == self.app.config['directory_base']:
            return None
        parent = os.path.dirname(self.path) if self.path else None
        return self.directory_class(parent, self.app) if parent else None

    @cached_property
    def ancestors(self):
        '''
        Get list of ancestors until app config's directory_base is reached.

        :returns: list of ancestors starting from nearest.
        :rtype: list of Node objects
        '''
        ancestors = []
        parent = self.parent
        while parent:
            ancestors.append(parent)
            parent = parent.parent
        return ancestors

    @property
    def modified(self):
        '''
        Get human-readable last modification date-time.

        :returns: date-time string formatted as ``YYYY.MM.DD HH:MM:SS``
                  (without timezone)
        :rtype: str
        '''
        dt = datetime.datetime.fromtimestamp(self.stats.st_mtime)
        return dt.strftime('%Y.%m.%d %H:%M:%S')

    @property
    def urlpath(self):
        '''
        Get the url substring corresponding to this node for those endpoints
        accepting a 'path' parameter, suitable for :meth:`from_urlpath`.

        :returns: relative-url-like for node's path
        :rtype: str
        '''
        return abspath_to_urlpath(self.path, self.app.config['directory_base'])

    @property
    def name(self):
        '''
        Get the basename portion of node's path.

        :returns: filename
        :rtype: str
        '''
        return os.path.basename(self.path)

    @property
    def type(self):
        '''
        Get the mime portion of node's mimetype (without the encoding part).

        :returns: mimetype
        :rtype: str
        '''
        return self.mimetype.split(";", 1)[0]

    @property
    def category(self):
        '''
        Get mimetype category (first portion of mimetype before the slash).

        :returns: mimetype category
        :rtype: str

        As of 2016-11-03's revision of RFC2046 it could be one of the
        following:
            * application
            * audio
            * example
            * image
            * message
            * model
            * multipart
            * text
            * video
        '''
        return self.type.split('/', 1)[0]

    def __init__(self, path=None, app=None, **defaults):
        '''
        :param path: local path
        :type path: str
        :param app: optional app instance, current app is used when omitted
        :type app: flask.app
        :param **defaults: attributes will be set to object
        '''
        self.path = compat.fsdecode(path) if path else None
        self.app = current_app if app is None else app
        self.__dict__.update(defaults)  # only for attr and cached_property

    def remove(self):
        '''
        Does nothing except raising if can_remove property returns False.

        :raises: OutsideRemovableBase if :attr:`can_remove` is false
        '''
        if not self.can_remove:
            raise OutsideRemovableBase("File outside removable base")

    @classmethod
    def from_urlpath(cls, path, app=None):
        '''
        Alternative constructor which accepts a path as taken from URL and uses
        the given app or the current app config to get the real path.

        If class has attribute `generic` set to True, `directory_class` or
        `file_class` will be used as type.

        :param path: relative path as from URL
        :param app: optional, flask application
        :return: file object pointing to path
        :rtype: File
        '''
        app = app or current_app
        base = app.config['directory_base']
        path = urlpath_to_abspath(path, base)
        if not cls.generic:
            kls = cls
        elif os.path.isdir(path):
            kls = cls.directory_class
        else:
            kls = cls.file_class
        return kls(path=path, app=app)

    @classmethod
    def register_file_class(cls, kls):
        '''
        Convenience method for setting current class file_class property.

        :param kls: class to set
        :type kls: type
        :returns: given class (enabling using this as decorator)
        :rtype: type
        '''
        cls.file_class = kls
        return kls

    @classmethod
    def register_directory_class(cls, kls):
        '''
        Convenience method for setting current class directory_class property.

        :param kls: class to set
        :type kls: type
        :returns: given class (enabling using this as decorator)
        :rtype: type
        '''
        cls.directory_class = kls
        return kls
292
293
294
@Node.register_file_class
class File(Node):
    '''
    Node specialization for filesystem files.

    Class-level flags:

    * :attr:`can_download` is True, files can always be downloaded.
    * :attr:`can_upload` is False, nothing can be uploaded onto a file path.
    * :attr:`is_directory` is False, no filesystem check is made for it.
    * :attr:`generic` is False, so :meth:`from_urlpath` called on this class
      always returns instances of this class.
    '''
    generic = False
    is_directory = False
    can_upload = False
    can_download = True

    @cached_property
    def widgets(self):
        '''
        Collect the widgets applying to this file.

        The "open" entry link goes first, then the download button when
        :attr:`can_download` is true, then whatever the base class provides.

        :returns: list of widgets
        :rtype: list of namedtuple instances
        '''
        create = self.plugin_manager.create_widget
        collected = [
            create('entry-link', 'link', file=self, endpoint='open'),
            ]
        if self.can_download:
            download_button = create(
                'entry-actions',
                'button',
                file=self,
                css='download',
                endpoint='download_file'
                )
            collected.append(download_button)
        collected.extend(super(File, self).widgets)
        return collected

    @cached_property
    def mimetype(self):
        '''
        Ask the plugin manager for the full mimetype of this file's path.

        :returns: mimetype, including encoding when available
        :rtype: str
        '''
        manager = self.plugin_manager
        return manager.get_mimetype(self.path)

    @cached_property
    def is_file(self):
        '''
        Check whether the path points to a regular file.

        :returns: True if file, False otherwise
        :rtype: bool
        '''
        return os.path.isfile(self.path)

    @property
    def size(self):
        '''
        Get the human-readable size taken from the node's stats.

        The plain byte unit is rendered as an integer, bigger units with two
        decimal places.

        :returns: fuzzy size with unit
        :rtype: str
        '''
        use_binary = self.app.config["use_binary_multiples"]
        amount, unit = fmt_size(self.stats.st_size, use_binary)
        template = "%d %s" if unit == binary_units[0] else "%.2f %s"
        return template % (amount, unit)

    @property
    def encoding(self):
        '''
        Get the charset portion of the mimetype.

        :returns: charset found on the mimetype, "default" when there is none
        :rtype: str
        '''
        mimetype = self.mimetype
        if ";" not in mimetype:
            return "default"
        found = self.re_charset.search(mimetype)
        if found is None:
            return "default"
        return found.group("charset") or "default"

    def remove(self):
        '''
        Delete this file from the filesystem.

        :raises OutsideRemovableBase: when not under removable base directory
        '''
        super(File, self).remove()  # raises when removal is not allowed
        os.unlink(self.path)

    def download(self):
        '''
        Build a response sending this file as an attachment.

        :returns: Response object as returned by flask's send_from_directory
        :rtype: flask.Response
        '''
        dirname, basename = os.path.split(self.path)
        return send_from_directory(dirname, basename, as_attachment=True)
415
416
417
@Node.register_directory_class
class Directory(Node):
    '''
    Filesystem directory class.

    Some notes:

    * :attr:`mimetype` is fixed to 'inode/directory', so mimetype detection
      functions won't be called in this case.
    * :attr:`is_file` is fixed to False, so no further checks are needed.
    * :attr:`size` is fixed to 0 (zero), so stats are not required for this.
    * :attr:`encoding` is fixed to 'default'.
    * :attr:`generic` is set to False, so static method :meth:`from_urlpath`
      will always return instances of this class.
    '''
    _listdir_cache = None  # filled by the first listdir() call
    mimetype = 'inode/directory'
    is_file = False
    size = 0
    encoding = 'default'
    generic = False

    @cached_property
    def widgets(self):
        '''
        List widgets with filter return True for this dir (or without filter).

        Entry link is prepended.
        Upload scripts and widget are added if :attr:`can_upload` is true.
        Download button is added if :attr:`can_download` is true.
        Remove button comes from the base class if :attr:`can_remove` is true.

        :returns: list of widgets
        :rtype: list of namedtuple instances
        '''
        widgets = [
            self.plugin_manager.create_widget(
                'entry-link',
                'link',
                file=self,
                endpoint='browse'
                )
            ]
        if self.can_upload:
            widgets.extend((
                self.plugin_manager.create_widget(
                    'head',
                    'script',
                    file=self,
                    endpoint='static',
                    filename='browse.directory.head.js'
                ),
                self.plugin_manager.create_widget(
                    'scripts',
                    'script',
                    file=self,
                    endpoint='static',
                    filename='browse.directory.body.js'
                ),
                self.plugin_manager.create_widget(
                    'header',
                    'upload',
                    file=self,
                    text='Upload',
                    endpoint='upload'
                    )
                ))
        if self.can_download:
            widgets.append(
                self.plugin_manager.create_widget(
                    'entry-actions',
                    'button',
                    file=self,
                    css='download',
                    endpoint='download_directory'
                    )
                )
        return widgets + super(Directory, self).widgets

    @cached_property
    def is_directory(self):
        '''
        Get if path points to a real directory.

        :returns: True if real directory, False otherwise
        :rtype: bool
        '''
        return os.path.isdir(self.path)

    @cached_property
    def can_download(self):
        '''
        Get if path is downloadable (if app's `directory_downloadable` config
        property is True).

        :returns: True if downloadable, False otherwise
        :rtype: bool
        '''
        return self.app.config['directory_downloadable']

    @cached_property
    def can_upload(self):
        '''
        Get if a file can be uploaded to path (if directory path is under app's
        `directory_upload` config property).

        :returns: True if a file can be upload to directory, False otherwise
        :rtype: bool
        '''
        dirbase = self.app.config["directory_upload"]
        # Coerce to bool so a disabled (None or empty) setting yields False
        # instead of leaking the raw config value to callers.
        return bool(dirbase) and (
            dirbase == self.path or
            self.path.startswith(dirbase + os.sep)
            )

    @cached_property
    def is_empty(self):
        '''
        Get if directory is empty (based on :meth:`_listdir`).

        :returns: True if this directory has no entries, False otherwise.
        :rtype: bool
        '''
        if self._listdir_cache is not None:
            # An empty cached listing means an empty directory.
            return not self._listdir_cache
        for entry in self._listdir():
            return False  # a single entry is enough, stop scanning
        return True

    def remove(self):
        '''
        Remove directory tree.

        :raises OutsideRemovableBase: when not under removable base directory
        '''
        super(Directory, self).remove()
        shutil.rmtree(self.path)

    def download(self):
        '''
        Get a Flask Response object streaming a tarball of this directory.

        :returns: Response object
        :rtype: flask.Response
        '''
        return self.app.response_class(
            TarFileStream(
                self.path,
                self.app.config["directory_tar_buffsize"]
                ),
            mimetype="application/octet-stream"
            )

    def contains(self, filename):
        '''
        Check if directory contains an entry with given filename.

        :param filename: filename will be check
        :type filename: str
        :returns: True if exists, False otherwise.
        :rtype: bool
        '''
        return os.path.exists(os.path.join(self.path, filename))

    def choose_filename(self, filename, attempts=999):
        '''
        Get a new filename which does not collide with any entry on directory,
        based on given filename.

        Numbered alternatives are tried first; once attempts are exhausted,
        random alternatives are generated until a free one is found.

        :param filename: base filename
        :type filename: str
        :param attempts: number of attempts, defaults to 999
        :type attempts: int
        :returns: filename
        :rtype: str
        '''
        new_filename = filename
        for attempt in range(2, attempts + 1):
            if not self.contains(new_filename):
                return new_filename
            new_filename = alternative_filename(filename, attempt)
        while self.contains(new_filename):
            new_filename = alternative_filename(filename)
        return new_filename

    def _listdir(self):
        '''
        Iter unsorted entries on this directory.

        :yields: Directory or File instance for each entry in directory
        :ytype: Node
        '''
        precomputed_stats = os.name == 'nt'
        for entry in compat.scandir(self.path):
            kwargs = {'path': entry.path, 'app': self.app, 'parent': self}
            if precomputed_stats and not entry.is_symlink():
                # Directory entries expose stat(), there is no stats() method.
                kwargs['stats'] = entry.stat()
            if entry.is_dir(follow_symlinks=True):
                yield self.directory_class(**kwargs)
            else:
                yield self.file_class(**kwargs)

    def listdir(self, sortkey=None, reverse=False):
        '''
        Get sorted list (by given sortkey and reverse params) of File objects.

        The result of the first call is cached on the instance, later calls
        return that same list regardless of the parameters given.

        :param sortkey: optional key function used for sorting
        :param reverse: whether the order should be reversed
        :return: sorted list of File instances
        :rtype: list of File
        '''
        if self._listdir_cache is None:
            if sortkey:
                data = sorted(self._listdir(), key=sortkey, reverse=reverse)
            else:
                data = list(self._listdir())
                if reverse:
                    # reversed() rejects generators, so reverse the list
                    # after materializing it.
                    data.reverse()
            self._listdir_cache = data
        return self._listdir_cache
635
636
637
class TarFileStream(object):
    '''
    Tarfile which compresses while reading for streaming.

    Buffsize can be provided, it must be 512 multiple (the tar block size) for
    compression.

    Note on coroutines: this class uses threading by default, but
    coroutine-based applications can change this behavior overriding the
    :attr:`event_class` and :attr:`thread_class` values.
    '''
    event_class = threading.Event
    thread_class = threading.Thread
    tarfile_class = tarfile.open

    def __init__(self, path, buffsize=10240):
        '''
        Internal tarfile object will be created, and compression will start
        on a thread until buffer became full with writes becoming locked until
        a read occurs.

        :param path: local path of directory whose content will be compressed.
        :type path: str
        :param buffsize: size of internal buffer on bytes, defaults to 10KiB
        :type buffsize: int
        '''
        self.path = path
        self.name = os.path.basename(path) + ".tgz"

        # 0: worker running, 1: worker done, 2: end of stream already reported
        self._finished = 0
        self._want = 0
        self._data = bytes()
        self._add = self.event_class()  # set by reader: writer may proceed
        self._result = self.event_class()  # set by writer: data is ready
        self._tarfile = self.tarfile_class(  # stream write
            fileobj=self,
            mode="w|gz",
            bufsize=buffsize
            )
        self._th = self.thread_class(target=self.fill)
        self._th.start()

    def fill(self):
        '''
        Writes data on internal tarfile instance, which writes to current
        object, using :meth:`write`.

        As this method is blocking, it is used inside a thread.

        This method is called automatically, on a thread, on initialization,
        so there is little need to call it manually.
        '''
        try:
            self._tarfile.add(self.path, "")
            self._tarfile.close()  # force stream flush
        finally:
            # Always wake the reader up, even when archiving failed,
            # otherwise it would wait forever for data that never arrives.
            self._finished += 1
            if not self._result.is_set():
                self._result.set()

    def write(self, data):
        '''
        Write method used by internal tarfile instance to output data.
        This method blocks tarfile execution once internal buffer is full.

        As this method is blocking, it is used inside the same thread of
        :meth:`fill`.

        :param data: bytes to write to internal buffer
        :type data: bytes
        :returns: number of bytes written
        :rtype: int
        '''
        self._add.wait()
        self._data += data
        if len(self._data) > self._want:
            # Enough data for the pending read: pause writes, wake reader.
            self._add.clear()
            self._result.set()
        return len(data)

    def _take(self, want):
        '''
        Remove and return up to `want` bytes from the internal buffer.

        :param want: number of bytes to take, 0 means the whole buffer
        :type want: int
        :returns: data taken out of the buffer
        :rtype: bytes
        '''
        if want:
            data = self._data[:want]
            self._data = self._data[want:]
        else:
            data = self._data
            self._data = bytes()
        return data

    def read(self, want=0):
        '''
        Read method, gets data from internal buffer while releasing
        :meth:`write` locks when needed.

        The lock usage means it must ran on a different thread than
        :meth:`fill`, ie. the main thread, otherwise will deadlock.

        The combination of both write and this method running on different
        threads makes tarfile being streamed on-the-fly, with data chunks being
        processed and retrieved on demand.

        :param want: number bytes to read, defaults to 0 (all available)
        :type want: int
        :returns: tarfile data as bytes, empty bytes once at end of stream
        :rtype: bytes
        :raises EOFError: when reading after end of stream has been reported
        '''
        if self._finished:
            if self._data:
                # Worker is done, drain what a sized read left buffered.
                return self._take(want)
            if self._finished == 1:
                self._finished += 1
                return bytes()
            raise EOFError("EOF reached")

        # Thread communication
        self._want = want
        self._add.set()
        self._result.wait()
        self._result.clear()

        return self._take(want)

    def __iter__(self):
        '''
        Iterate through tarfile result chunks.

        Similarly to :meth:`read`, this method must ran on a different thread
        than :meth:`write` calls.

        :yields: data chunks as taken from :meth:`read`.
        :ytype: bytes
        '''
        data = self.read()
        while data:
            yield data
            data = self.read()
766
767
768
class OutsideDirectoryBase(Exception):
    '''
    Error raised when a path lies outside the directory configured by the
    `directory_base` config property.
    '''
774
775
776
class OutsideRemovableBase(Exception):
    '''
    Error raised when a path lies outside the directory configured by the
    `directory_remove` config property.
    '''
782
783
784
def fmt_size(size, binary=True):
    '''
    Scale a byte count down to the first unit where it drops below 1000.

    :param size: size in bytes
    :param binary: divide by 1024 and use binary unit names when true,
                   divide by 1000 and use standard ones otherwise
    :return: scaled size and its unit
    :rtype: tuple of number and unit as str
    '''
    if binary:
        units, divider = binary_units, 1024.
    else:
        units, divider = standard_units, 1000.
    index = 0
    last = len(units) - 1
    # Stop at the biggest unit even if the value is still large.
    while index < last and not size < 1000:
        size /= divider
        index += 1
    return size, units[index]
804
805
806
def relativize_path(path, base, os_sep=os.sep):
    '''
    Strip an absolute base (and the separator following it) from a path.

    :param path: absolute path
    :param base: absolute base path
    :param os_sep: path component separator, defaults to current OS separator
    :return: relative path
    :rtype: str or unicode
    :raises OutsideDirectoryBase: if path is not below base
    '''
    if check_under_base(path, base, os_sep):
        # Skip the separator too unless base already ends with it.
        skip = len(base) if base.endswith(os_sep) else len(base) + len(os_sep)
        return path[skip:]
    raise OutsideDirectoryBase("%r is not under %r" % (path, base))
823
824
825
def abspath_to_urlpath(path, base, os_sep=os.sep):
    '''
    Convert an absolute filesystem path into a slash-separated path relative
    to the given absolute base.

    :param path: absolute path
    :param base: absolute base path
    :param os_sep: path component separator, defaults to current OS separator
    :return: relative uri
    :rtype: str or unicode
    :raises OutsideDirectoryBase: if resulting path is not below base
    '''
    relative = relativize_path(path, base, os_sep)
    urlpath = relative.replace(os_sep, '/')
    return urlpath
837
838
839
def urlpath_to_abspath(path, base, os_sep=os.sep):
    '''
    Convert a slash-separated relative path into an absolute filesystem path
    below the given absolute base.

    :param path: relative path
    :param base: absolute base path
    :param os_sep: path component separator, defaults to current OS separator
    :return: absolute path
    :rtype: str or unicode
    :raises OutsideDirectoryBase: if resulting path is not below base
    '''
    if base.endswith(os_sep):
        prefix = base
    else:
        prefix = base + os_sep
    native = path.replace('/', os_sep)
    realpath = os.path.abspath(prefix + native)
    # Normalization may have climbed out of base (e.g. through '..').
    if realpath != base and not realpath.startswith(prefix):
        raise OutsideDirectoryBase("%r is not under %r" % (realpath, base))
    return realpath
855
856
857
def generic_filename(path):
    '''
    Get the last component of a path, whatever known separator it uses.

    Every character in `common_path_separators` is handled in turn, so the
    result does not depend on the current operating system.

    :param path: path
    :return: filename
    :rtype: str or unicode (depending on given path)
    '''
    for separator in common_path_separators:
        # Without the separator the path comes back untouched.
        path = path.rsplit(separator, 1)[-1]
    return path
871
872
873
def clean_restricted_chars(path, restricted_chars=restricted_chars):
    '''
    Replace every restricted character found on path with an underscore.

    :param path: path
    :param restricted_chars: characters to replace, defaults to the
                             module-level `restricted_chars`
    :return: path without restricted characters
    :rtype: str or unicode (depending on given path)
    '''
    cleaned = path
    for forbidden in restricted_chars:
        cleaned = '_'.join(cleaned.split(forbidden)) if forbidden else \
            cleaned.replace(forbidden, '_')
    return cleaned
884
885
886
def check_forbidden_filename(filename,
                             destiny_os=os.name,
                             restricted_names=restricted_names):
    '''
    Tell whether a filename must be rejected.

    For 'nt' the portion before the first dot is compared, upper-cased,
    against `nt_device_names`; in every case the whole filename is compared
    against `restricted_names`.

    :param filename: filename to check
    :param destiny_os: destination operative system
    :param restricted_names: collection of filenames which are never allowed
    :return: whether is forbidden on given OS or not
    :rtype: bool
    '''
    targets_nt = destiny_os == 'nt'
    if targets_nt and filename.split('.', 1)[0].upper() in nt_device_names:
        return True
    return filename in restricted_names
904
905
906
def check_under_base(path, base, os_sep=os.sep):
    '''
    Tell whether an absolute path is the given base or lies below it.

    :param path: absolute path
    :param base: absolute base path
    :param os_sep: path component separator, defaults to current OS separator
    :return: whether file is under given base or not
    :rtype: bool
    '''
    if base.endswith(os_sep):
        prefix = base
    else:
        # Append the separator so sibling names sharing a prefix don't match.
        prefix = base + os_sep
    if path == base:
        return True
    return path.startswith(prefix)
917
918
919
def secure_filename(path, destiny_os=os.name, fs_encoding=compat.FS_ENCODING):
    '''
    Reduce an unsafe path to a safe filename.

    Parent path components are dropped and restricted characters replaced;
    forbidden names result on an empty string.

    :param path: unsafe path
    :type: str
    :param destiny_os: destination operative system
    :type destiny_os: str
    :param fs_encoding: destination filesystem filename encoding
    :return: filename or empty string
    :rtype: str
    '''
    candidate = clean_restricted_chars(generic_filename(path))

    if check_forbidden_filename(candidate, destiny_os=destiny_os):
        return ''

    if isinstance(candidate, bytes):
        candidate = candidate.decode('latin-1', errors=underscore_replace)

    # Round-trip through the filesystem encoding so characters it cannot
    # represent are handled by the underscore error handler.
    codec_options = {
        'os_name': destiny_os,
        'fs_encoding': fs_encoding,
        'errors': underscore_replace,
        }
    encoded = compat.fsencode(candidate, **codec_options)
    return compat.fsdecode(encoded, **codec_options)
951
952
953
def alternative_filename(filename, attempt=None):
    '''
    Build a variation of the given filename.

    A marker is inserted between the name and its (up to two) trailing
    extensions: the attempt number between parentheses when given, eight
    random characters from `fs_safe_characters` otherwise.

    :param filename: original filename
    :param attempt: optional attempt number, defaults to None
    :return: new filename
    :rtype: str or unicode
    '''
    pieces = filename.rsplit(u'.', 2)
    stem, suffixes = pieces[0], pieces[1:]
    extension = ''.join(u'.%s' % part for part in suffixes)
    if attempt is not None:
        marker = u' (%d)' % attempt
    else:
        token = ''.join(random.choice(fs_safe_characters) for _ in range(8))
        marker = u' %s' % token
    return u'%s%s%s' % (stem, marker, extension)
974