Completed
Push — master ( f7be8f...74d867 )
by Jaisen
01:58
created

FileSystem.get_folder_path()   C

↳ Parent: FileSystem

Complexity

Conditions 7

Duplication

Lines 0
Ratio 0 %

Size

Total Lines 36

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 7
c 1
b 0
f 0
dl 0
loc 36
rs 5.5
1
"""
2
General file system methods.
3
4
.. moduleauthor:: Jaisen Mathai <[email protected]>
5
"""
6
from __future__ import print_function
7
from builtins import object
8
9
import os
10
import re
11
import shutil
12
import time
13
14
from elodie import compatability
15
from elodie import geolocation
16
from elodie import log
17
from elodie.config import load_config
18
from elodie.localstorage import Db
19
from elodie.media.base import Base, get_all_subclasses
20
21
22
class FileSystem(object):
23
    """A class for interacting with the file system."""
24
25
    def __init__(self):
26
        # The default folder path is along the lines of 2015-01-Jan/Chicago
27
        self.default_folder_path_definition = [
28
            ('date', '%Y-%m-%b'), ('location', '%city')
29
        ]
30
        self.cached_folder_path_definition = None
31
32
    def create_directory(self, directory_path):
        """Create a directory (and any missing parents) if it does not exist.

        The directory is created first and an already existing one is
        detected afterwards. Checking for existence up front is race-prone
        and also reports success when the path is a regular file.

        :param str directory_path: A fully qualified path of the directory
            to create.
        :returns: bool -- True if the directory exists when this method
            returns, False otherwise.
        """
        try:
            os.makedirs(directory_path)
        except OSError:
            # OSError is raised when the path already exists as well as for
            # cases like no permission. Only an existing *directory* counts
            # as success.
            return os.path.isdir(directory_path)

        return True
50
51
    def delete_directory_if_empty(self, directory_path):
        """Remove a directory, but only when it has no contents.

        The directory is not listed beforehand; removal is simply attempted
        and an OSError from `os.rmdir` is treated as "not removed".

        :param str directory_path: A fully qualified path of the directory
            to delete.
        :returns: bool -- True if the directory was removed, False if
            `os.rmdir` raised OSError.
        """
        try:
            os.rmdir(directory_path)
        except OSError:
            removed = False
        else:
            removed = True

        return removed
67
68
    def get_all_files(self, path, extensions=None):
        """Recursively yield all files under a path with a wanted extension.

        :param str path: Path to start the recursive file listing from.
        :param tuple(str) extensions: File extensions to include (whitelist),
            written without the leading dot. Matching is case-insensitive.
            If empty or None, the `extensions` of the classes returned by
            `get_all_subclasses(Base)` are used instead.
        :returns: generator of file paths (str)
        """
        if not extensions:
            extensions = set()
            for cls in get_all_subclasses(Base):
                extensions.update(cls.extensions)
        elif isinstance(extensions, str):
            # A bare string is a single extension, not an iterable of
            # characters.
            extensions = (extensions,)

        # The extension of each file is lower-cased before the lookup below,
        # so the whitelist has to be lower-cased as well or upper-case
        # entries could never match.
        wanted = set(extension.lower() for extension in extensions)

        for dirname, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                if os.path.splitext(filename)[1][1:].lower() in wanted:
                    yield os.path.join(dirname, filename)
87
88
    def get_current_directory(self):
        """Return the working directory of the running process.

        :returns: str
        """
        current_directory = os.getcwd()
        return current_directory
94
95
    def get_file_name(self, media):
        """Generate file name for a photo or video using its metadata.

        We use an ISO8601-like format for the file name prefix. Instead of
        colons as the separator for hours, minutes and seconds we use a hyphen.
        https://en.wikipedia.org/wiki/ISO_8601#General_principles

        The resulting name is
        `<date taken>-<base name>[-<sanitized title>].<extension>`,
        lower-cased.

        :param media: A Photo or Video instance
        :type media: :class:`~elodie.media.photo.Photo` or
            :class:`~elodie.media.video.Video`
        :returns: str or None if the media is not valid or has no metadata
        """
        if not media.is_valid():
            return None

        metadata = media.get_metadata()
        if metadata is None:
            return None

        # First we check if we have metadata['original_name'].
        # We have to do this for backwards compatibility because
        #   we originally did not store this back into EXIF.
        if metadata.get('original_name'):
            base_name = os.path.splitext(metadata['original_name'])[0]
        else:
            # We want to remove the date prefix we add to the name.
            # This helps when re-running the program on files which were
            #   already processed.
            base_name = re.sub(
                r'^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}-',
                '',
                metadata['base_name']
            )
            # Never end up with an empty base name.
            if not base_name:
                base_name = metadata['base_name']

        # If the file has a title we use that in the file name
        #   (i.e. img_1234-my-favorite-photo.jpg)
        title = metadata.get('title')
        if title:
            title_sanitized = re.sub(r'\W+', '-', title.strip())
            # Drop a title suffix that is already present so it is not
            #   appended twice.
            # NOTE(review): this match is case-sensitive while the final
            #   name is lower-cased; confirm whether already lower-cased
            #   names should be matched as well.
            base_name = base_name.replace('-%s' % title_sanitized, '')
            base_name = '%s-%s' % (base_name, title_sanitized)

        file_name = '%s-%s.%s' % (
            time.strftime('%Y-%m-%d_%H-%M-%S', metadata['date_taken']),
            base_name,
            metadata['extension']
        )
        return file_name.lower()
150
151
    def get_folder_path_definition(self):
        """Return the folder layout as a list of (part, mask) tuples.

        The layout is read from the `Directory` section of the configuration
        returned by `load_config()`. Every `%name` found in its `full_path`
        value becomes one entry whose mask is the value stored under `name`
        in the same section. When there is no `Directory` section, or
        `full_path` contains no `%name` pattern,
        `self.default_folder_path_definition` is used.

        The result is cached on the instance, so the configuration is
        loaded at most once per instance.

        :returns: list of (str, str) tuples
        :raises KeyError: if the `Directory` section has no `full_path` or
            lacks a value for a part referenced by `full_path`.
        """
        # If we've done this already then return it immediately without
        # incurring any extra work
        if self.cached_folder_path_definition is not None:
            return self.cached_folder_path_definition

        config = load_config()

        definition = self.default_folder_path_definition
        # If Directory is in the config we assume full_path and its
        #  corresponding values (date, location) are also present
        if 'Directory' in config:
            config_directory = config['Directory']

            # Find all subpatterns of full_path that map to directories.
            #  I.e. %foo/%bar => ['foo', 'bar']
            path_parts = re.findall(
                r'%([a-z]+)',
                config_directory['full_path']
            )
            if path_parts:
                definition = [
                    (part, config_directory[part]) for part in path_parts
                ]

        # Cache the default as well so the config is not reloaded on every
        # call when no custom layout is configured.
        self.cached_folder_path_definition = definition
        return definition
180
181
    def get_folder_path(self, metadata):
        """Get folder path by various parameters.

        Each (part, mask) entry of the folder path definition contributes one
        path component: date-like parts format `metadata['date_taken']` with
        the mask, location-like parts interpolate the place name looked up
        for `metadata['latitude']` / `metadata['longitude']` into the mask.
        Parts of any other kind are ignored.

        :param dict metadata: Metadata dictionary.
        :returns: str -- the relative folder path, or an empty string when
            no component could be built.
        """
        date_kinds = ('date', 'day', 'month', 'year')
        location_kinds = ('location', 'city', 'state', 'country')

        path = []
        for part, mask in self.get_folder_path_definition():
            if part in date_kinds:
                path.append(time.strftime(mask, metadata['date_taken']))
            elif part in location_kinds:
                place_name = geolocation.place_name(
                    metadata['latitude'],
                    metadata['longitude']
                )

                location_parts = re.findall('(%[^%]+)', mask)
                path.append(self.parse_mask_for_location(
                    mask,
                    location_parts,
                    place_name,
                ))

        # For now we always make the leaf folder an album if it's in the EXIF.
        # This is to preserve backwards compatability until we figure out how
        # to include %album in the config.ini syntax.
        # Paths with more than two components are left untouched.
        album = metadata.get('album')
        if album is not None:
            if len(path) < 2:
                path.append(album)
            elif len(path) == 2:
                path[1] = album

        # os.path.join() requires at least one argument.
        if not path:
            return ''

        return os.path.join(*path)
217
218
    def parse_mask_for_location(self, mask, location_parts, place_name):
        """Takes a mask for a location and interpolates the actual place names.

        Given these parameters here are the outputs.

        mask=%city
        location_parts=['%city']
        place_name={'city': u'Sunnyvale'}
        output=Sunnyvale

        mask=%city-%state
        location_parts=['%city-', '%state']
        place_name={'city': u'Sunnyvale', 'state': u'California'}
        output=Sunnyvale-California

        mask=%country
        location_parts=['%country']
        place_name={'default': u'Sunnyvale', 'city': u'Sunnyvale'}
        output=Sunnyvale

        :param str mask: The location mask in the form of %city-%state, etc
        :param list location_parts: The pieces of the mask, each starting at
            a `%`, in the form of ['%city-', '%state']
        :param dict place_name: A dictionary of place keywords and names like
            {'default': u'California', 'state': u'California'}
        :returns: str
        """
        # For loc_part = '%country-random' the groups are
        #   component_full = '%country-random'
        #   component = '%country'
        #   key = 'country'
        component_pattern = re.compile(r'((%([a-z]+))[^%]*)')

        found = False
        folder_name = mask
        for loc_part in location_parts:
            match = component_pattern.search(loc_part)
            if match is None:
                # No lower-case keyword after the % means a bad mask in
                # config.ini. Treat it like an unknown keyword and drop it
                # instead of failing on the missing match.
                folder_name = folder_name.replace(loc_part, '')
                continue

            component_full, component, key = match.groups()
            if key in place_name:
                found = True
                # Only the %keyword is substituted; trailing literal text
                # such as the '-' in '%city-' is kept.
                folder_name = folder_name.replace(component, place_name[key])
            else:
                # Unknown place keyword: remove it together with its
                # trailing literal text.
                folder_name = folder_name.replace(component_full, '')

        # Nothing could be interpolated and nothing is left of the mask, so
        # fall back to the default place name (if any).
        if not found and folder_name == '':
            folder_name = place_name.get('default', '')

        return folder_name
277
278
    def process_file(self, _file, destination, media, **kwargs):
        """Copy or move a media file into its folder below `destination`.

        The target folder comes from `get_folder_path()` and the target file
        name from `get_file_name()`. The file's checksum is recorded in the
        hash database together with the new path.

        :param str _file: Path of the file to import.
        :param str destination: Root directory to import into.
        :param media: Media object for `_file`; it must provide `is_valid()`,
            `set_original_name()` and `get_metadata()`.
        :param bool move: (keyword) Move the file instead of copying it.
            Defaults to False.
        :param bool allowDuplicate: (keyword) Import the file even if its
            checksum is already known. Defaults to False.
        :returns: str -- the path the file was written to, or None if the
            file was skipped.
        """
        move = kwargs.get('move', False)
        allow_duplicate = kwargs.get('allowDuplicate', False)

        if not media.is_valid():
            print('%s is not a valid media file. Skipping...' % _file)
            return

        media.set_original_name()
        metadata = media.get_metadata()
        if metadata is None:
            # Without metadata neither the folder nor the file name can be
            # built; skip like the other unusable inputs.
            log.info('Could not get metadata for %s. Skipping...' % _file)
            return

        directory_name = self.get_folder_path(metadata)

        dest_directory = os.path.join(destination, directory_name)
        file_name = self.get_file_name(media)
        dest_path = os.path.join(dest_directory, file_name)

        db = Db()
        checksum = db.checksum(_file)
        if checksum is None:
            log.info('Could not get checksum for %s. Skipping...' % _file)
            return

        # If duplicates are not allowed then we check if we've seen this file
        #  before via checksum. We also check that the file exists at the
        #   location we believe it to be.
        # If we find a checksum match but the file doesn't exist where we
        #  believe it to be then we write a log entry and proceed to import.
        checksum_file = db.get_hash(checksum)
        if allow_duplicate is False and checksum_file is not None:
            if os.path.isfile(checksum_file):
                log.info('%s already exists at %s. Skipping...' % (
                    _file,
                    checksum_file
                ))
                return

            log.info('%s matched checksum but file not found at %s. Importing again...' % (  # noqa
                _file,
                checksum_file
            ))

        self.create_directory(dest_directory)

        if move is True:
            # Remember the timestamps so they can be restored on the moved
            # file.
            stat = os.stat(_file)
            shutil.move(_file, dest_path)
            os.utime(dest_path, (stat.st_atime, stat.st_mtime))
        else:
            compatability._copyfile(_file, dest_path)
            # NOTE(review): set_utime() changes the file at
            # media.get_file_path(); if that is still the source path the
            # timestamps of the copy at dest_path are left untouched.
            # Confirm against the media classes.
            self.set_utime(media)

        db.add_hash(checksum, dest_path)
        db.update_hash_db()

        return dest_path
339
340
    def set_utime(self, media):
        """Set the modification time on the file based on the file name.

        The modification time is taken from the metadata's `date_taken`.
        If the base name starts with a time stamp in the form of
        YYYY-MM-DD_HH-MM-SS (e.g. 2015-12-05_00-59-26-IMG_0001) and that
        time stamp is a valid date, it overrides `date_taken`. The access
        time is set to the current time.

        :param media: Media object providing `get_file_path()` and
            `get_metadata()`.
        """
        file_path = media.get_file_path()
        metadata = media.get_metadata()
        date_taken = metadata['date_taken']

        name_match = re.search(
            r'^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}',
            metadata['base_name']
        )
        if name_match is not None:
            try:
                date_taken = time.strptime(
                    name_match.group(0),
                    '%Y-%m-%d_%H-%M-%S'
                )
            except ValueError:
                # The digits have the right shape but are not a real date
                # (e.g. month 99); keep the date from the metadata.
                pass

        # We don't make any assumptions about time zones and
        # assume local time zone.
        os.utime(file_path, (time.time(), time.mktime(date_taken)))
368