Test Failed
Push — test-coverage ( 7b8ae8...30721d )
by Konstantinos
02:24
created

StringParser.get_instance()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
import os
2
import re
3
import time
4
5
6
class StringToDictParser:
    """Parses album information out of video title string.

    The parser holds named entity regexes and a list of separator regexes.
    Calling it with a title and a ``design`` builds one regular expression per
    design entry and returns the dictionary that extracted the most fields.
    """
    # A design token 's<N>' (N >= 1, no leading zeros) refers to the N-th separator.
    check = re.compile(r'^s([1-9]\d*)$')

    def __init__(self, entities, separators):
        """
        :param dict entities: maps an entity name (eg 'artist') to its regex string
        :param list separators: regex strings, referenced from designs as 's1', 's2', ...
        """
        assert all(isinstance(x, str) for x in separators)
        self.entities = {k: AlbumInfoEntity(k, v) for k, v in entities.items()}
        self.separators = separators

    def __call__(self, *args, **kwargs):
        """Parse a title according to the given designs.

        :param args: the first positional argument is the title string to parse
        :param kwargs: must contain 'design'; a list of token lists, where each token is
            either an entity name or a separator reference such as 's1'
        :return: the extracted fields of the best matching design; an empty dict when the
            design list is empty or nothing matched. Ties go to the earliest design.
        :rtype: dict
        """
        title = args[0]
        design = kwargs['design']
        max_tokens = len(self.entities) + len(self.separators)
        assert all(len(x) <= max_tokens and all(isinstance(y, str) for y in x) for x in design)
        # every token starting with 's' must be a well-formed separator reference
        assert all(all(StringToDictParser.check.match(y) for y in x if y.startswith('s')) for x in design)
        sequences = [RegexSequence(list(self._yield_reg_comp(d))) for d in design]
        # default={} keeps an empty design from raising ValueError inside max()
        return max((seq.search_n_dict(title) for seq in sequences), key=len, default={})

    def _yield_reg_comp(self, kati):
        """Translate design tokens into regex components.

        Yields the separator regex string for 's<N>' tokens (1-based index into
        ``self.separators``) and the stored entity object for any other token.
        """
        for k in kati:
            if k.startswith('s'):
                yield self.separators[int(StringToDictParser.check.match(k).group(1)) - 1]
            else:
                yield self.entities[k]
29
30
class AlbumInfoEntity:
    """Pairs an entity name (eg 'artist') with the regex string that captures it."""

    def __init__(self, name, reg):
        """
        :param str name: the key under which the captured value is reported
        :param str reg: the regular expression (as a string) for this entity
        """
        self.name = name
        self.reg = reg

    def __str__(self):
        """Return the raw regex string, so entities can be joined into a bigger pattern."""
        return self.reg

    def __repr__(self):
        return '{}({!r}, {!r})'.format(type(self).__name__, self.name, self.reg)
37
38
39
class RegexSequence:
    """A regular expression assembled from an ordered sequence of components.

    Components that expose a ``name`` attribute contribute a key; every component
    contributes ``str(component)`` to the concatenated pattern.
    """

    def __init__(self, data):
        """
        :param data: iterable of regex components (plain strings and/or named objects)
        """
        # Materialise once: the components are traversed twice below, which would
        # silently produce an empty pattern if a one-shot iterator were passed.
        data = list(data)
        self._keys = [d.name for d in data if hasattr(d, 'name')]
        self._regex = ''.join(str(d) for d in data)

    def search_n_dict(self, string):
        """Search the input string and map the captured groups to the component names.

        :param str string: the text to search in
        :return: name -> captured value, for the groups that captured a non-empty value;
            an empty dict if the pattern is not found
        :rtype: dict
        """
        match = re.search(self._regex, string)
        if match is None:
            return {}
        return {key: value for key, value in zip(self._keys, match.groups()) if value}
46
47
48
class StringParser:
    """Collection of string parsing helpers for album, track and timestamp information.

    Instantiation always returns the same shared object; all helpers are class or
    static methods, so they can also be called directly on the class.
    """
    __instance = None

    track_number = r'\d{1,2}'
    track_name = r'[\w\'\(\) \-’]*[\w)]'
    sep = r'(?:[\t ]*[\.\-,]+[\t ]*|[\t ]+)'
    extension = r'\.mp3'
    hhmmss = r'(?:\d?\d:)*\d?\d'

    ## to parse from youtube video title string
    sep1 = r'[\t ]*[\-\.][\t ]*'
    sep2 = r'[\t \-\.]+'
    year = r'\(?(\d{4})\)?'
    art = r'([\w ]*\w)'
    alb = r'([\w ]*\w)'

    album_info_parser = StringToDictParser({'artist': art, 'album': alb, 'year': year}, [sep1, sep2])

    def __new__(cls, *args, **kwargs):
        """Return the single shared instance, creating it on first use."""
        if cls.__instance is None:
            cls.__instance = super().__new__(cls)
        return cls.__instance

    ## STRING TO DICT
    @classmethod
    def parse_album_info(cls, video_title):
        """Call to parse a video title string into a hash (dictionary) of potentially all 'artist', 'album' and 'year' fields.

        Can parse patterns:
         - Artist Album Year
         - Artist Album
         - Album Year
         - Album

        :param str video_title:
        :return: the extracted values as a dictionary having maximally keys: {'artist', 'album', 'year'}
        :rtype: dict
        """
        return cls.album_info_parser(video_title, design=[['artist', 's1', 'album', 's2', 'year'],
                                                          ['artist', 's1', 'album'],
                                                          ['album', 's2', 'year'],
                                                          ['album']])

    @classmethod
    def parse_track_number_n_name(cls, file_name):
        """Call this method to get a dict like {'track_number': 'number', 'track_name': 'name'} from input file name with format like '1. - Loyal to the Pack.mp3'; number must be included!

        :param str file_name:
        :rtype: dict
        :raises AttributeError: if the file name does not follow the expected format
        """
        regex = re.compile(r'({}){}({}){}$'.format(cls.track_number, cls.sep2, cls.track_name, cls.extension))
        return dict(zip(['track_number', 'track_name'], regex.search(file_name).groups()))

    @classmethod
    def duration_data_to_timestamp_data(cls, duration_data):
        """Call this method to transform a list of 2-length lists of track_name - duration_hhmmss pairs to the equivalent list of lists but with starting timestamps in hhmmss format in place of the durations.

        :param list duration_data: eg: [['Know your enemy', '3:45'], ['Wake up', '4:53'], ['Testify', '4:32']]
        :return: eg: [['Know your enemy', '0:00'], ['Wake up', '3:45'], ['Testify', '8:38']]
        :rtype: list
        """
        return [list(_) for _ in cls._gen_timestamp_data(duration_data)]

    @staticmethod
    def _gen_timestamp_data(duration_data):
        """Generate (track_name, starting_timestamp) pairs out of (track_name, duration) pairs.

        The first track starts at '0:00'; every next one starts where the previous ended.
        Nothing is generated for empty input. If a duration is malformed the error and the
        input data are printed and the process exits with status 1.

        :param list of lists duration_data: each inner list has as 1st element a track name and as 2nd the track duration in hh:mm:ss format
        :return: pairs with timestamps instead of durations ready to feed for segmentation
        """
        if not duration_data:
            return
        start = Timestamp('0:00')
        yield duration_data[0][0], str(start)
        for previous, current in zip(duration_data, duration_data[1:]):
            try:
                start = start + Timestamp(previous[1])
            except WrongTimestampFormat as e:
                print(e)
                print('DATA\n[{}]'.format([str(_) for _ in duration_data]))
                import sys
                sys.exit(1)
            yield current[0], str(start)

    # STRING TO LIST
    @classmethod
    def parse_hhmmss_string(cls, tracks):
        """Call this method to transform a newline separable string of album tracks (eg copy-pasted from video description) to a list of lists. Inner lists contain [track_name, hhmmss_timestamp].

        :param str tracks:
        :return: one [track_name, hhmmss] list per non-empty line
        :rtype: list
        :raises WrongTimestampFormat: if a non-empty line cannot be parsed
        """
        return list(cls._parse_string(tracks))

    @classmethod
    def _parse_string(cls, tracks):
        """Generate the parsed [track_name, hhmmss] list of every non-empty line.

        :param str tracks: a newline separable string of lines corresponding to the tracks information
        :raises WrongTimestampFormat: reporting the 1-based number and content of the offending line
        """
        for i, line in enumerate(_.strip() for _ in tracks.split('\n')):
            if line == '':
                continue
            try:
                parsed = cls._parse_track_line(line)
            except AttributeError:  # the line regex did not match
                raise WrongTimestampFormat("Couldn't parse line {}: '{}'. Please use a format as 'trackname - 3:45'".format(i + 1, line))
            yield parsed

    @classmethod
    def _parse_track_line(cls, track_line):
        """Parses a string line such as '01. Doteru 3:45'

        The line is matched as: an optional leading track number with its separator (ignored),
        the track name, a separator and finally the time in hh:mm:ss format.

        :return: [track_name, hhmmss]
        :rtype: list
        :raises AttributeError: if the line does not match
        """
        regex = re.compile(r"^(?:{}{})?({}){}({})$".format(cls.track_number, cls.sep, cls.track_name, cls.sep, cls.hhmmss))
        return list(regex.search(track_line.strip()).groups())

    @classmethod
    def convert_to_timestamps(cls, tracks_row_strings):
        """Call this method to transform a newline separable string of album tracks (eg copy-pasted from the youtube video description) that represents durations (in hhmmss format)
        to a list of strings with each track's starting timestamp in hhmmss format.

        :param str tracks_row_strings:
        :return: the list of each track's timestamp; the first one is always '0:00'
        :rtype: list
        """
        lines = cls.parse_hhmmss_string(tracks_row_strings)  # list of lists
        timestamps = ['0:00']
        # the duration of the last track does not start any other track
        for line in lines[:-1]:
            timestamps.append(cls.add(timestamps[-1], line[-1]))
        return timestamps

    @classmethod
    def add(cls, timestamp1: str, duration: str) -> str:
        """
        :param str timestamp1: hh:mm:ss
        :param str duration: hh:mm:ss
        :return: hh:mm:ss
        :rtype: str
        """
        return cls.time_format(cls.to_seconds(timestamp1) + cls.to_seconds(duration))

    @staticmethod
    def to_seconds(timestamp):
        """Call this method to transform a hh:mm:ss formatted string timestamp to its equivalent duration in seconds as an integer"""
        return sum(60 ** i * int(x) for i, x in enumerate(reversed(timestamp.split(':'))))

    @staticmethod
    def time_format(seconds):
        """Call this method to transform an integer representing time duration in seconds to its equivalent hh:mm:ss formatted string representation"""
        return time.strftime('%H:%M:%S', time.gmtime(seconds))

    @classmethod
    def convert_tracks_data(cls, data, album_file, target_directory=''):
        """
        Converts input Nx2 list of lists to Nx3 list of lists. The exception being the last list that has 2 elements.
        The input list's inner lists' elements are 'track_name' and 'starting_timestamp' in hhmmss format.

        :param list of lists data: each inner list should contain track title (no need for number and without extension)
        and starting time stamp in hh:mm:ss format
        :param str album_file: the path to the audio file of the entire album to potentially segment
        :param str target_directory: path to desired directory path to store the potentially created album
        :return: each inner list contains track file path and timestamp(s) in seconds
        :rtype: list of lists
        """
        return [list(_) for _ in cls._generate_data(data, album_file, target_directory)]

    @classmethod
    def _generate_data(cls, data, album_file, target_directory):
        """
        Given a data list, with each element representing an album's track (as an inner 2-element list with 1st element the 'track_name' and 2nd a timestamp in hh:mm:ss format; the track starts its playback at that timestamp in relation with the total album playtime), the path to the album file at hand and the desired output directory for potentially storing the track files,
        generates 3-length tuples with the track_file_path, starting timestamp and ending timestamp (both as strings of seconds). Purpose is for the yielded triplets to be digested for audio segmentation. The exception being the last tuple yielded that has 2 elements; it naturally misses the ending timestamp. Nothing is generated for empty data.

        Track files are named '<NN> - <track_name><ext>', where NN is the 1-based, zero padded track position and ext is the extension of the album file.

        :param list data:
        :param str album_file:
        :param str target_directory:
        :returns: 3-element tuples with track_file_path, starting_timestamp, ending_timestamp
        :rtype: tuple
        :raises TrackTimestampsSequenceError: if a track does not start strictly after the previous one
        """
        if not data:
            return
        # splitext only inspects the last path component, so a dot in a directory
        # name is not mistaken for the start of a file extension
        extension = os.path.splitext(album_file)[1]

        def track_file(position, track_name):
            return os.path.join(target_directory, '{:02d} - {}{}'.format(position, track_name, extension))

        for position, (current, following) in enumerate(zip(data, data[1:]), start=1):
            if Timestamp(following[1]) <= Timestamp(current[1]):
                raise TrackTimestampsSequenceError(
                    "Track '{} - {}' starting timestamp '{}' should be 'bigger' than track's '{} - {}'; '{}'".format(
                        position + 1, following[0], following[1],
                        position, current[0], current[1]))
            yield (
                track_file(position, current[0]),
                str(int(Timestamp(current[1]))),
                str(int(Timestamp(following[1])))
            )
        yield (
            track_file(len(data), data[-1][0]),
            str(int(Timestamp(data[-1][1]))),
        )
258
259
260
class Timestamp():
    """A point in time (or a duration) given as a '[[hh:]mm:]ss' string.

    Instances are cached in ``instances`` keyed by their zero-stripped string form, so
    equivalent spellings (eg '03:45' and '3:45') yield the very same object.
    ``int()`` gives the total seconds and ``str()`` the shortest conventional
    form: 'm:ss' when the hours are zero, 'h:mm:ss' otherwise.
    """
    instances = {}

    # one to three colon-separated components, each of one or two digits
    _FORMAT = re.compile(r'(?:\d?\d:){0,2}\d?\d')

    def __new__(cls, *args, **kwargs):
        """
        :param args: the first positional argument is the timestamp string
        :raises WrongTimestampFormat: if the string is malformed or its minutes/seconds exceed 60
        """
        hhmmss = args[0]
        if not cls._FORMAT.fullmatch(hhmmss):
            raise WrongTimestampFormat("Timestamp given: '{}'. Please use the 'hh:mm:ss' format.".format(hhmmss))

        # Split instead of reading regex groups: a repeated capture group only keeps
        # its last repetition, which would drop the hours of an 'hh:mm:ss' input.
        values = [int(_) for _ in hhmmss.split(':')]
        hours, minutes, seconds = [0] * (3 - len(values)) + values

        # NOTE: the upper bound is kept at 60 (inclusive) so as not to reject input
        # that was accepted so far.
        if not (minutes <= 60 and seconds <= 60):
            raise WrongTimestampFormat("Timestamp given: '{}'. Please use the 'hh:mm:ss' format.".format(hhmmss))

        if hours:
            minlength_string = '{}:{:02d}:{:02d}'.format(hours, minutes, seconds)
            stripped_string = '{}:{}:{}'.format(hours, minutes, seconds)
        else:
            minlength_string = '{}:{:02d}'.format(minutes, seconds)
            stripped_string = '{}:{}'.format(minutes, seconds)

        if stripped_string in cls.instances:
            return cls.instances[stripped_string]
        x = super().__new__(cls)
        x.__minlength_string = minlength_string
        x.__stripped_string = stripped_string
        x._s = 3600 * hours + 60 * minutes + seconds
        cls.instances[x.__stripped_string] = x
        return x

    def __init__(self, hhmmss):
        # all initialisation happens in __new__, because instances are cached
        pass

    @staticmethod
    def from_duration(seconds):
        """Build a Timestamp out of a number of seconds.

        The seconds are rendered through ``time.gmtime`` as '%H:%M:%S', hence only the
        time-of-day part is kept.
        """
        return Timestamp(time.strftime('%H:%M:%S', time.gmtime(seconds)))

    def __int__(self):
        """Total number of seconds."""
        return self._s

    def __str__(self):
        return self.__minlength_string

    def __repr__(self):
        return '{}({!r})'.format(type(self).__name__, self.__minlength_string)

    def __hash__(self):
        # must be an int and agree with __eq__
        return hash(self._s)

    def __eq__(self, other):
        if isinstance(other, Timestamp):
            return self._s == other._s
        return NotImplemented

    def __lt__(self, other):
        return int(self) < int(other)

    def __le__(self, other):
        return int(self) <= int(other)

    def __gt__(self, other):
        return int(other) < int(self)

    def __ge__(self, other):
        return int(other) <= int(self)

    def __add__(self, other):
        return Timestamp.from_duration(int(self) + int(other))

    def __sub__(self, other):
        return Timestamp.from_duration(int(self) - int(other))
367
368
369
class WrongTimestampFormat(Exception):
    """Raised when a string cannot be interpreted as an 'hh:mm:ss' timestamp."""
370
class TrackTimestampsSequenceError(Exception):
    """Raised when a track does not start strictly after the track preceding it."""
371