Test Failed
Push — test-coverage ( b95e5a )
by Konstantinos
02:34
created

StringParser.add()   A

Complexity

Conditions 1

Size

Total Lines 9
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 3
dl 0
loc 9
rs 10
c 0
b 0
f 0
1
import os
2
import re
3
import time
4
5
6
class StringToDictParser:
    """Parses album information out of video title string.

    The parser is configured with named entities (regex strings) and an ordered list of separator
    regex strings. Calling it with a title and a list of "designs" tries every design against the
    title and returns the richest result.

    A design is a list of string tokens. A token of the form 's<N>' (N >= 1) refers to the N-th
    separator; any other token is looked up as an entity name.
    """
    # matches separator reference tokens such as 's1', 's2', ... and captures the 1-based index
    check = re.compile(r'^s([1-9]\d*)$')

    def __init__(self, entities, separators):
        """
        :param dict entities: maps each entity name to its regex string
        :param list separators: separator regex strings; design token 's<N>' selects separators[N - 1]
        :raises AssertionError: if any separator is not a string
        """
        assert all(isinstance(x, str) for x in separators)
        self.entities = {k: AlbumInfoEntity(k, v) for k, v in entities.items()}
        self.separators = separators

    def __call__(self, *args, **kwargs):
        """Match the title (first positional argument) against every design given as keyword 'design'.

        :return: the dict (entity name -> captured text) of the design that extracted the most fields;
            on ties the earliest design wins
        :rtype: dict
        :raises AssertionError: if a design is too long, holds non-string tokens, or holds a token that
            starts with 's' but is neither a valid separator reference nor a known entity name
        """
        title = args[0]
        design = kwargs['design']
        max_tokens = len(self.entities) + len(self.separators)
        assert all(len(x) <= max_tokens and all(isinstance(y, str) for y in x) for x in design)
        # entity names are allowed to start with 's' (e.g. 'song'); only the remaining 's...' tokens
        # have to be well-formed separator references
        assert all(all(StringToDictParser.check.match(y) for y in x if y.startswith('s') and y not in self.entities) for x in design)
        rregs = [RegexSequence(list(self._yield_reg_comp(d))) for d in design]
        return max([r.search_n_dict(title) for r in rregs], key=len)

    def _yield_reg_comp(self, kati):
        """Translate the tokens of one design into separator regex strings and entity objects.

        :param list kati: the design tokens
        :raises KeyError: if a token is neither a separator reference nor a known entity name
        :raises IndexError: if a separator reference points past the configured separators
        """
        for k in kati:
            separator_ref = StringToDictParser.check.match(k)
            if separator_ref:
                yield self.separators[int(separator_ref.group(1)) - 1]
            else:
                yield self.entities[k]
29
30
class AlbumInfoEntity:
    """Pairs an entity name with the regex used to capture it; str() gives back the regex."""

    def __init__(self, name, reg):
        """
        :param name: the entity's name
        :param reg: the regex associated with the entity
        """
        self.name, self.reg = name, reg

    def __str__(self):
        pattern = self.reg
        return pattern
37
38
39
class RegexSequence:
    """A regex built by concatenating the string forms of a sequence of components.

    Components exposing a 'name' attribute contribute a key; keys are paired positionally with the
    groups captured by the concatenated regex.
    """

    def __init__(self, data):
        """
        :param data: the components; each is converted with str() and joined, in order, into one regex
        """
        self._keys = [component.name for component in data if hasattr(component, 'name')]
        self._regex = ''.join(map(str, data))

    def search_n_dict(self, string):
        """Search the string and return the non-empty captured groups keyed by component name.

        :param str string: the text to search
        :return: key -> captured text, for truthy captures only; empty dict when nothing matches
        :rtype: dict
        """
        found = re.search(self._regex, string)
        if found is None:
            return {}
        return {key: value for key, value in zip(self._keys, found.groups()) if value}

    def __str__(self):
        return self._regex
49
50
51
class StringParser:
    """Parsing helpers for track file names, tracklist text and video titles.

    Instantiating the class always returns the same shared instance. Every helper is a class or
    static method, so an instance is not needed to use them.
    """
    __instance = None

    # regex fragments used to parse track file names and tracklist lines
    track_number = r'\d{1,2}'
    track_name = r'[\w\'\(\) \-’]*[\w)]'
    sep = r'(?:[\t ]+|[\t ]*[\.\-,]+[\t ]*)'
    extension = r'\.mp3'
    hhmmss = r'(?:\d?\d:)*\d?\d'

    # regex fragments used to parse a youtube video title string
    sep1 = r'[\t ]*[\-\.][\t ]*'
    sep2 = r'[\t \-\.]+'
    year = r'\(?(\d{4})\)?'
    art = r'([\w ]*\w)'
    alb = r'([\w ]*\w)'

    album_info_parser = StringToDictParser({'artist': art, 'album': alb, 'year': year}, [sep1, sep2])

    def __new__(cls, *args, **kwargs):
        # create the shared instance on first use and hand it back on every later call
        if cls.__instance is None:
            cls.__instance = super().__new__(cls)
        return cls.__instance

    @classmethod
    def parse_track_number_n_name(cls, file_name):
        """Call this method to get a dict like {'track_number': 'number', 'track_name': 'name'} from input file name with format like '1. - Loyal to the Pack.mp3'; number must be included!

        :param str file_name: the track file name
        :rtype: dict
        :raises AttributeError: if the file name does not match the expected format
        """
        pattern = r'({}){}({}){}$'.format(cls.track_number, cls.sep2, cls.track_name, cls.extension)
        return dict(zip(['track_number', 'track_name'], re.search(pattern, file_name).groups()))

    @classmethod
    def duration_data_to_timestamp_data(cls, duration_data):
        """Call this to transform data concerning tracks' time durations to tracks' starting timestamps. In both cases the format is hh:mm:ss

        :param list duration_data: list of lists; each inner list holds a track name and the track's duration
        :return: list of [track name, starting timestamp] lists; empty if the input is empty
        :rtype: list
        """
        return [list(_) for _ in cls._gen_timestamp_data(duration_data)]

    @staticmethod
    def _gen_timestamp_data(duration_data):
        """
        :param list of lists duration_data: each inner list has as 1st element a track name and as 2nd the track duration in hh:mm:ss format
        :return: generates (track name, starting timestamp) tuples ready to feed for segmentation
        """
        if not duration_data:
            return
        elapsed = Timestamp('0:00')
        yield duration_data[0][0], str(elapsed)
        for i in range(1, len(duration_data)):
            # a track starts when all the previous tracks have finished playing
            elapsed += Timestamp(duration_data[i - 1][1])
            yield duration_data[i][0], str(elapsed)

    @classmethod
    def parse_hhmmss_string(cls, tracks):
        """Call this method to transform a newline-separated string of album tracks (eg copy-pasted from video description) to a list of lists. Inner lists contain [track_name, hhmmss_timestamp].

        :param str tracks: the multiline tracks information
        :return: one [track_name, hhmmss] list per non-blank input line
        :rtype: list
        :raises WrongTimestampFormat: if a non-blank line cannot be parsed
        """
        return list(cls._parse_string(tracks))

    @classmethod
    def _parse_string(cls, tracks):
        """
        :param str tracks: a newline-separated string of lines corresponding to the tracks information
        :return: generates one [track_name, hhmmss] list per non-blank line
        :raises WrongTimestampFormat: if a non-blank line cannot be parsed
        """
        for i, line in enumerate(_.strip() for _ in tracks.split('\n')):
            if line == '':
                continue
            # guard only the parsing itself; a failed regex search shows up as an AttributeError on None
            try:
                parsed = cls._parse_track_line(line)
            except AttributeError:
                raise WrongTimestampFormat("Couldn't parse line {}: '{}'. Please use a format as 'trackname - 3:45'".format(i + 1, line))
            yield parsed

    @classmethod
    def _parse_track_line(cls, track_line):
        """Parses a string line such as '01. Doteru 3:45'

        :return: [track_name, hhmmss]
        :rtype: list
        :raises AttributeError: if the line does not match the expected format
        """
        # pattern layout:
        #   optional track number followed by a separator (not captured)
        #   track name (captured)
        #   separator between name and time
        #   time in hh:mm:ss format (captured)
        regex = re.compile(r"^(?:{}{})?({}){}({})$".format(cls.track_number, cls.sep, cls.track_name, cls.sep, cls.hhmmss))
        return list(regex.search(track_line.strip()).groups())

    @classmethod
    def get_instance(cls):
        """Return the shared StringParser instance."""
        return StringParser()

    @classmethod
    def parse_tracks_hhmmss(cls, tracks_row_strings):
        """
        Call this method to transform a multiline string of tracks information into parsed tracks:
        track_title and timestamp in hh:mm:ss format. Ignores potentially found track numbers in the
        start of each line. Returns a list of lists. Each inner list holds the captured groups.

        :param str tracks_row_strings:
        :return: a list of lists with each inner list corresponding to each input string row and having 2 elements: the track name and the timestamp
        :rtype: list
        :raises WrongTimestampFormat: if a non-blank line cannot be parsed
        """
        return cls.parse_hhmmss_string(tracks_row_strings)

    @classmethod
    def hhmmss_durations_to_timestamps(cls, hhmmss_list):
        """Transform a list of hh:mm:ss durations into the list of the corresponding starting timestamps.

        :param list hhmmss_list: the tracks' durations
        :return: the starting timestamps; the first one is always '0:00'
        :rtype: list
        """
        return list(cls._generate_timestamps(hhmmss_list))

    @classmethod
    def _generate_timestamps(cls, hhmmss_list):
        """Generates '0:00' followed by the running sum of every duration except the last one."""
        current = '0:00'
        yield current
        for duration in hhmmss_list[:-1]:
            current = cls.add(current, duration)
            yield current

    @classmethod
    def convert_to_timestamps(cls, tracks_row_strings):
        """Call this method to transform a newline-separated string of album tracks (eg copy-pasted from the youtube video description) that represents durations (in hhmmss format)
        to a list of strings with each track's starting timestamp in hhmmss format.

        :param str tracks_row_strings:
        :return: the list of each track's timestamp
        :rtype: list
        :raises WrongTimestampFormat: if a non-blank line cannot be parsed
        """
        lines = cls.parse_tracks_hhmmss(tracks_row_strings)  # list of lists
        timestamps = ['0:00']
        # the last track's duration is not needed: no track starts after it
        for line in lines[:-1]:
            timestamps.append(cls.add(timestamps[-1], line[-1]))
        return timestamps

    @classmethod
    def add(cls, timestamp1: str, duration: str) -> str:
        """
        :param str timestamp1: hh:mm:ss
        :param str duration: hh:mm:ss
        :return: hh:mm:ss
        :rtype: str
        """
        return cls.time_format(cls.to_seconds(timestamp1) + cls.to_seconds(duration))

    @staticmethod
    def to_seconds(timestamp):
        """Call this method to transform a hh:mm:ss formatted string timestamp to its equivalent duration in seconds as an integer"""
        return sum(60 ** i * int(x) for i, x in enumerate(reversed(timestamp.split(':'))))

    @staticmethod
    def time_format(seconds):
        """Call this method to transform an integer representing time duration in seconds to its equivalent hh:mm:ss formatted string representation

        The value is formatted through time.gmtime, so the hours field is an hour of day.
        """
        return time.strftime('%H:%M:%S', time.gmtime(seconds))

    @classmethod
    def parse_album_info(cls, video_title):
        """Call to parse a video title string into a hash (dictionary) of potentially all 'artist', 'album' and 'year' fields.

        Can parse patterns:
         - Artist Album Year
         - Artist Album
         - Album Year
         - Album

        :param str video_title:
        :return: the extracted values as a dictionary having maximally keys: {'artist', 'album', 'year'}
        :rtype: dict
        """
        return cls.album_info_parser(video_title, design=[['artist', 's1', 'album', 's2', 'year'],
                                                          ['artist', 's1', 'album'],
                                                          ['album', 's2', 'year'],
                                                          ['album']])

    @classmethod
    def convert_tracks_data(cls, data, album_file, target_directory=''):
        """
        Converts input Nx2 list of lists to Nx3 list of lists. The exception being the last list that has 2 elements

        :param list of lists data: each inner list should contain track title (no need for number and without extension)
        and starting time stamp in hh:mm:ss format
        :param str album_file: the path to the audio file of the entire album to potentially segment
        :param str target_directory: path to desired directory path to store the potentially created album
        :return: each inner list contains the track file path, the starting second and (except for the last track) the ending second, the seconds given as strings; empty if data is empty
        :rtype: list of lists
        :raises TrackTimestampsSequenceError: if the starting timestamps are not strictly increasing
        """
        return [list(_) for _ in cls._generate_data(data, album_file, target_directory)]

    @classmethod
    def _generate_data(cls, data, album_file, target_directory):
        """
        Given a data list, with each element representing an album's track (as an inner 2-element list with 1st element the 'track_name' and 2nd a timestamp in hh:mm:ss format; the track starts its playback at that timestamp in relation with the total album playtime), the path to the album file at hand and the desired output directory for potentially storing the track files,
        generates 3-length tuples with the track_file_path, starting timestamp and ending timestamp (both in seconds, as strings). Purpose is for the yielded triplets to be digested for audio segmentation. The exception being the last tuple yielded that has 2 elements; it naturally misses the ending timestamp.

        Track files are named '<2-digit index> - <track name><extension of album_file>'. Nothing is generated for empty data.

        :param list data:
        :param str album_file:
        :param str target_directory:
        :returns: 3-element tuples with track_file_path, starting_timestamp, ending_timestamp
        :rtype: tuple
        :raises TrackTimestampsSequenceError: if a track does not start after the previous one
        """
        if not data:
            return
        # splitext only looks at the last path component, so dots in directory names are not mistaken for an extension
        extension = os.path.splitext(album_file)[1]
        for i in range(len(data) - 1):
            end = Timestamp(data[i + 1][1])
            start = Timestamp(data[i][1])
            if end <= start:
                raise TrackTimestampsSequenceError(
                    "Track '{} - {}' starting timestamp '{}' should be 'bigger' than track's '{} - {}'; '{}'".format(
                        i + 2, data[i + 1][0], data[i + 1][1],
                        i + 1, data[i][0], data[i][1]))
            yield (
                cls.__track_file(target_directory, i + 1, data[i][0], extension),
                str(int(start)),
                str(int(end))
            )
        yield (
            cls.__track_file(target_directory, len(data), data[-1][0], extension),
            str(int(Timestamp(data[-1][1]))),
        )

    @staticmethod
    def __track_file(target_directory, index, track_name, extension):
        """Builds the path of a track file: '<target_directory>/<2-digit index> - <track_name><extension>'."""
        return os.path.join(target_directory, '{:02d} - {}{}'.format(index, track_name, extension))
263
264
265
class Timestamp:
    """A point in time (or duration) given as an 'hh:mm:ss', 'mm:ss' or 'ss' string.

    Instances are cached per input string: building a Timestamp twice from the same string returns
    the very same object. int() gives the total seconds and str() the original string.
    """
    # cache of already built instances, keyed by the string they were built from
    instances = {}

    def __new__(cls, *args, **kwargs):
        """
        :param str hhmmss: (first positional argument) up to three colon-separated fields of one or two digits each
        :raises WrongTimestampFormat: if the string is malformed or any field is greater than 60
        """
        hhmmss = args[0]
        if hhmmss in cls.instances:
            return cls.instances[hhmmss]
        match = re.fullmatch(r'((\d?\d):){0,2}(\d?\d)', hhmmss)
        if not match:
            raise WrongTimestampFormat("Timestamp given: '{}'. Please use the 'hh:mm:ss' format.".format(hhmmss))
        values = [int(_) for _ in hhmmss.split(':')]
        if not all(0 <= _ <= 60 for _ in values):
            raise WrongTimestampFormat("Timestamp given: '{}'. Please use the 'hh:mm:ss' format.".format(hhmmss))
        instance = super().__new__(cls)
        # the rightmost field counts seconds, the next one minutes, the next one hours
        instance._s = sum(60 ** i * value for i, value in enumerate(reversed(values)))
        instance._b = hhmmss
        cls.instances[hhmmss] = instance
        return instance

    def __init__(self, hhmmss):
        # all the initialization happens in __new__, so that cached instances are returned untouched
        pass

    @staticmethod
    def from_duration(seconds):
        """Build a Timestamp out of a number of seconds; the resulting string has the 'HH:MM:SS' form."""
        return Timestamp(time.strftime('%H:%M:%S', time.gmtime(seconds)))

    def __repr__(self):
        return self._b

    def __str__(self):
        return self._b

    def __eq__(self, other):
        # NOTE: equality compares the string forms, so '1:00' and '01:00' are not equal
        # although they represent the same number of seconds
        return str(self) == str(other)

    def __hash__(self):
        # defining __eq__ alone would make instances unhashable; hash the same value __eq__ compares
        return hash(self._b)

    def __int__(self):
        return self._s

    def __lt__(self, other):
        return int(self) < int(other)

    def __le__(self, other):
        return int(self) <= int(other)

    def __gt__(self, other):
        return int(other) < int(self)

    def __ge__(self, other):
        return int(other) <= int(self)

    def __add__(self, other):
        return Timestamp.from_duration(int(self) + int(other))

    def __sub__(self, other):
        return Timestamp.from_duration(int(self) - int(other))
320
321
322
class WrongTimestampFormat(Exception):
    """Raised when a timestamp string or a tracklist line does not follow the expected format."""
323
class TrackTimestampsSequenceError(Exception):
    """Raised when a track's starting timestamp is not later than the previous track's."""
324