Test Failed
Push — splitters ( ca4311...6dbc6b )
by Konstantinos
02:12
created

StringParser.add()   A

Complexity

Conditions 1

Size

Total Lines 9
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 3
dl 0
loc 9
rs 10
c 0
b 0
f 0
1
import os
2
import re
3
import time
4
5
6
class StringParser:
    """Singleton collection of helpers for parsing album/track text.

    The class parses multiline track listings (eg copy-pasted from a video
    description) into ``[track_name, hh:mm:ss]`` pairs, converts track
    durations to starting timestamps, extracts album information from a video
    title and prepares per-track data for audio segmentation.
    """
    __instance = None
    timestamp_objects = {}
    sep = r'(?:[\t ]+|[\t ]*[\-\.]+[\t ]*)'

    # Building blocks of the regex used by _parse_track_line
    track_name = r'[\w\'\(\) \-’]*[\w)]'
    track_number = r'\d{1,2}'
    sep_this = r'(?:[\t ]+|[\t ]*[\.\-,]+[\t ]*)'  # separator between the fields of a track line
    hhmmss = r'(?:\d?\d:)*\d?\d'  # time in hh:mm:ss format
    extension = r'.mp3'
    album_info_fields = ('artist', 'album', 'year')

    def __new__(cls, *args, **kwargs):
        """Create the single shared instance on first call and return it on every call."""
        if cls.__instance is None:
            cls.__instance = super().__new__(cls)
        return cls.__instance

    @classmethod
    def duration_data_to_timestamp_data(cls, duration_data):
        """Call this to transform data concerning tracks' durations to data concerning tracks' starting timestamps.

        :param list duration_data: list of lists; each inner list has as 1st element a track name and as 2nd the track duration in hh:mm:ss format
        :return: list of [track_name, starting_timestamp] lists
        :rtype: list
        """
        return [list(_) for _ in cls._gen_timestamp_data(duration_data)]

    @staticmethod
    def _gen_timestamp_data(duration_data):
        """Generate (track_name, starting_timestamp) tuples out of (track_name, duration) data.

        The first track starts at '0:00'; every next track starts at the sum of the durations of all previous tracks.
        Nothing is generated for empty input.

        :param list duration_data: list of lists; each inner list has as 1st element a track name and as 2nd the track duration in hh:mm:ss format
        :return: tuples with timestamps instead of durations, ready to feed for segmentation
        """
        if not duration_data:
            return
        start = Timestamp('0:00')
        yield duration_data[0][0], str(start)
        for i in range(1, len(duration_data)):
            # a track starts where the previous one ends
            start += Timestamp(duration_data[i - 1][1])
            yield duration_data[i][0], str(start)

    @classmethod
    def parse_hhmmss_string(cls, tracks):
        """Call this method to transform a newline-separated string of album tracks (eg copy-pasted from a video description) to a list of lists.

        :param str tracks: one track per line; empty lines are ignored
        :return: list of [track_name, hhmmss] lists
        :rtype: list
        :raises WrongTimestampFormat: if a non-empty line cannot be parsed
        """
        return list(cls._parse_string(tracks))

    @classmethod
    def _parse_string(cls, tracks):
        """Generate the parsed [track_name, hhmmss] list of each non-empty line.

        :param str tracks: a newline-separated string of lines corresponding to the tracks information
        :return: lists of the form [track_name, hhmmss]
        :raises WrongTimestampFormat: if a non-empty line cannot be parsed; the message reports the 1-based line number
        """
        for i, line in enumerate(_.strip() for _ in tracks.split('\n')):
            if line == '':
                continue
            try:
                yield cls._parse_track_line(line)
            except AttributeError:
                # _parse_track_line found no match (it called .groups() on None)
                raise WrongTimestampFormat("Couldn't parse line {}: '{}'. Please use a format as 'trackname - 3:45'".format(i + 1, line))

    @classmethod
    def _parse_track_line(cls, track_line):
        """Parses a string line such as '01. Doteru 3:45'.

        The pattern is: an optional (ignored) track number followed by a separator, the track name,
        a separator and the time in hh:mm:ss format.

        :param str track_line: a single line describing a track
        :return: the captured [track_name, hhmmss]
        :rtype: list
        :raises AttributeError: if the line does not match the expected pattern
        """
        regex = re.compile(r"^(?:{}{})?({}){}({})$".format(
            cls.track_number, cls.sep_this, cls.track_name, cls.sep_this, cls.hhmmss))
        return list(regex.search(track_line.strip()).groups())

    @classmethod
    def get_instance(cls):
        """Return the singleton instance."""
        return StringParser()

    @classmethod
    def parse_tracks_hhmmss(cls, tracks_row_strings):
        """Returns parsed tracks: track title and timestamp in hh:mm:ss format, given the multiline string.

        Ignores potentially found track numbers in the start of each line. Delegates to parse_hhmmss_string.

        :param str tracks_row_strings: one track per line
        :return: a list of lists with each inner list corresponding to each input string row and having 2 elements: the track name and the timestamp
        :rtype: list
        """
        return cls.parse_hhmmss_string(tracks_row_strings)

    @classmethod
    def hhmmss_durations_to_timestamps(cls, hhmmss_list):
        """Transform a list of track durations to the list of the tracks' starting timestamps.

        :param list hhmmss_list: durations in hh:mm:ss format
        :return: starting timestamps; the first one is always '0:00'
        :rtype: list
        """
        return list(cls._generate_timestamps(hhmmss_list))

    @classmethod
    def _generate_timestamps(cls, hhmmss_list):
        """Generate starting timestamps by accumulating the given durations.

        Always yields '0:00' first; the last duration is not needed, since no track starts after the last one.
        """
        current = '0:00'
        yield current
        for duration in hhmmss_list[:-1]:
            current = cls.add(current, duration)
            yield current

    @classmethod
    def convert_to_timestamps(cls, tracks_row_strings):
        """Call this method to transform a newline-separated string of album tracks (eg copy-pasted from the youtube video description) that represents durations (in hhmmss format)
        to a list of strings with each track's starting timestamp in hhmmss format.

        :param str tracks_row_strings: one track per line, each ending with the track duration
        :return: the list of each track's timestamp
        :rtype: list
        :raises WrongTimestampFormat: if a non-empty line cannot be parsed
        """
        lines = cls.parse_tracks_hhmmss(tracks_row_strings)  # list of lists
        return cls.hhmmss_durations_to_timestamps([line[-1] for line in lines])

    @classmethod
    def add(cls, timestamp1: str, duration: str) -> str:
        """Add a duration to a timestamp.

        :param str timestamp1: hh:mm:ss
        :param str duration: hh:mm:ss
        :return: hh:mm:ss
        :rtype: str
        """
        return cls.time_format(cls.to_seconds(timestamp1) + cls.to_seconds(duration))

    @staticmethod
    def to_seconds(timestamp):
        """Call this method to transform a hh:mm:ss formatted string timestamp to its equivalent duration in seconds as an integer"""
        # the right-most field counts seconds, the next one minutes, then hours
        return sum(60 ** i * int(x) for i, x in enumerate(reversed(timestamp.split(':'))))

    @staticmethod
    def time_format(seconds):
        """Call this method to transform an integer representing time duration in seconds to its equivalent hh:mm:ss formatted string representation.

        The conversion goes through time.gmtime, so the hours field wraps around at 24 hours.
        """
        return time.strftime('%H:%M:%S', time.gmtime(seconds))

    @staticmethod
    def parse_album_info(video_title):
        """Parses a video title string into 'artist', 'album' and 'year' fields.

        Can parse patterns (tried in this order; the first that matches wins):
         - Artist Album Year
         - Album Year
         - Artist Album
         - Album

        :param str video_title:
        :return: the extracted values as a dictionary having maximally keys: {'artist', 'album', 'year'}
        :rtype: dict
        """
        sep1 = r'[\t ]*[\-\.][\t ]*'
        sep2 = r'[\t \-\.]+'
        year = r'\(?(\d{4})\)?'
        art = r'([\w ]*\w)'
        alb = r'([\w ]*\w)'

        # each entry pairs the pieces of a pattern with the names of its captured groups
        patterns = (
            ((art, sep1, alb, sep2, year), ('artist', 'album', 'year')),
            ((alb, sep2, year), ('album', 'year')),
            ((art, sep1, alb), ('artist', 'album')),
            ((alb,), ('album',)),
        )
        for parts, keys in patterns:
            match = re.search(''.join(parts), video_title)
            if match:
                return dict(zip(keys, match.groups()))
        return {}

    @classmethod
    def _get_album_info(cls, regex_list, video_title):
        """Search the title with every regex and return the captured groups of the regex that captured the most.

        A regex that does not match contributes an empty list.
        """
        captured = [getattr(reg.search(video_title), 'groups', lambda: [])() for reg in regex_list]
        return max(captured, key=len)

    @classmethod
    def convert_tracks_data(cls, data, album_file, target_directory=''):
        """
        Converts input Nx2 list of lists to Nx3 list of lists. The exception being the last list that has 2 elements.

        :param list data: list of lists; each inner list should contain track title (no need for number and without extension)
        and starting timestamp in hh:mm:ss format
        :param str album_file: the path to the audio file of the entire album to potentially segment
        :param str target_directory: path to desired directory path to store the potentially created album
        :return: each inner list contains the track file path, the starting timestamp in seconds and (except for the last track) the ending timestamp in seconds
        :rtype: list
        :raises TrackTimestampsSequenceError: if a track does not start strictly after the previous one
        """
        return [list(_) for _ in cls._generate_data(data, album_file, target_directory)]

    @classmethod
    def _generate_data(cls, data, album_file, target_directory):
        """
        Given a data list, with each element representing an album's track (as an inner 2-element list with 1st element
        the 'track_name' and 2nd a timestamp in hh:mm:ss format; the track starts its playback at that timestamp in
        relation with the total album playtime), the path to the album file at hand and the desired output directory for
        potentially storing the track files, generates 3-length tuples with the track_file_path, starting timestamp and
        ending timestamp (timestamps as strings holding seconds). Purpose is for the yielded triplets to be digested for
        audio segmentation. The exception being the last tuple yielded that has 2 elements; it naturally misses the
        ending timestamp. Nothing is generated for empty data.

        :param list data:
        :param str album_file:
        :param str target_directory:
        :returns: 3-element tuples with track_file_path, starting_timestamp, ending_timestamp
        :rtype: tuple
        :raises TrackTimestampsSequenceError: if a track does not start strictly after the previous one
        """
        if not data:
            return
        # NOTE: this state lives on the class and is read by __track_file while the generator is being consumed
        cls.__album_file = album_file
        cls.__target_directory = target_directory
        cls.__track_index_generator = iter('{:02d}'.format(_) for _ in range(1, len(data) + 1))
        for i in range(len(data) - 1):
            if Timestamp(data[i + 1][1]) <= Timestamp(data[i][1]):
                raise TrackTimestampsSequenceError(
                    "Track '{} - {}' starting timestamp '{}' should be 'bigger' than track's '{} - {}'; '{}'".format(
                        i + 2, data[i + 1][0], data[i + 1][1],
                        i + 1, data[i][0], data[i][1]))
            yield (
                cls.__track_file(data[i][0]),
                str(int(Timestamp(data[i][1]))),
                str(int(Timestamp(data[i + 1][1])))
            )
        yield (
            cls.__track_file(data[-1][0]),
            str(int(Timestamp(data[-1][1]))),
        )

    @classmethod
    def __track_file(cls, track_name):
        """Build the path of the next track file: '<target_directory>/<NN> - <track_name><album file extension>'."""
        # splitext only looks at the last path component, so dots in directory names are not mistaken for an extension
        return os.path.join(cls.__target_directory, '{} - {}{}'.format(
            next(cls.__track_index_generator),
            track_name,
            os.path.splitext(cls.__album_file)[1]))
244
245
246
class Timestamp:
    """A point in time (or a duration) expressed in the 'hh:mm:ss' format.

    Instances are cached per input string: constructing a Timestamp twice with the same string returns the same
    object. Ordering comparisons and arithmetic work on the equivalent number of seconds, while equality compares
    the string representations.
    """
    instances = {}  # maps every string seen so far to its Timestamp object

    def __new__(cls, hhmmss, *args, **kwargs):
        """Return the cached instance for the given string, creating it if needed.

        :param str hhmmss: 1 to 3 colon-separated fields of 1 or 2 digits each, every field having value from 0 to 60
        :raises WrongTimestampFormat: if the string does not follow the format or a field exceeds 60
        """
        if hhmmss in cls.instances:
            return cls.instances[hhmmss]
        match = re.fullmatch(r'((\d?\d):){0,2}(\d?\d)', hhmmss)
        if not match:
            raise WrongTimestampFormat("Timestamp given: '{}'. Please use the 'hh:mm:ss' format.".format(hhmmss))
        values = [int(_) for _ in hhmmss.split(':')]
        if not all(0 <= _ <= 60 for _ in values):
            raise WrongTimestampFormat("Timestamp given: '{}'. Please use the 'hh:mm:ss' format.".format(hhmmss))
        instance = super().__new__(cls)
        # the right-most field counts seconds, the next one minutes, then hours
        instance._s = sum(60 ** i * value for i, value in enumerate(reversed(values)))
        instance._b = hhmmss
        cls.instances[hhmmss] = instance
        return instance

    def __init__(self, hhmmss):
        # all the initialization happens in __new__, so that cached instances are returned untouched
        pass

    @staticmethod
    def from_duration(seconds):
        """Create a Timestamp out of a number of seconds.

        The conversion goes through time.gmtime, so the hours field wraps around at 24 hours.

        :param int seconds:
        :return: the Timestamp whose string is the 'HH:MM:SS' representation of the given seconds
        :rtype: Timestamp
        """
        return Timestamp(time.strftime('%H:%M:%S', time.gmtime(seconds)))

    def __repr__(self):
        return self._b

    def __str__(self):
        return self._b

    def __eq__(self, other):
        # string based: also allows comparing with a plain 'hh:mm:ss' string
        return str(self) == str(other)

    def __hash__(self):
        # defined alongside __eq__ (and consistent with it) so that instances stay hashable
        return hash(self._b)

    def __int__(self):
        return self._s

    def __lt__(self, other):
        return int(self) < int(other)

    def __le__(self, other):
        return int(self) <= int(other)

    def __gt__(self, other):
        return int(other) < int(self)

    def __ge__(self, other):
        return int(other) <= int(self)

    def __add__(self, other):
        return Timestamp.from_duration(int(self) + int(other))

    def __sub__(self, other):
        return Timestamp.from_duration(int(self) - int(other))
302
303
class WrongTimestampFormat(Exception):
    """Raised when a timestamp or a track line does not follow the expected 'hh:mm:ss' based format."""
304
class TrackTimestampsSequenceError(Exception):
    """Raised when a track's starting timestamp is not 'bigger' than the previous track's starting timestamp."""
305