Passed
Push — dev ( ed36f9...ede60a )
by Konstantinos
05:48 queued 58s
created

music_album_creation.tracks_parsing   A

Complexity

Total Complexity 30

Size/Duplication

Total Lines 182
Duplicated Lines 0 %

Test Coverage

Coverage 92.68%

Importance

Changes 0
Metric Value
eloc 104
dl 0
loc 182
ccs 76
cts 82
cp 0.9268
rs 10
c 0
b 0
f 0
wmc 30

17 Methods

Rating   Name   Duplication   Size   Complexity  
A RegexSequence.search_n_dict() 0 2 2
A StringToDictParser._yield_reg_comp() 0 6 3
A AlbumInfoEntity.__init__() 0 3 1
A AlbumInfoEntity.__str__() 0 2 1
A StringToDictParser.__init__() 0 5 2
A RegexSequence.__init__() 0 3 1
A StringToDictParser.__call__() 0 9 5
A StringParser.add() 0 9 1
A StringParser._parse_track_line() 0 15 1
A StringParser.convert_to_timestamps() 0 15 2
A StringParser.to_seconds() 0 4 1
A StringParser.hhmmss_format() 0 4 1
A StringParser.parse_track_number_n_name() 0 6 1
A StringParser._parse_string() 0 15 4
A StringParser.parse_album_info() 0 16 1
A StringParser.__new__() 0 5 2
A StringParser.parse_hhmmss_string() 0 7 1
1
# -*- coding: utf-8 -*-
2
3 1
import os
4 1
import re
5 1
import time
6
7
8 1
class StringToDictParser(object):
    """Parses album information out of a video title string.

    The parser is configured with named entities (name -> regex string) and a
    sequence of separator regex strings. Calling it with a title and a
    ``design`` (a list of layouts) tries every layout and returns the richest
    result.

    A layout is a list of string tokens. A token of the form ``s<N>``
    (``s1``, ``s2``, ...) refers to the N-th separator, 1-based; any other
    token is looked up as an entity name.
    """

    # Matches a separator reference and captures its 1-based index.
    check = re.compile(r'^s([1-9]\d*)$')

    def __init__(self, entities, separators):
        """
        :param dict entities: maps an entity name to the regex string capturing it
        :param separators: indexable sequence of separator regex strings
        :raises RuntimeError: if any separator is not a string
        """
        if not all(isinstance(x, str) for x in separators):
            raise RuntimeError("Every separator must be a regex string; got {!r}".format(separators))
        self.entities = {k: AlbumInfoEntity(k, v) for k, v in entities.items()}
        self.separators = separators

    def __call__(self, *args, **kwargs):
        """Parse a title against every layout of the given design.

        The title is the first positional argument; the layouts are passed as
        the ``design`` keyword argument.

        :return: the extracted values of the layout that yielded the most
            fields (the earliest layout wins a tie); an empty dict if the
            design holds no layouts
        :rtype: dict
        :raises RuntimeError: if a layout is malformed
        :raises KeyError: if a token names an unknown entity
        """
        title = args[0]
        # Materialize once: the design is traversed more than once below.
        design = list(kwargs['design'])
        for layout in design:
            self._validate_layout(layout)
        sequences = [RegexSequence(list(self._yield_reg_comp(layout))) for layout in design]
        if not sequences:
            return {}
        return max([sequence.search_n_dict(title) for sequence in sequences], key=len)

    def _validate_layout(self, layout):
        """Raise RuntimeError unless ``layout`` is a well-formed list of tokens."""
        max_tokens = len(self.entities) + len(self.separators)
        if len(layout) > max_tokens or not all(isinstance(token, str) for token in layout):
            raise RuntimeError(
                "A layout must hold at most {} string tokens; got {!r}".format(max_tokens, layout))
        for token in layout:
            reference = StringToDictParser.check.match(token)
            if reference:
                # Reject an index beyond the configured separators up front,
                # instead of letting it fail later as an IndexError.
                if int(reference.group(1)) > len(self.separators):
                    raise RuntimeError(
                        "Separator reference '{}' exceeds the {} configured separators".format(
                            token, len(self.separators)))
            elif token.startswith('s') and token not in self.entities:
                raise RuntimeError(
                    "Token '{}' is neither a separator reference (s1, s2, ...) nor a known entity".format(token))

    def _yield_reg_comp(self, kati):
        """Yield, for each token of a layout, the separator regex or entity it refers to."""
        for k in kati:
            reference = StringToDictParser.check.match(k)
            if reference:
                yield self.separators[int(reference.group(1)) - 1]
            else:
                yield self.entities[k]
34
35 1
class AlbumInfoEntity(object):
    """A named piece of album information paired with the regex that captures it."""

    def __init__(self, name, reg):
        """
        :param str name: the key under which the captured value is reported
        :param str reg: the regex string used to capture the value
        """
        self.name = name
        self.reg = reg

    def __str__(self):
        """Return the regex string, so the entity can be joined into a larger pattern."""
        return self.reg

    def __repr__(self):
        return '{}({!r}, {!r})'.format(type(self).__name__, self.name, self.reg)
42
43
44 1
class RegexSequence(object):
    """A regex built by concatenating components, some of which are named.

    Components exposing a ``name`` attribute contribute a key; every component
    contributes ``str(component)`` to the pattern. Keys are paired with the
    pattern's capturing groups by position, so each named component is
    expected to hold exactly one capturing group and unnamed ones none.
    """

    def __init__(self, data):
        """
        :param data: iterable of components (named objects and/or plain regex strings)
        """
        # Materialize first: the components are traversed twice, which would
        # silently yield an empty pattern for a one-shot iterator.
        components = list(data)
        self._keys = [component.name for component in components if hasattr(component, 'name')]
        self._regex = ''.join(str(component) for component in components)
        self._pattern = re.compile(self._regex)

    def search_n_dict(self, string):
        """Search ``string`` and map each key to its captured value.

        :param str string: the text to search
        :return: key -> captured text, omitting groups that captured nothing;
            an empty dict when the pattern does not match at all
        :rtype: dict
        """
        match = self._pattern.search(string)
        if match is None:
            return {}
        return {key: value for key, value in zip(self._keys, match.groups()) if value}
51
52
53 1
class StringParser(object):
    """Singleton grouping helpers that parse album and track information out of strings.

    All parsing helpers are class or static methods, so they can be used
    either on the class itself or on the shared instance.
    """
    __instance = None
    regexes = {'track_number': r'\d{1,2}',
               'sep1': r"(?: [\t\ ]* [\.\-\)]+ )? [\t ]*",
               'track_word': r"\(?[\wα-ωΑ-Ω'\x86-\xce\u0384-\u03CE][\w\-’':!\xc3\xa8α-ωΑ\-Ω\x86-\xce\u0384-\u03CE]*\)?",
               'track_sep': r'[\t\ ,]+',
               'sep2': r'(?: [\t\ ]* [\-.]+ [\t\ ]* | [\t\ ]+ )',
               'extension': r'\.mp3',
               'hhmmss': r'(?:\d?\d:)*\d?\d'}
    # Derived at class-definition time (rather than on first instantiation) so
    # that the classmethods below also work when no instance was ever created.
    regexes['track_name'] = r'{track_word}(?:{track_sep}{track_word})*'.format(**regexes)

    ## to parse from youtube video title string
    sep1 = r'[\t ]*[\-\.][\t ]*'
    sep2 = r'[\t \-\.]+'
    year = r'\(?(\d{4})\)?'
    art = r'([\w ]*\w)'
    alb = r'([\w ]*\w)'

    album_info_parser = StringToDictParser({'artist': art, 'album': alb, 'year': year}, [sep1, sep2])

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if cls.__instance is None:
            # super(StringParser, cls): the reversed argument order raises
            # TypeError as soon as cls is a subclass of StringParser.
            cls.__instance = super(StringParser, cls).__new__(cls)
        return cls.__instance

    ## STRING TO DICT
    @classmethod
    def parse_album_info(cls, video_title):
        """Parse a video title into a dictionary with potentially all of the 'artist', 'album' and 'year' fields.

        Can parse patterns:
         - Artist Album Year
         - Artist Album
         - Album Year
         - Album

        :param str video_title:
        :return: the extracted values as a dictionary having maximally keys: {'artist', 'album', 'year'}
        :rtype: dict
        """
        return cls.album_info_parser(video_title, design=[['artist', 's1', 'album', 's2', 'year'],
                                                          ['artist', 's1', 'album'],
                                                          ['album', 's2', 'year'],
                                                          ['album']])

    # PARSE filenames
    @classmethod
    def parse_track_number_n_name(cls, file_name):
        """Extract the track number and track name from the base name of an '.mp3' file path.

        :param str file_name: path or name of the file
        :return: a dict with keys 'track_number' and 'track_name'; the
            'track_number' value is None when the number prefix did not match
        :rtype: dict
        :raises AttributeError: if the file name does not match the expected format
        """
        pattern = re.compile(
            r"(?: ({track_number}) {sep1})? ( {track_name} ) {extension}$".format(**cls.regexes), re.X)
        match = pattern.search(os.path.basename(file_name))
        if match is None:
            # AttributeError is what the former implicit `None.groups()` raised.
            raise AttributeError("Couldn't parse track number and name from file name '{}'".format(file_name))
        return dict(zip(['track_number', 'track_name'], match.groups()))

    # PARSE tracks info multiline
    @classmethod
    def parse_hhmmss_string(cls, tracks):
        """Transform a newline-separated string of album tracks (eg copy-pasted from a video description) into a list of lists.

        Each inner list contains [track_name, hhmmss_formatted_time].

        :param str tracks:
        :return: one [track_name, time] list per non-blank line
        :rtype: list
        :raises AttributeError: if a non-blank line cannot be parsed
        """
        return list(cls._parse_string(tracks))

    @classmethod
    def _parse_string(cls, tracks):
        """Yield the parsed items of every non-blank line.

        :param str tracks: a newline-separated string of lines corresponding to the tracks information
        :return: a generator of [track_name, time] lists
        :raises AttributeError: if a non-blank line cannot be parsed
        """
        for i, line in enumerate(_.strip() for _ in tracks.split('\n')):
            if line == '':
                continue
            try:
                yield cls._parse_track_line(line)
            except AttributeError:
                print("Couldn't parse line {}: '{}'. Please use a format as 'trackname - 3:45'".format(i + 1, line))
                raise

    @classmethod
    def _parse_track_line(cls, track_line):
        """Parse a string line such as '01. Doteru 3:45' into ['Doteru', '3:45'].

        A leading track number, if present, is matched but not captured.

        :param str track_line:
        :return: the parsed items: [track_name, time]
        :rtype: list
        :raises AttributeError: if the line does not match the expected format
        """
        regex = re.compile(r"(?: {track_number} {sep1})? ( {track_name} ) {sep2} ({hhmmss})".format(**cls.regexes), re.X)
        match = regex.search(track_line.strip())
        if match is None:
            # AttributeError is what the former implicit `None.groups()` raised
            # and what _parse_string catches to report the offending line.
            raise AttributeError("Couldn't parse track line '{}'".format(track_line))
        return list(match.groups())

    # CONVERT durations to timestamps tuples (segmentation start-end pair)
    @classmethod
    def convert_to_timestamps(cls, tracks_row_strings):
        """Transform a newline-separated string of album tracks with durations into each track's starting timestamp.

        The input (eg copy-pasted from the youtube video description) gives,
        per line, a track and its duration in hhmmss format.

        :param str tracks_row_strings:
        :return: the list of each track's starting timestamp; the first one is always '0:00'
        :rtype: list
        """
        lines = cls.parse_hhmmss_string(tracks_row_strings)  # list of lists
        timestamps = ['0:00']
        # The last track's duration starts no further track, hence lines[:-1].
        for line in lines[:-1]:
            timestamps.append(cls.add(timestamps[-1], line[-1]))
        return timestamps

    @classmethod
    def add(cls, timestamp1, duration):
        """Add a duration to a timestamp.

        :param str timestamp1: hh:mm:ss
        :param str duration: hh:mm:ss
        :return: hh:mm:ss
        :rtype: str
        """
        return cls.hhmmss_format(cls.to_seconds(timestamp1) + cls.to_seconds(duration))

    ###########################
    @staticmethod
    def to_seconds(timestamp):
        """Transform a hh:mm:ss formatted string timestamp to its equivalent duration in seconds as an integer."""
        return sum(60**i * int(x) for i, x in enumerate(reversed(timestamp.split(':'))))

    @staticmethod
    def hhmmss_format(seconds):
        """Transform a non-negative duration in seconds to its equivalent zero-padded hh:mm:ss formatted string.

        Hours are not wrapped at 24, so durations of a day or more stay correct.
        """
        hours, remainder = divmod(int(seconds), 3600)
        minutes, secs = divmod(remainder, 60)
        return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, secs)
182