Completed
Pull Request — master (#297)
by Bart
01:19
created

fuel.converters.convert_youtube_audio()   B

Complexity

Conditions 4

Size

Total Lines 25

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 4
dl 0
loc 25
rs 8.5806
1
import os
2
3
import h5py
4
import scipy.io.wavfile
5
6
from fuel.converters.base import fill_hdf5_file
7
8
9
def convert_youtube_audio(directory, output_directory, youtube_id, channels,
                          sample, output_filename=None):
    """Convert the audio track of a YouTube video to an HDF5 file.

    The file ``{youtube_id}.m4a`` in `directory` is transcoded with
    ``ffmpeg`` to ``{youtube_id}.wav`` (written to the same directory),
    the WAV samples are loaded into an array, and the array is handed to
    `fill_hdf5_file` as the ``'features'`` source of the ``'train'``
    split.

    Parameters
    ----------
    directory : str
        Directory holding the input ``.m4a`` file. The intermediate
        ``.wav`` file is written here as well and is not removed.
    output_directory : str
        Directory in which the HDF5 file is created.
    youtube_id : str
        Identifier used to build the input, WAV and (by default) output
        file names.
    channels : int
        Number of audio channels requested from ffmpeg (``-ac``).
    sample : int
        Sampling rate requested from ffmpeg (``-ar``).
    output_filename : str, optional
        Name of the HDF5 file. Defaults to ``{youtube_id}.hdf5``.

    Returns
    -------
    output_paths : tuple of str
        Single-element tuple containing the path of the written file.

    Raises
    ------
    OSError
        If the ``ffmpeg`` executable cannot be launched.
    subprocess.CalledProcessError
        If ``ffmpeg`` exits with a non-zero status.

    """
    # Function-scope import: keeps this block self-contained.
    import subprocess

    input_file = os.path.join(directory, '{}.m4a'.format(youtube_id))
    wav_filename = '{}.wav'.format(youtube_id)
    wav_file = os.path.join(directory, wav_filename)

    # Pass an argument list (no shell) so that paths or IDs containing
    # spaces or shell metacharacters are handed to ffmpeg verbatim, and
    # fail immediately if the transcode does not succeed.
    subprocess.check_call(['ffmpeg', '-y', '-i', input_file,
                           '-ac', str(channels), '-ar', str(sample),
                           wav_file])

    # Load WAV into array
    _, data = scipy.io.wavfile.read(wav_file)
    if data.ndim == 1:
        # Single-channel audio is read as 1-D; add a trailing axis so the
        # two dimension labels assigned below always apply.
        data = data[:, None]

    # Store in HDF5
    if output_filename is None:
        output_filename = '{}.hdf5'.format(youtube_id)
    output_file = os.path.join(output_directory, output_filename)

    with h5py.File(output_file, 'w') as h5file:
        fill_hdf5_file(h5file, (('train', 'features', data),))
        h5file['features'].dims[0].label = 'time'
        h5file['features'].dims[1].label = 'feature'

    return (output_file,)
34
35
36
def fill_subparser(subparser):
    """Register the command-line options of the YouTube audio converter.

    Parameters
    ----------
    subparser : :class:`argparse.ArgumentParser`
        Parser to which ``--youtube-id``, ``--channels`` and ``--sample``
        are added.

    Returns
    -------
    callable
        The `convert_youtube_audio` conversion function.

    """
    # One (flag, keyword arguments) entry per option, in registration order.
    option_specs = (
        ('--youtube-id', {
            'type': str,
            'required': True,
            'help': ("The YouTube ID of the video from which to extract "
                     "audio, usually an 11-character string."),
        }),
        ('--channels', {
            'type': int,
            'default': 1,
            'help': "The number of audio channels to convert to",
        }),
        ('--sample', {
            'type': int,
            'default': 16000,
            'help': "The sampling rate in Hz",
        }),
    )
    for flag, settings in option_specs:
        subparser.add_argument(flag, **settings)
    return convert_youtube_audio
51