fuel.converters.convert_youtube_audio() - Code Metrics - Inspection of "Add YouTube audio dataset" - mila-udem/fuel - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#297)

by Bart

created 2016-01-27 23:21 UTC

fuel.converters.convert_youtube_audio() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	4
dl	0
loc	25
rs	8.5806

import os
import subprocess

import h5py
import scipy.io.wavfile

from fuel.converters.base import fill_hdf5_file


def convert_youtube_audio(directory, output_directory, youtube_id, channels,
                          sample, output_filename=None):
    """Converts a downloaded YouTube audio track to an HDF5 file.

    The file ``{youtube_id}.m4a`` in `directory` is transcoded by the
    ``ffmpeg`` executable to ``{youtube_id}.wav`` (written to the same
    directory and left in place afterwards). The WAV samples are then
    loaded and handed to :func:`fill_hdf5_file` as the ``'features'``
    source of the ``'train'`` split.

    Parameters
    ----------
    directory : str
        Directory holding the input ``.m4a`` file; the intermediate WAV
        file is written here as well.
    output_directory : str
        Directory in which the HDF5 file is created.
    youtube_id : str
        Identifier used to build the input, WAV and default output
        file names.
    channels : int
        Number of audio channels, passed to ffmpeg's ``-ac`` option.
    sample : int
        Sampling rate, passed to ffmpeg's ``-ar`` option.
    output_filename : str, optional
        Name of the HDF5 file. Defaults to ``{youtube_id}.hdf5``.

    Returns
    -------
    output_paths : tuple of str
        Single-element tuple containing the path of the HDF5 file.

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status.

    """
    input_file = os.path.join(directory, '{}.m4a'.format(youtube_id))
    wav_filename = '{}.wav'.format(youtube_id)
    wav_file = os.path.join(directory, wav_filename)

    # Pass the arguments as a list so that no shell is involved: paths
    # containing spaces or shell metacharacters are handed to ffmpeg
    # verbatim. check_call raises on failure, which prevents silently
    # reading a stale WAV file left over from a previous run.
    command = ['ffmpeg', '-y', '-i', input_file,
               '-ac', str(channels), '-ar', str(sample), wav_file]
    subprocess.check_call(command)

    # Load WAV into array
    _, data = scipy.io.wavfile.read(wav_file)
    if data.ndim == 1:
        # Single-channel audio is read as a 1-D array; add a second axis
        # so that the dataset always has a (time, feature) layout.
        data = data[:, None]

    # Store in HDF5
    if output_filename is None:
        output_filename = '{}.hdf5'.format(youtube_id)
    output_file = os.path.join(output_directory, output_filename)

    with h5py.File(output_file, 'w') as h5file:
        fill_hdf5_file(h5file, (('train', 'features', data),))
        h5file['features'].dims[0].label = 'time'
        h5file['features'].dims[1].label = 'feature'

    return (output_file,)


def fill_subparser(subparser):
    """Registers the YouTube audio conversion options on a parser.

    Parameters
    ----------
    subparser : object
        Parser-like object exposing ``add_argument`` (presumably an
        :class:`argparse.ArgumentParser` subparser) on which the
        ``--youtube-id``, ``--channels`` and ``--sample`` options are
        declared.

    Returns
    -------
    convert_function : callable
        The :func:`convert_youtube_audio` function.

    """
    # One (flag, keyword arguments) entry per option, registered in order.
    option_specs = (
        ('--youtube-id', dict(
            type=str, required=True,
            help=("The YouTube ID of the video from which to extract "
                  "audio, usually an 11-character string."))),
        ('--channels', dict(
            type=int, default=1,
            help="The number of audio channels to convert to")),
        ('--sample', dict(
            type=int, default=16000,
            help="The sampling rate in Hz")),
    )
    for flag, options in option_specs:
        subparser.add_argument(flag, **options)
    return convert_youtube_audio


1			import os
2
3			import h5py
4			import scipy.io.wavfile
5
6			from fuel.converters.base import fill_hdf5_file
7
8
9			def convert_youtube_audio(directory, output_directory, youtube_id, channels,
10			sample, output_filename=None):
11			input_file = os.path.join(directory, '{}.m4a'.format(youtube_id))
12			wav_filename = '{}.wav'.format(youtube_id)
13			wav_file = os.path.join(directory, wav_filename)
14			command = "ffmpeg -y -i {} -ac {} -ar {} {}".format(
15			input_file, channels, sample, wav_file)
16			os.system(command)
17
18			# Load WAV into array
19			_, data = scipy.io.wavfile.read(wav_file)
20			if data.ndim == 1:
21			data = data[:, None]
22
23			# Store in HDF5
24			if output_filename is None:
25			output_filename = '{}.hdf5'.format(youtube_id)
26			output_file = os.path.join(output_directory, output_filename)
27
28			with h5py.File(output_file, 'w') as h5file:
29			fill_hdf5_file(h5file, (('train', 'features', data),))
30			h5file['features'].dims[0].label = 'time'
31			h5file['features'].dims[1].label = 'feature'
32
33			return (output_file,)
34
35
36			def fill_subparser(subparser):
37			subparser.add_argument(
38			'--youtube-id', type=str, required=True,
39			help=("The YouTube ID of the video from which to extract audio, "
40			"usually an 11-character string.")
41			)
42			subparser.add_argument(
43			'--channels', type=int, default=1,
44			help="The number of audio channels to convert to"
45			)
46			subparser.add_argument(
47			'--sample', type=int, default=16000,
48			help="The sampling rate in Hz"
49			)
50			return convert_youtube_audio
51

mila-udem / fuel

Pull Request — master (#297)

fuel.converters.convert_youtube_audio() B

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like