fuel.converters.convert_youtube_audio() - Code Metrics - Inspection of "Add YouTube audio dataset" - mila-udem/fuel - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#297)

by Bart

created 2016-01-28 04:37 UTC

fuel.converters.convert_youtube_audio() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	5
dl	0
loc	52
rs	8.3534

How to fix Long Method

import os
import subprocess
import sys

import h5py
import scipy.io.wavfile

from fuel.converters.base import fill_hdf5_file


def convert_youtube_audio(directory, output_directory, youtube_id, channels,
                          sample, output_filename=None):
    """Converts downloaded YouTube audio to HDF5 format.

    The input file `<youtube_id>.m4a` in `directory` is first converted
    to `<youtube_id>.wav` (written next to it) using `ffmpeg`, then the
    samples are loaded and stored in an HDF5 file as a single example.

    Requires `ffmpeg` to be installed and available on the command line
    (i.e. available on your `PATH`).

    Parameters
    ----------
    directory : str
        Directory in which input files reside.
    output_directory : str
        Directory in which to save the converted dataset.
    youtube_id : str
        11-character video ID (taken from YouTube URL)
    channels : int
        The number of audio channels to use in the PCM Wave file.
    sample : int
        The sampling rate to use in Hz, e.g. 44100 or 16000.
    output_filename : str, optional
        Name of the saved dataset. If `None` (the default), `youtube_id.hdf5`
        is used.

    Returns
    -------
    output_paths : tuple of str
        Single-element tuple containing the path of the HDF5 file written.

    Raises
    ------
    RuntimeError
        If `ffmpeg` cannot be launched, or if `ffmpeg -version` exits with
        a non-zero status.
    subprocess.CalledProcessError
        If the `ffmpeg` conversion itself exits with a non-zero status.

    """
    input_file = os.path.join(directory, '{}.m4a'.format(youtube_id))
    wav_file = os.path.join(directory, '{}.wav'.format(youtube_id))

    # Probe for ffmpeg. A missing executable makes `subprocess.call` raise
    # OSError instead of returning a status, so both outcomes are mapped to
    # the same RuntimeError.
    try:
        ffmpeg_not_available = subprocess.call(['ffmpeg', '-version'])
    except OSError:
        ffmpeg_not_available = True
    if ffmpeg_not_available:
        raise RuntimeError('conversion requires ffmpeg')

    # `-y` overwrites an existing WAV file without prompting.
    subprocess.check_call(['ffmpeg', '-y', '-i', input_file, '-ac',
                           str(channels), '-ar', str(sample), wav_file],
                          stdout=sys.stdout)

    # Load WAV into array
    _, data = scipy.io.wavfile.read(wav_file)
    if data.ndim == 1:
        # One-dimensional data gets a trailing axis so that the array
        # always ends up with the three axes labelled below.
        data = data[:, None]
    # Prepend an axis of length one: the whole recording is one example.
    data = data[None, :]

    # Store in HDF5
    if output_filename is None:
        output_filename = '{}.hdf5'.format(youtube_id)
    output_file = os.path.join(output_directory, output_filename)

    with h5py.File(output_file, 'w') as h5file:
        fill_hdf5_file(h5file, (('train', 'features', data),))
        h5file['features'].dims[0].label = 'batch'
        h5file['features'].dims[1].label = 'time'
        h5file['features'].dims[2].label = 'feature'

    return (output_file,)


def fill_subparser(subparser):
    """Sets up a subparser to convert YouTube audio files.

    Adds the compulsory `--youtube-id` flag as well as the optional
    `sample` and `channels` flags.

    Parameters
    ----------
    subparser : :class:`argparse.ArgumentParser`
        Subparser handling the `youtube_audio` command.

    Returns
    -------
    converter : callable
        The :func:`convert_youtube_audio` function.

    """
    subparser.add_argument(
        '--youtube-id', type=str, required=True,
        help=("The YouTube ID of the video from which to extract audio, "
              "usually an 11-character string.")
    )
    # The adjacent string literals are concatenated, so each fragment must
    # end with a space to keep the words apart in the rendered help text.
    subparser.add_argument(
        '--channels', type=int, default=1,
        help=("The number of audio channels to convert to. The default of 1 "
              "means audio is converted to mono.")
    )
    subparser.add_argument(
        '--sample', type=int, default=16000,
        help=("The sampling rate in Hz. The default of 16000 is "
              "significantly downsampled compared to normal WAVE files; "
              "pass 44100 for the usual sampling rate.")
    )
    return convert_youtube_audio


1			import os
2			import subprocess
3			import sys
4
5			import h5py
6			import scipy.io.wavfile
7
8			from fuel.converters.base import fill_hdf5_file
9
10
11			def convert_youtube_audio(directory, output_directory, youtube_id, channels,
12			sample, output_filename=None):
13			"""Converts downloaded YouTube audio to HDF5 format.
14
15			Requires `ffmpeg` to be installed and available on the command line
16			(i.e. available on your `PATH`).
17
18			Parameters
19			----------
20			directory : str
21			Directory in which input files reside.
22			output_directory : str
23			Directory in which to save the converted dataset.
24			youtube_id : str
25			11-character video ID (taken from YouTube URL)
26			channels : int
27			The number of audio channels to use in the PCM Wave file.
28			sample : int
29			The sampling rate to use in Hz, e.g. 44100 or 16000.
30			output_filename : str, optional
31			Name of the saved dataset. If `None` (the default), `youtube_id.hdf5`
32			is used.
33
34			"""
35			input_file = os.path.join(directory, '{}.m4a'.format(youtube_id))
36			wav_filename = '{}.wav'.format(youtube_id)
37			wav_file = os.path.join(directory, wav_filename)
38			ffmpeg_not_available = subprocess.call(['ffmpeg', '-version'])
39			if ffmpeg_not_available:
40			raise RuntimeError('conversion requires ffmpeg')
41			subprocess.check_call(['ffmpeg', '-y', '-i', input_file, '-ac',
42			str(channels), '-ar', str(sample), wav_file],
43			stdout=sys.stdout)
44
45			# Load WAV into array
46			_, data = scipy.io.wavfile.read(wav_file)
47			if data.ndim == 1:
48			data = data[:, None]
49			data = data[None, :]
50
51			# Store in HDF5
52			if output_filename is None:
53			output_filename = '{}.hdf5'.format(youtube_id)
54			output_file = os.path.join(output_directory, output_filename)
55
56			with h5py.File(output_file, 'w') as h5file:
57			fill_hdf5_file(h5file, (('train', 'features', data),))
58			h5file['features'].dims[0].label = 'batch'
59			h5file['features'].dims[1].label = 'time'
60			h5file['features'].dims[2].label = 'feature'
61
62			return (output_file,)
63
64
65			def fill_subparser(subparser):
66			"""Sets up a subparser to convert YouTube audio files.
67
68			Adds the compulsory `--youtube-id` flag as well as the optional
69			`sample` and `channels` flags.
70
71			Parameters
72			----------
73			subparser : :class:`argparse.ArgumentParser`
74			Subparser handling the `youtube_audio` command.
75
76			"""
77			subparser.add_argument(
78			'--youtube-id', type=str, required=True,
79			help=("The YouTube ID of the video from which to extract audio, "
80			"usually an 11-character string.")
81			)
82			subparser.add_argument(
83			'--channels', type=int, default=1,
84			help=("The number of audio channels to convert to. The default of 1"
85			"means audio is converted to mono.")
86			)
87			subparser.add_argument(
88			'--sample', type=int, default=16000,
89			help=("The sampling rate in Hz. The default of 16000 is "
90			"significantly downsampled compared to normal WAVE files; "
91			"pass 44100 for the usual sampling rate.")
92			)
93			return convert_youtube_audio
94

mila-udem / fuel

Pull Request — master (#297)

fuel.converters.convert_youtube_audio() B

Complexity

Size

Duplication

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like