| Conditions | 5 |
| Total Lines | 52 |
| Lines | 0 |
| Ratio | 0 % |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign that you should extract the commented part into a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
| 1 | import os |
||
def convert_youtube_audio(directory, output_directory, youtube_id, channels,
                          sample, output_filename=None):
    """Converts downloaded YouTube audio to HDF5 format.

    Requires `ffmpeg` to be installed and available on the command line
    (i.e. available on your `PATH`).

    The `.m4a` file named after `youtube_id` is first transcoded by
    `ffmpeg` into a `.wav` file in the same directory; that WAV file is
    then read into an array and written to an HDF5 file. The intermediate
    WAV file is left in `directory`.

    Parameters
    ----------
    directory : str
        Directory in which input files reside.
    output_directory : str
        Directory in which to save the converted dataset.
    youtube_id : str
        11-character video ID (taken from YouTube URL)
    channels : int
        The number of audio channels to use in the PCM Wave file.
    sample : int
        The sampling rate to use in Hz, e.g. 44100 or 16000.
    output_filename : str, optional
        Name of the saved dataset. If `None` (the default), `youtube_id.hdf5`
        is used.

    Returns
    -------
    output_paths : tuple of str
        Single-element tuple containing the path of the written HDF5 file.

    Raises
    ------
    RuntimeError
        If `ffmpeg` cannot be executed or its version check exits with a
        non-zero status.
    subprocess.CalledProcessError
        If the `ffmpeg` conversion itself exits with a non-zero status.

    """
    input_file = os.path.join(directory, '{}.m4a'.format(youtube_id))
    wav_filename = '{}.wav'.format(youtube_id)
    wav_file = os.path.join(directory, wav_filename)

    # `subprocess.call` raises OSError (FileNotFoundError on Python 3) when
    # the executable is not on the PATH, rather than returning a non-zero
    # exit code, so both failure modes have to be handled here.
    try:
        ffmpeg_not_available = subprocess.call(['ffmpeg', '-version'])
    except OSError:
        ffmpeg_not_available = True
    if ffmpeg_not_available:
        raise RuntimeError('conversion requires ffmpeg')

    # -y overwrites an existing WAV file without prompting.
    subprocess.check_call(['ffmpeg', '-y', '-i', input_file, '-ac',
                           str(channels), '-ar', str(sample), wav_file],
                          stdout=sys.stdout)

    # Load WAV into array
    _, data = scipy.io.wavfile.read(wav_file)
    if data.ndim == 1:
        # Give 1-D (single channel) data an explicit trailing axis.
        data = data[:, None]
    # Prepend a leading axis of length one; it is labelled 'batch' below.
    data = data[None, :]

    # Store in HDF5
    if output_filename is None:
        output_filename = '{}.hdf5'.format(youtube_id)
    output_file = os.path.join(output_directory, output_filename)

    with h5py.File(output_file, 'w') as h5file:
        fill_hdf5_file(h5file, (('train', 'features', data),))
        h5file['features'].dims[0].label = 'batch'
        h5file['features'].dims[1].label = 'time'
        h5file['features'].dims[2].label = 'feature'

    return (output_file,)
||
| 63 | |||
| 94 |