| Total Complexity | 1 |
| Total Lines | 23 |
| Duplicated Lines | 0 % |
| 1 | from fuel.datasets.hdf5 import H5PYDataset |
||
class YouTubeAudio(H5PYDataset):
    r"""Audio track of a single YouTube video, stored as an HDF5 dataset.

    The dataset file is located with ``find_in_data_path`` under the name
    `<youtube_id>.hdf5`. These datasets define no split, so the complete
    audio sequence is treated as the training set.

    The data is structured as `(batch, time, features)`: `features` holds
    the audio channels (dimension 1 or 2) and `batch` is equal to 1,
    because each file contains only one audio track.

    Parameters
    ----------
    youtube_id : str
        11-character video ID (taken from the YouTube URL).
    \*\*kwargs
        Forwarded to the `H5PYDataset` constructor. `file_or_path` and
        `which_sets` are already supplied here, so passing either one
        again raises a `TypeError` for the duplicated keyword.

    """
    def __init__(self, youtube_id, **kwargs):
        # Resolve the file location first so the parent call stays compact.
        dataset_file = find_in_data_path('{}.hdf5'.format(youtube_id))
        super(YouTubeAudio, self).__init__(
            file_or_path=dataset_file, which_sets=('train',), **kwargs
        )
||
| 29 |