|
1
|
|
|
def test_segmenting_webm_preserves_stream_qualities(tmp_path_factory):
    """Segment a webm file into mp3 tracks and compare their ffprobe metadata.

    GIVEN the 'Burning.webm' file in the 'data' directory next to this module
    WHEN it is cut into two tracks with AudioSegmenter
    THEN every produced track is an mp3 file with the same sample rate and
    channel count as the source stream, and with the expected duration,
    byte size and bit rate.

    The ffprobe executable is taken from the MUSIC_FFPROBE environment
    variable and falls back to 'ffprobe'.

    NOTE(review): the exact byte sizes, bit rates and the 'Lavf58.76.100'
    encoder tag presumably depend on the installed ffmpeg build -- confirm.
    """
    import os
    from pathlib import Path
    from typing import List

    from music_album_creation.audio_segmentation import AudioSegmenter
    from music_album_creation.ffmpeg import FFProbe
    from music_album_creation.ffprobe_client import FFProbeClient

    ffprobe_client = FFProbeClient(
        FFProbe(os.environ.get('MUSIC_FFPROBE', 'ffprobe'))
    )

    source_byte_size = 6560225

    # GIVEN a webm file AND the metadata ffprobe reports for it
    source_webm = Path(__file__).parent / 'data' / 'Burning.webm'
    source_info = ffprobe_client.get_stream_info(str(source_webm))

    # sanity check on metadata values BEFORE segmenting:
    # no programs and exactly one stream, which carries no tags
    assert source_info['programs'] == []
    assert len(source_info['streams']) == 1
    source_stream = source_info['streams'][0]
    assert source_stream['tags'] == {}

    # AND the audio stream has the expected Sample Rate (Hz), codec and
    # number of channels
    assert source_stream['sample_rate'] == '48000'
    assert source_stream['codec_name'] == 'opus'
    assert source_stream['channels'] == 2

    # AND the container reports the expected format values
    source_format = source_info['format']
    assert source_format['format_name'] == 'matroska,webm'
    assert source_format['format_long_name'] == 'Matroska / WebM'
    assert source_format['start_time'] == '-0.007000'
    assert source_format['duration'] == '393.161000'
    assert source_format['size'] == str(source_byte_size)
    assert source_webm.stat().st_size == source_byte_size
    assert source_format['bit_rate'] == '133486'  # bits per second
    assert source_format['probe_score'] == 100
    assert source_format['tags']['encoder'] == 'google/video-file'

    # AND maths add up: size is within 10% of (bit rate * duration / 8)
    source_size = int(source_format['size'])
    source_bit_rate = int(source_format['bit_rate'])
    source_duration = float(source_format['duration'])
    assert source_size >= 0.9 * source_bit_rate * source_duration / 8
    assert source_size <= 1.1 * source_bit_rate * source_duration / 8

    # WHEN segmenting the webm file
    segments_dir = tmp_path_factory.mktemp("segmented")
    track_boundaries = (
        ('1 - track1', '0', '10'),
        ('2 - track2', '10', '15'),
    )
    track_files: List[str] = AudioSegmenter(str(segments_dir)).segment(
        str(source_webm),
        track_boundaries,
    )

    # THEN the webm file is segmented, one file per requested track
    assert len(track_files) == 2

    # AND every segment matches its expected
    # (duration, size in Bytes, bit rate in bits per second)
    expected_per_track = (
        (10, 160749, 128188),
        (5, 80877, 128376),
    )
    for track, (expected_duration, expected_size, exp_bitrate) in zip(
        track_files, expected_per_track
    ):
        assert os.path.exists(track)
        track_info = ffprobe_client.get_stream_info(track)

        assert track_info['programs'] == []
        assert len(track_info['streams']) == 1
        track_stream = track_info['streams'][0]
        assert track_stream['tags'] == {}

        # AND the track keeps the sample rate and channel count of the
        # original audio stream, while being encoded as mp3
        assert track_stream['sample_rate'] == '48000'
        assert track_stream['codec_name'] == 'mp3'
        assert track_stream['channels'] == 2

        track_format = track_info['format']
        assert track_format['format_name'] == 'mp3'
        assert track_format['format_long_name'] == 'MP2/3 (MPEG audio layer 2/3)'
        # 'start_time' is deliberately not compared for the segments
        assert abs(float(track_format['duration']) - expected_duration) < 0.1
        assert int(track_format['size']) == expected_size
        assert track_format['bit_rate'] == str(exp_bitrate)  # bits per second
        # the mp3 segments are expected to score 51 (the source webm scores 100)
        assert track_format['probe_score'] == 51
        assert track_format['tags']['encoder'] == 'Lavf58.76.100'

        # AND maths add up: size is within 1% of (bit rate * duration / 8)
        track_size = int(track_format['size'])
        track_bit_rate = int(track_format['bit_rate'])
        size_from_bit_rate = track_bit_rate * float(track_format['duration']) / 8
        assert abs(track_size - size_from_bit_rate) < 0.01 * size_from_bit_rate

        # AND bitrate has not changed more than 5% compared to original
        assert abs(track_bit_rate - source_bit_rate) < 0.05 * source_bit_rate

        # AND file size is proportional to duration: within 5% of the source
        # size scaled by (track duration / source duration)
        size_from_duration = source_byte_size * expected_duration / source_duration
        assert abs(track_size - size_from_duration) < 0.05 * size_from_duration
|
121
|
|
|
|