Completed
Pull Request — master (#184)
by Martin
44s
created

test_log_metrics()   F

Complexity

Conditions 26

Size

Total Lines 90

Duplication

Lines 0
Ratio 0 %

Importance

Changes 6
Bugs 0 Features 0
Metric Value
cc 26
c 6
b 0
f 0
dl 0
loc 90
rs 2.0085

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign that the commented part should be extracted into a new method, with the comment serving as a starting point for a good name for that new method.

Commonly applied refactorings include Extract Method, which moves a cohesive group of statements into its own well-named method.

Complexity

Complex code units like test_log_metrics() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
# coding=utf-8
3
from __future__ import division, print_function, unicode_literals
4
import datetime
5
import mock
6
import pytest
7
8
from sacred.metrics_logger import ScalarMetricLogEntry, linearize_metrics
9
10
pymongo = pytest.importorskip("pymongo")
11
mongomock = pytest.importorskip("mongomock")
12
13
from sacred.dependencies import get_digest
14
from sacred.observers.mongo import (MongoObserver, force_bson_encodeable)
15
16
# Two fixed, distinct timestamps reused by the tests below as start,
# heartbeat, stop, interrupt and fail times.
T1 = datetime.datetime(1999, 5, 4, 3, 2, 1, 0)
17
T2 = datetime.datetime(1999, 5, 5, 5, 5, 5, 5)
18
19
20
@pytest.fixture
def mongo_obs():
    """Build a MongoObserver on top of an in-memory mongomock database.

    Runs go to the ``runs`` collection and metrics to the ``metrics``
    collection of that database; the ``fs`` argument is a ``MagicMock``.
    """
    database = mongomock.MongoClient().db
    return MongoObserver(database.runs, mock.MagicMock(),
                         metrics_collection=database.metrics)
27
28
29
@pytest.fixture()
def sample_run():
    """Return the keyword arguments the tests pass to ``started_event``."""
    return {
        '_id': 'FEDCBA9876543210',
        'ex_info': {
            'name': 'test_exp',
            'sources': [],
            'doc': '',
            'base_dir': '/tmp',
        },
        'command': 'run',
        'host_info': {
            'hostname': 'test_host',
            'cpu_count': 1,
            'python_version': '3.4',
        },
        'start_time': T1,
        'config': {'config': 'True', 'foo': 'bar', 'answer': 42},
        'meta_info': {'comment': 'test run'},
    }
45
46
47
def test_mongo_observer_started_event_creates_run(mongo_obs, sample_run):
    """started_event without an id returns one and stores a full run entry."""
    # No id is supplied, so a non-None id is expected back from the observer.
    sample_run['_id'] = None
    new_id = mongo_obs.started_event(**sample_run)
    assert new_id is not None
    assert mongo_obs.runs.count() == 1

    stored_run = mongo_obs.runs.find_one()
    expected_run = {
        '_id': new_id,
        'experiment': sample_run['ex_info'],
        'format': mongo_obs.VERSION,
        'command': sample_run['command'],
        'host': sample_run['host_info'],
        'start_time': sample_run['start_time'],
        'heartbeat': None,
        'info': {},
        'captured_out': '',
        'artifacts': [],
        'config': sample_run['config'],
        'meta': sample_run['meta_info'],
        'status': 'RUNNING',
        'resources': []
    }
    assert stored_run == expected_run
69
70
71
def test_mongo_observer_started_event_uses_given_id(mongo_obs, sample_run):
    """started_event returns and stores the '_id' supplied by the caller."""
    expected_id = sample_run['_id']

    returned_id = mongo_obs.started_event(**sample_run)

    assert returned_id == expected_id
    assert mongo_obs.runs.count() == 1
    stored_run = mongo_obs.runs.find_one()
    assert stored_run['_id'] == expected_id
77
78
79
def test_mongo_observer_equality(mongo_obs):
    """An observer on the same runs collection compares equal; a str does not."""
    twin = MongoObserver(mongo_obs.runs, mock.MagicMock())

    # Both operators are exercised on purpose: == and != are checked separately.
    assert mongo_obs == twin
    assert not mongo_obs != twin

    assert not mongo_obs == 'foo'
    assert mongo_obs != 'foo'
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
88
89
90
def test_mongo_observer_heartbeat_event_updates_run(mongo_obs, sample_run):
    """heartbeat_event stores info, captured output, beat time and result."""
    mongo_obs.started_event(**sample_run)

    beat = {
        'info': {'my_info': [1, 2, 3], 'nr': 7},
        'captured_out': 'some output',
        'beat_time': T2,
        'result': 1337,
    }
    mongo_obs.heartbeat_event(**beat)

    assert mongo_obs.runs.count() == 1
    stored_run = mongo_obs.runs.find_one()
    assert stored_run['heartbeat'] == T2
    assert stored_run['result'] == 1337
    assert stored_run['info'] == beat['info']
    assert stored_run['captured_out'] == beat['captured_out']
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
104
105
106
def test_mongo_observer_completed_event_updates_run(mongo_obs, sample_run):
    """completed_event records stop time, result and the COMPLETED status."""
    mongo_obs.started_event(**sample_run)

    mongo_obs.completed_event(stop_time=T2, result=42)

    assert mongo_obs.runs.count() == 1
    stored_run = mongo_obs.runs.find_one()
    for field, expected in (('stop_time', T2),
                            ('result', 42),
                            ('status', 'COMPLETED')):
        assert stored_run[field] == expected
116
117
118
def test_mongo_observer_interrupted_event_updates_run(mongo_obs, sample_run):
    """interrupted_event records the stop time and the status it was given."""
    mongo_obs.started_event(**sample_run)

    mongo_obs.interrupted_event(interrupt_time=T2, status='INTERRUPTED')

    assert mongo_obs.runs.count() == 1
    stored_run = mongo_obs.runs.find_one()
    for field, expected in (('stop_time', T2),
                            ('status', 'INTERRUPTED')):
        assert stored_run[field] == expected
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
127
128
129
def test_mongo_observer_failed_event_updates_run(mongo_obs, sample_run):
    """failed_event records stop time, FAILED status and the traceback text."""
    mongo_obs.started_event(**sample_run)

    trace_text = "lots of errors and\nso\non..."
    mongo_obs.failed_event(fail_time=T2, fail_trace=trace_text)

    assert mongo_obs.runs.count() == 1
    stored_run = mongo_obs.runs.find_one()
    for field, expected in (('stop_time', T2),
                            ('status', 'FAILED'),
                            ('fail_trace', trace_text)):
        assert stored_run[field] == expected
141
142
143
def test_mongo_observer_artifact_event(mongo_obs, sample_run):
    """artifact_event calls fs.put and leaves a non-empty artifacts entry."""
    mongo_obs.started_event(**sample_run)

    artifact_name = 'mysetup'
    mongo_obs.artifact_event(artifact_name, "setup.py")

    put = mongo_obs.fs.put
    assert put.called
    # call_args[1] is the keyword-argument dict of the most recent call.
    put_kwargs = put.call_args[1]
    assert put_kwargs['filename'].endswith(artifact_name)

    stored_run = mongo_obs.runs.find_one()
    assert stored_run['artifacts']
156
157
158
def test_mongo_observer_resource_event(mongo_obs, sample_run):
    """resource_event queries fs.exists and records (filename, digest)."""
    mongo_obs.started_event(**sample_run)

    resource_path = "setup.py"
    expected_digest = get_digest(resource_path)

    mongo_obs.resource_event(resource_path)

    fs_exists = mongo_obs.fs.exists
    assert fs_exists.called
    fs_exists.assert_any_call(filename=resource_path)

    stored_run = mongo_obs.runs.find_one()
    assert stored_run['resources'] == [(resource_path, expected_digest)]
171
172
173
def test_force_bson_encodable_doesnt_change_valid_document():
    """force_bson_encodeable returns an equal document when given this one."""
    document = {
        'int': 1,
        'string': 'foo',
        'float': 23.87,
        'list': ['a', 1, True],
        'bool': True,
        'cr4zy: _but_ [legal) Key!': '$illegal.key.as.value',
        'datetime': datetime.datetime.utcnow(),
        'tuple': (1, 2.0, 'three'),
        'none': None,
    }
    assert force_bson_encodeable(document) == document
179
180
181
def test_force_bson_encodable_substitutes_illegal_value_with_strings():
    """Check how force_bson_encodeable rewrites the illegal parts below.

    Expected by this test: module values become their ``str()``, a leading
    '$' in a key becomes '@', '.' in a key becomes ',', and the float key
    12.7 becomes the string '12,7'.
    """
    document = {
        'a_module': datetime,
        'some_legal_stuff': {'foo': 'bar', 'baz': [1, 23, 4]},
        'nested': {'dict': {'with': {'illegal_module': mock}}},
        '$illegal': 'because it starts with a $',
        'il.legal': 'because it contains a .',
        12.7: 'illegal because it is not a string key'
    }
    # Built from fresh literals so nothing is shared with the input document.
    encodable = {
        'a_module': str(datetime),
        'some_legal_stuff': {'foo': 'bar', 'baz': [1, 23, 4]},
        'nested': {'dict': {'with': {'illegal_module': str(mock)}}},
        '@illegal': 'because it starts with a $',
        'il,legal': 'because it contains a .',
        '12,7': 'illegal because it is not a string key'
    }
    assert force_bson_encodeable(document) == encodable
211
212
213
@pytest.fixture
def logged_metrics():
    """Return nine scalar metric log entries, each stamped at creation time.

    The entries come in three groups of three: 'training.loss',
    'training.accuracy', then 'training.loss' again.
    """
    # (metric name, step, value) in the order the entries are created.
    measurements = [
        ("training.loss", 10, 1),
        ("training.loss", 20, 2),
        ("training.loss", 30, 3),

        ("training.accuracy", 10, 100),
        ("training.accuracy", 20, 200),
        ("training.accuracy", 30, 300),

        ("training.loss", 40, 10),
        ("training.loss", 50, 20),
        ("training.loss", 60, 30),
    ]
    return [
        ScalarMetricLogEntry(name, step, datetime.datetime.utcnow(), value)
        for name, step, value in measurements
    ]
228
229
230
def _log_metrics_and_heartbeat(mongo_obs, entries, info, beat_time):
    """Log *entries* as metrics, then store *info* through a heartbeat event.

    ``log_metrics`` receives the linearized entries together with the
    ``info`` dictionary; the following heartbeat passes that same ``info``
    dictionary, an arbitrary captured output and *beat_time* to the observer.
    """
    mongo_obs.log_metrics(linearize_metrics(entries), info)
    mongo_obs.heartbeat_event(info=info, captured_out='some output',
                              beat_time=beat_time)


def _assert_metric_stored(mongo_obs, db_run, name, steps, values):
    """Assert that metric *name* of *db_run* is stored and referenced.

    Checks that the metrics collection holds a record for this name and run,
    that the run's ``info['metrics']`` list references that record, that the
    stored steps and values match, and that the timestamps never decrease.
    """
    metric = mongo_obs.metrics.find_one({"name": name,
                                         "run_id": db_run['_id']})
    # Fail with a clear assertion instead of a TypeError on None below.
    assert metric is not None
    reference = {"name": name, "id": str(metric["_id"])}
    assert reference in db_run['info']["metrics"]
    assert metric["steps"] == steps
    assert metric["values"] == values
    timestamps = metric["timestamps"]
    for earlier, later in zip(timestamps, timestamps[1:]):
        assert earlier <= later


def test_log_metrics(mongo_obs, sample_run, logged_metrics):
    """
    Test storing scalar measurements

    Test whether measurements logged using _run.metrics.log_scalar_metric
    are being stored in the 'metrics' collection
    and that the experiment 'info' dictionary contains a valid reference
    to the metrics collection for each of the metric.

    Metrics are identified by name (e.g.: 'training.loss') and by the
    experiment run that produced them. Each metric contains a list of x values
    (e.g. iteration step), y values (measured values) and timestamps of when
    each of the measurements was taken.
    """
    # Start the experiment
    mongo_obs.started_event(**sample_run)

    # Initialize the info dictionary with arbitrary values
    info = {'my_info': [1, 2, 3], 'nr': 7}

    # Take the first 6 measured events, store them grouped by metric name in
    # the 'metrics' collection and reference the new records in 'info'.
    _log_metrics_and_heartbeat(mongo_obs, logged_metrics[:6], info, T1)

    # There should be only one run stored
    assert mongo_obs.runs.count() == 1
    db_run = mongo_obs.runs.find_one()
    # ... and the info dictionary should contain a list of created metrics
    assert "metrics" in db_run['info']
    assert isinstance(db_run['info']["metrics"], list)

    # Two metrics should be stored (training.loss and training.accuracy),
    # each referencing the run and referenced from the run's info.
    assert mongo_obs.metrics.count() == 2
    _assert_metric_stored(mongo_obs, db_run, "training.loss",
                          [10, 20, 30], [1, 2, 3])
    _assert_metric_stored(mongo_obs, db_run, "training.accuracy",
                          [10, 20, 30], [100, 200, 300])

    # Now, process the remaining events.
    # The metrics shouldn't be overwritten, but appended instead.
    _log_metrics_and_heartbeat(mongo_obs, logged_metrics[6:], info, T2)

    assert mongo_obs.runs.count() == 1
    db_run = mongo_obs.runs.find_one()
    assert "metrics" in db_run['info']

    # The newly added measurements belong to the same run and have the same
    # names, so the total number of metrics should not change ...
    assert mongo_obs.metrics.count() == 2
    # ... but the values should be appended to the original lists.
    _assert_metric_stored(mongo_obs, db_run, "training.loss",
                          [10, 20, 30, 40, 50, 60], [1, 2, 3, 10, 20, 30])
    _assert_metric_stored(mongo_obs, db_run, "training.accuracy",
                          [10, 20, 30], [100, 200, 300])

    # Make sure that when starting a new experiment, new records in metrics
    # are created instead of appending to the old ones.
    sample_run["_id"] = "NEWID"
    mongo_obs.started_event(**sample_run)
    _log_metrics_and_heartbeat(mongo_obs, logged_metrics[:4], info, T1)
    # A new run has been created
    assert mongo_obs.runs.count() == 2
    # Another 2 metrics have been created
    assert mongo_obs.metrics.count() == 4