Completed
Pull Request — master (#183)
by Martin
33s
created

test_log_metrics()   F

Complexity

Conditions 26

Size

Total Lines 93

Duplication

Lines 0
Ratio 0 %

Importance

Changes 6
Bugs 0 Features 0
Metric Value
cc 26
c 6
b 0
f 0
dl 0
loc 93
rs 2

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

- Extract Method

Complexity

Complex code units like test_log_metrics() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods (or, inside a function, variables and statements) that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
# coding=utf-8
3
from __future__ import division, print_function, unicode_literals
4
import datetime
5
import mock
6
import pytest
7
8
from sacred.metrics_logger import ScalarMetricLogEntry, linearize_metrics
9
10
pymongo = pytest.importorskip("pymongo")
11
mongomock = pytest.importorskip("mongomock")
12
13
from sacred.dependencies import get_digest
14
from sacred.observers.mongo import (MongoObserver, force_bson_encodeable)
15
16
# Two fixed, distinct timestamps used by the tests as start/heartbeat/stop times.
T1 = datetime.datetime(year=1999, month=5, day=4,
                       hour=3, minute=2, second=1, microsecond=0)
T2 = datetime.datetime(year=1999, month=5, day=5,
                       hour=5, minute=5, second=5, microsecond=5)
18
19
20
@pytest.fixture
def mongo_obs():
    """Provide a MongoObserver wired to a fresh mongomock database.

    The ``runs`` and ``metrics`` collections come from mongomock, while the
    file store is a ``MagicMock`` so calls made on it can be inspected.
    """
    database = mongomock.MongoClient().db
    file_store = mock.MagicMock()
    return MongoObserver(database.runs, file_store,
                         metrics_collection=database.metrics)
27
28
29
@pytest.fixture()
def sample_run():
    """Return the keyword arguments the tests pass to ``started_event``."""
    return {
        '_id': 'FEDCBA9876543210',
        'ex_info': {
            'name': 'test_exp',
            'sources': [],
            'doc': '',
            'base_dir': '/tmp',
        },
        'command': 'run',
        'host_info': {
            'hostname': 'test_host',
            'cpu_count': 1,
            'python_version': '3.4',
        },
        'start_time': T1,
        'config': {'config': 'True', 'foo': 'bar', 'answer': 42},
        'meta_info': {'comment': 'test run'},
    }
45
46
47
def test_mongo_observer_started_event_creates_run(mongo_obs, sample_run):
    """started_event with ``_id=None`` returns a new id and stores one run."""
    sample_run['_id'] = None
    new_id = mongo_obs.started_event(**sample_run)
    assert new_id is not None
    assert mongo_obs.runs.count() == 1

    stored_run = mongo_obs.runs.find_one()
    # The stored document renames several started_event arguments
    # (ex_info -> experiment, host_info -> host, meta_info -> meta)
    # and carries the initial values of the remaining fields.
    expected_run = {
        '_id': new_id,
        'experiment': sample_run['ex_info'],
        'format': mongo_obs.VERSION,
        'command': sample_run['command'],
        'host': sample_run['host_info'],
        'start_time': sample_run['start_time'],
        'heartbeat': None,
        'info': {},
        'captured_out': '',
        'artifacts': [],
        'config': sample_run['config'],
        'meta': sample_run['meta_info'],
        'status': 'RUNNING',
        'resources': [],
    }
    assert stored_run == expected_run
69
70
71
def test_mongo_observer_started_event_uses_given_id(mongo_obs, sample_run):
    """started_event returns and stores an explicitly supplied ``_id``."""
    returned_id = mongo_obs.started_event(**sample_run)
    assert returned_id == sample_run['_id']
    assert mongo_obs.runs.count() == 1

    stored_id = mongo_obs.runs.find_one()['_id']
    assert stored_id == sample_run['_id']
77
78
79
def test_mongo_observer_equality(mongo_obs):
    """An observer on the same runs collection is equal; a string is not."""
    twin = MongoObserver(mongo_obs.runs, mock.MagicMock())

    # Both operators are exercised explicitly for each outcome, so ``==`` and
    # ``!=`` are each checked for the equal and the unequal case.
    assert mongo_obs == twin
    assert not mongo_obs != twin

    assert not mongo_obs == 'foo'
    assert mongo_obs != 'foo'
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
88
89
90
def test_mongo_observer_heartbeat_event_updates_run(mongo_obs, sample_run):
    """heartbeat_event stores beat time, result, info and captured output."""
    mongo_obs.started_event(**sample_run)

    run_info = {'my_info': [1, 2, 3], 'nr': 7}
    captured = 'some output'
    mongo_obs.heartbeat_event(info=run_info, captured_out=captured,
                              beat_time=T2, result=1337)

    assert mongo_obs.runs.count() == 1
    stored_run = mongo_obs.runs.find_one()
    expected_fields = [
        ('heartbeat', T2),
        ('result', 1337),
        ('info', run_info),
        ('captured_out', captured),
    ]
    for field, expected in expected_fields:
        assert stored_run[field] == expected
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
104
105
106
def test_mongo_observer_completed_event_updates_run(mongo_obs, sample_run):
    """completed_event stores stop time and result with status COMPLETED."""
    mongo_obs.started_event(**sample_run)
    mongo_obs.completed_event(stop_time=T2, result=42)

    assert mongo_obs.runs.count() == 1
    stored_run = mongo_obs.runs.find_one()
    expected_fields = [
        ('stop_time', T2),
        ('result', 42),
        ('status', 'COMPLETED'),
    ]
    for field, expected in expected_fields:
        assert stored_run[field] == expected
116
117
118
def test_mongo_observer_interrupted_event_updates_run(mongo_obs, sample_run):
    """interrupted_event stores the interrupt time and the given status."""
    mongo_obs.started_event(**sample_run)
    mongo_obs.interrupted_event(interrupt_time=T2, status='INTERRUPTED')

    assert mongo_obs.runs.count() == 1
    stored_run = mongo_obs.runs.find_one()
    # The interrupt time is stored under the 'stop_time' key.
    assert stored_run['stop_time'] == T2
    assert stored_run['status'] == 'INTERRUPTED'
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
127
128
129
def test_mongo_observer_failed_event_updates_run(mongo_obs, sample_run):
    """failed_event stores fail time and traceback with status FAILED."""
    mongo_obs.started_event(**sample_run)

    traceback_text = "lots of errors and\nso\non..."
    mongo_obs.failed_event(fail_time=T2, fail_trace=traceback_text)

    assert mongo_obs.runs.count() == 1
    stored_run = mongo_obs.runs.find_one()
    expected_fields = [
        ('stop_time', T2),
        ('status', 'FAILED'),
        ('fail_trace', traceback_text),
    ]
    for field, expected in expected_fields:
        assert stored_run[field] == expected
141
142
143
def test_mongo_observer_artifact_event(mongo_obs, sample_run):
    """artifact_event calls ``fs.put`` and records an artifact on the run."""
    mongo_obs.started_event(**sample_run)

    artifact_name = 'mysetup'
    mongo_obs.artifact_event(artifact_name, "setup.py")

    put = mongo_obs.fs.put
    assert put.called
    # call_args[1] holds the keyword arguments of the most recent call.
    put_kwargs = put.call_args[1]
    assert put_kwargs['filename'].endswith(artifact_name)

    stored_run = mongo_obs.runs.find_one()
    assert stored_run['artifacts']
156
157
158
def test_mongo_observer_resource_event(mongo_obs, sample_run):
    """resource_event queries ``fs.exists`` and records (filename, digest)."""
    mongo_obs.started_event(**sample_run)

    resource_path = "setup.py"
    expected_digest = get_digest(resource_path)

    mongo_obs.resource_event(resource_path)

    exists = mongo_obs.fs.exists
    assert exists.called
    exists.assert_any_call(filename=resource_path)

    stored_resources = mongo_obs.runs.find_one()['resources']
    assert stored_resources == [(resource_path, expected_digest)]
171
172
173
def test_force_bson_encodable_doesnt_change_valid_document():
    """A document with only encodable keys and values compares equal afterwards."""
    document = {
        'int': 1,
        'string': 'foo',
        'float': 23.87,
        'list': ['a', 1, True],
        'bool': True,
        'cr4zy: _but_ [legal) Key!': '$illegal.key.as.value',
        'datetime': datetime.datetime.utcnow(),
        'tuple': (1, 2.0, 'three'),
        'none': None,
    }
    assert force_bson_encodeable(document) == document
179
180
181
def test_force_bson_encodable_substitutes_illegal_value_with_strings():
    """Modules become their ``str()`` and illegal keys are rewritten.

    The expected document shows the rewrites: a leading ``$`` becomes ``@``,
    a ``.`` becomes ``,`` and a non-string key becomes a string.
    """
    raw = {
        'a_module': datetime,
        'some_legal_stuff': {'foo': 'bar', 'baz': [1, 23, 4]},
        'nested': {'dict': {'with': {'illegal_module': mock}}},
        '$illegal': 'because it starts with a $',
        'il.legal': 'because it contains a .',
        12.7: 'illegal because it is not a string key',
    }
    sanitized = {
        'a_module': str(datetime),
        'some_legal_stuff': {'foo': 'bar', 'baz': [1, 23, 4]},
        'nested': {'dict': {'with': {'illegal_module': str(mock)}}},
        '@illegal': 'because it starts with a $',
        'il,legal': 'because it contains a .',
        '12,7': 'illegal because it is not a string key',
    }
    assert force_bson_encodeable(raw) == sanitized
211
212
213
@pytest.fixture
def logged_metrics():
    """Return nine scalar metric log entries, timestamped at creation.

    The entries come in three batches: three ``training.loss`` values, three
    ``training.accuracy`` values, then three further ``training.loss`` values.
    """
    # (metric name, step, value) in logging order.
    measurements = [
        ("training.loss", 10, 1),
        ("training.loss", 20, 2),
        ("training.loss", 30, 3),

        ("training.accuracy", 10, 100),
        ("training.accuracy", 20, 200),
        ("training.accuracy", 30, 300),

        ("training.loss", 40, 10),
        ("training.loss", 50, 20),
        ("training.loss", 60, 30),
    ]
    return [
        ScalarMetricLogEntry(name, step, datetime.datetime.utcnow(), value)
        for name, step, value in measurements
    ]
228
229
230
def _assert_timestamps_ordered(timestamps):
    """Assert that consecutive entries of *timestamps* never decrease."""
    for earlier, later in zip(timestamps, timestamps[1:]):
        assert earlier <= later


def _assert_metric_stored(mongo_obs, db_run, name, steps, values):
    """Assert metric *name* of *db_run* is stored and referenced by the run.

    Looks the metric up in the observer's metrics collection by name and run
    id, checks that the run's ``info['metrics']`` list references it, and
    checks its recorded steps and values. Returns the metric document so the
    caller can make further assertions on it.
    """
    metric = mongo_obs.metrics.find_one({"name": name, "run_id": db_run['_id']})
    assert {"name": name, "id": str(metric["_id"])} in db_run['info']["metrics"]
    assert metric["steps"] == steps
    assert metric["values"] == values
    return metric


def test_log_metrics(mongo_obs, sample_run, logged_metrics):
    """
    Test storing scalar measurements

    Test whether measurements logged using _run.metrics.log_scalar_metric
    are being stored in the 'metrics' collection
    and that the experiment 'info' dictionary contains a valid reference
    to the metrics collection for each metric.

    Metrics are identified by name (e.g.: 'training.loss') and by the
    experiment run that produced them. Each metric contains a list of x values
    (e.g. iteration step), y values (measured values) and timestamps of when
    each of the measurements was taken.
    """

    # Start the experiment
    mongo_obs.started_event(**sample_run)

    # Initialize the info dictionary and standard output with arbitrary values
    info = {'my_info': [1, 2, 3], 'nr': 7}
    outp = 'some output'

    # Take first 6 measured events, group them by metric name
    # and store the measured series to the 'metrics' collection
    # and reference the newly created records in the 'info' dictionary.
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[:6]), info)
    # Call the standard heartbeat event (store the info to the database)
    mongo_obs.heartbeat_event(info=info, captured_out=outp, beat_time=T1,
                              result=0)

    # There should be only one run stored
    assert mongo_obs.runs.count() == 1
    db_run = mongo_obs.runs.find_one()
    # ... and the info dictionary should contain a list of created metrics
    assert "metrics" in db_run['info']
    assert isinstance(db_run['info']["metrics"], list)

    # The metrics, stored in the metrics collection,
    # should be two (training.loss and training.accuracy)
    assert mongo_obs.metrics.count() == 2
    # Each metric must reference the correct run, and the run (in the info
    # dictionary) must reference the correct metric record.
    loss = _assert_metric_stored(mongo_obs, db_run, "training.loss",
                                 [10, 20, 30], [1, 2, 3])
    _assert_timestamps_ordered(loss["timestamps"])
    _assert_metric_stored(mongo_obs, db_run, "training.accuracy",
                          [10, 20, 30], [100, 200, 300])

    # Now, process the remaining events
    # The metrics shouldn't be overwritten, but appended instead.
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[6:]), info)
    mongo_obs.heartbeat_event(info=info, captured_out=outp, beat_time=T2,
                              result=0)

    assert mongo_obs.runs.count() == 1
    db_run = mongo_obs.runs.find_one()
    assert "metrics" in db_run['info']

    # The newly added metrics belong to the same run and have the same names,
    # so the total number of metrics should not change ...
    assert mongo_obs.metrics.count() == 2
    # ... but the values should be appended to the original list
    loss = _assert_metric_stored(mongo_obs, db_run, "training.loss",
                                 [10, 20, 30, 40, 50, 60],
                                 [1, 2, 3, 10, 20, 30])
    _assert_timestamps_ordered(loss["timestamps"])
    # No new accuracy values were logged, so that series is unchanged.
    _assert_metric_stored(mongo_obs, db_run, "training.accuracy",
                          [10, 20, 30], [100, 200, 300])

    # Make sure that when starting a new experiment, new records in metrics
    # are created instead of appending to the old ones.
    sample_run["_id"] = "NEWID"
    # Start the experiment
    mongo_obs.started_event(**sample_run)
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[:4]), info)
    mongo_obs.heartbeat_event(info=info, captured_out=outp, beat_time=T1,
                              result=0)
    # A new run has been created
    assert mongo_obs.runs.count() == 2
    # Another 2 metrics have been created
    assert mongo_obs.metrics.count() == 4