Passed
Push — master ( 7596de...7d2c3d )
by Simon
04:29
created

DataIO.locked_write()   A

Complexity

Conditions 3

Size

Total Lines 8
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 5
nop 3
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
6
import os
7
import contextlib
8
import pandas as pd
9
from filelock import FileLock
10
11
12
@contextlib.contextmanager
def atomic_overwrite(filename):
    """Yield a text handle whose contents replace *filename* only on success.

    Data is written to a sibling temporary file (``filename + "~"``). When
    the ``with`` body finishes without raising, the temporary file is moved
    over *filename*; when it raises, the temporary file is deleted and
    *filename* is left untouched.

    Args:
        filename: Path of the file to be (over)written.

    Yields:
        The open, writable text file object for the temporary file.
    """
    # from: https://stackoverflow.com/questions/42409707/pandas-to-csv-overwriting-prevent-data-loss
    temp = filename + "~"
    try:
        # newline="" leaves line endings to the writer; pandas asks for this
        # when it is handed a text-mode file object.
        with open(temp, "w", newline="") as f:
            yield f
    except BaseException:
        # Do not leave a half-written temporary file behind.
        with contextlib.suppress(OSError):
            os.remove(temp)
        raise
    # os.replace overwrites an existing target on every platform, whereas
    # os.rename fails on Windows when the destination already exists.
    os.replace(temp, filename)
19
20
21
class DataIO:
    """Read and write search data as CSV, with optional de-duplication.

    Args:
        path: Default location of the CSV file.
        drop_duplicates: Falsy to keep every row, ``True`` to drop rows that
            are duplicated across all columns, or a column label / list of
            labels that is forwarded to
            ``DataFrame.drop_duplicates(subset=...)``.
    """

    def __init__(self, path, drop_duplicates):
        self.path = path
        self.replace_existing = False
        self.drop_duplicates = drop_duplicates

        # replace_existing starts out False, so locked writes append.
        self.mode = "w" if self.replace_existing else "a"

    def _get_header(self, search_data, path):
        """Return the header to use when writing *search_data* to *path*.

        Returns:
            ``search_data.columns`` when *path* is not an existing file or
            ``self.replace_existing`` is set; ``False`` otherwise.
        """
        if self.replace_existing or not os.path.isfile(path):
            return search_data.columns
        return False

    def _save_dataframe(self, dataframe, io_wrap):
        """Write *dataframe* as CSV to the open text handle *io_wrap*.

        The header row is emitted only when the handle is at position 0.
        Duplicate rows are dropped on a copy, so the caller's DataFrame is
        never modified.

        Args:
            dataframe: The pandas DataFrame to serialize.
            io_wrap: Writable text file object supporting ``tell()``.
        """
        if self.drop_duplicates:
            # A plain ``True`` means "compare all columns"; passing it on as
            # ``subset=True`` would be looked up as a column label instead.
            if self.drop_duplicates is True:
                subset = None
            else:
                subset = self.drop_duplicates
            dataframe = dataframe.drop_duplicates(subset=subset)

        dataframe.to_csv(io_wrap, index=False, header=not io_wrap.tell())

    def atomic_write(self, dataframe, path, replace_existing):
        """Write *dataframe* to *path* through ``atomic_overwrite``.

        *replace_existing* is stored on the instance; the write itself always
        goes through ``atomic_overwrite`` regardless of its value.
        """
        self.replace_existing = replace_existing

        with atomic_overwrite(path) as io_wrap:
            self._save_dataframe(dataframe, io_wrap)

    def locked_write(self, dataframe, path):
        """Write *dataframe* to *path* while holding ``<path>.lock``.

        The file is opened with ``self.mode``.
        """
        with FileLock(path + ".lock"):
            # newline="" leaves line endings to pandas, as its to_csv
            # documentation requests for text-mode file objects.
            with open(path, self.mode, newline="") as io_wrap:
                self._save_dataframe(dataframe, io_wrap)

    def load(self, path):
        """Read the CSV file at *path*.

        Returns:
            A DataFrame, or ``None`` when *path* is not an existing file or
            the file is empty.
        """
        if os.path.isfile(path) and os.path.getsize(path) > 0:
            return pd.read_csv(path)
        return None
73
74
75
class DataCollector:
    """Collect search data in a CSV file at a fixed path.

    Args:
        path: Location of the CSV file. ``/`` is treated as the directory
            separator when splitting it into ``path2file`` and ``file_name``.
        drop_duplicates: Passed through unchanged to ``DataIO``.
    """

    def __init__(self, path, drop_duplicates=False):
        self.path = path
        self.drop_duplicates = drop_duplicates

        # rpartition gives the same split as rsplit("/", 1) whenever a "/"
        # is present, and also handles a bare file name (no "/"), for which
        # indexing the rsplit result would raise IndexError.
        directory, separator, self.file_name = path.rpartition("/")
        self.path2file = directory + separator

        self.io = DataIO(path, drop_duplicates)

    def load(self):
        """Return whatever ``DataIO.load`` yields for this collector's path."""
        return self.io.load(self.path)

    def append(self, dictionary):
        """Write *dictionary* as a single-row DataFrame via a locked write."""
        dataframe = pd.DataFrame(dictionary, index=[0])
        self.io.locked_write(dataframe, self.path)

    def save(self, dataframe, replace_existing=False):
        """Write *dataframe* to this collector's path via an atomic write."""
        self.io.atomic_write(dataframe, self.path, replace_existing)
94