hyperactive.data_tools.data_collector.DataIO._get_header() - Code Metrics - Inspection of "Merge branch 'master' of https://github.com/SimonB..." - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 7d2c3d...b4b259 )

by Simon

created 2021-07-02 12:20 UTC

DataIO._get_header() A

↳ Parent: hyperactive.data_tools.data_collector

Complexity

Conditions

Size

Total Lines	9
Code Lines	7

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	3
eloc	7
nop	3
dl	0
loc	9
rs	10
c	0
b	0
f	0

# Author: Simon Blanke
# Email: [email protected]
# License: MIT License


import os
import contextlib
import pandas as pd
from filelock import FileLock


@contextlib.contextmanager
def atomic_overwrite(filename):
    """Yield a writable text file whose content replaces ``filename`` on success.

    The data is first written to a sibling temporary file (``filename + "~"``).
    Only when the ``with`` body finishes without raising is the temporary file
    moved over ``filename``; if anything fails, the existing ``filename`` is
    left untouched and the temporary file is removed.

    Parameters
    ----------
    filename : str
        Path of the file to (over)write.

    Yields
    ------
    file object
        The temporary file, opened in text mode ``"w"``.
    """
    # from: https://stackoverflow.com/questions/42409707/pandas-to-csv-overwriting-prevent-data-loss
    temp = filename + "~"
    try:
        with open(temp, "w") as f:
            yield f
        # os.replace overwrites an existing destination on every platform;
        # os.rename raises on Windows when ``filename`` already exists.
        os.replace(temp, filename)
    except BaseException:
        # Do not leave a half-written temporary file behind on failure.
        with contextlib.suppress(OSError):
            os.remove(temp)
        raise


class DataIO:
    """Read and write pandas DataFrames as CSV files.

    Parameters
    ----------
    path : str
        Default CSV path, used by ``load`` when no explicit path is given.
    drop_duplicates : bool or list
        Falsy: rows are written as they are. ``True``: duplicate rows are
        dropped considering all columns. Otherwise the value is passed to
        ``DataFrame.drop_duplicates`` as ``subset``.
    """

    def __init__(self, path, drop_duplicates):
        self.path = path
        self.replace_existing = False
        self.drop_duplicates = drop_duplicates

        # ``replace_existing`` is always False here, so the former "w" branch
        # could never be taken: ``locked_write`` always appends.
        self.mode = "a"

    def _save_dataframe(self, dataframe, io_wrap):
        """Write ``dataframe`` as CSV to the open file object ``io_wrap``.

        The header row is only written when the stream position is 0, so
        appending to a file that already has content does not repeat it.
        """
        if self.drop_duplicates:
            # ``True`` means "compare all columns", which pandas expresses as
            # ``subset=None``; anything else is taken as the column subset.
            subset = None if self.drop_duplicates is True else self.drop_duplicates
            # Work on a new frame so the caller's DataFrame is not mutated.
            dataframe = dataframe.drop_duplicates(subset=subset)

        dataframe.to_csv(io_wrap, index=False, header=not io_wrap.tell())

    def atomic_write(self, dataframe, path, replace_existing):
        """Write ``dataframe`` to ``path`` through ``atomic_overwrite``.

        ``replace_existing`` is only recorded on the instance; the write
        itself goes through ``atomic_overwrite`` regardless of its value.
        """
        self.replace_existing = replace_existing

        with atomic_overwrite(path) as io_wrap:
            self._save_dataframe(dataframe, io_wrap)

    def locked_write(self, dataframe, path):
        """Write ``dataframe`` to ``path`` in ``self.mode`` under a file lock.

        The lock file is ``path + ".lock~"``.
        """
        lock = FileLock(path + ".lock~")
        with lock:
            with open(path, self.mode) as io_wrap:
                self._save_dataframe(dataframe, io_wrap)

    def load(self, path):
        """Return the CSV at ``path`` as a DataFrame.

        Falls back to ``self.path`` when ``path`` is ``None``. Returns
        ``None`` when the file does not exist or is empty.
        """
        target = self.path if path is None else path
        if os.path.isfile(target) and os.path.getsize(target) > 0:
            return pd.read_csv(target)
        return None


class DataCollector:
    """Collect data in a CSV file at ``path``.

    Parameters
    ----------
    path : str
        Path of the CSV file. ``"/"`` is used as the separator when the path
        is split into ``path2file`` and ``file_name``.
    drop_duplicates : bool or list, default False
        Forwarded unchanged to ``DataIO``.
    """

    def __init__(self, path, drop_duplicates=False):
        self.path = path
        self.drop_duplicates = drop_duplicates

        # rpartition gives the same result as rsplit("/", 1) whenever the path
        # contains a "/", and does not raise IndexError for a bare file name
        # (then path2file is "" and file_name is the whole path).
        directory, separator, file_name = path.rpartition("/")
        self.path2file = directory + separator
        self.file_name = file_name

        self.io = DataIO(path, drop_duplicates)

    def load(self):
        """Return whatever ``DataIO.load`` returns for this collector's path."""
        return self.io.load(self.path)

    def append(self, dictionary):
        """Write ``dictionary`` as a single row via ``DataIO.locked_write``.

        The row is built with ``pd.DataFrame(dictionary, index=[0])``.
        """
        dataframe = pd.DataFrame(dictionary, index=[0])
        self.io.locked_write(dataframe, self.path)

    def save(self, dataframe, replace_existing=False):
        """Write ``dataframe`` to the file via ``DataIO.atomic_write``."""
        self.io.atomic_write(dataframe, self.path, replace_existing)


1			# Author: Simon Blanke
2			# Email: [email protected]
3			# License: MIT License
4
5
6			import os
7			import contextlib
8			import pandas as pd
9			from filelock import FileLock
10
11
12			@contextlib.contextmanager
13			def atomic_overwrite(filename):
14			# from: https://stackoverflow.com/questions/42409707/pandas-to-csv-overwriting-prevent-data-loss
15			temp = filename + "~"
16			with open(temp, "w") as f:
17			yield f
18			os.rename(temp, filename) # this will only happen if no exception was raised
19
20
21			class DataIO:
22			def __init__(self, path, drop_duplicates):
23			self.path = path
24			self.replace_existing = False
25			self.drop_duplicates = drop_duplicates
26
27			if self.replace_existing:
28			self.mode = "w"
29			else:
30			self.mode = "a"
31
32			def _save_dataframe(self, dataframe, io_wrap):
33			if self.drop_duplicates:
34			dataframe.drop_duplicates(subset=self.drop_duplicates, inplace=True)
35
36			dataframe.to_csv(io_wrap, index=False, header=not io_wrap.tell())
37
38			def atomic_write(self, dataframe, path, replace_existing):
39			self.replace_existing = replace_existing
40
41			with atomic_overwrite(path) as io_wrap:
42			self._save_dataframe(dataframe, io_wrap)
43
44			def locked_write(self, dataframe, path):
45
46			lock = FileLock(path + ".lock~")
47			with lock:
48			with open(path, self.mode) as io_wrap:
49			self._save_dataframe(dataframe, io_wrap)
50
51			"""
52			import fcntl
53
54			with open(path, self.mode) as io_wrap:
55			fcntl.flock(io_wrap, fcntl.LOCK_EX)
56			self._save_dataframe(dataframe, io_wrap)
57			fcntl.flock(io_wrap, fcntl.LOCK_UN)
58			"""
59
60			def load(self, path):
61			if os.path.isfile(self.path) and os.path.getsize(self.path) > 0:
62			return pd.read_csv(self.path)
63
64
65			class DataCollector:
66			def __init__(self, path, drop_duplicates=False):
67			self.path = path
68			self.drop_duplicates = drop_duplicates
69
70			self.path2file = path.rsplit("/", 1)[0] + "/"
71			self.file_name = path.rsplit("/", 1)[1]
72
73			self.io = DataIO(path, drop_duplicates)
74
75			def load(self):
76			return self.io.load(self.path)
77
78			def append(self, dictionary):
79			dataframe = pd.DataFrame(dictionary, index=[0])
80			self.io.locked_write(dataframe, self.path)
81
82			def save(self, dataframe, replace_existing=False):
83			self.io.atomic_write(dataframe, self.path, replace_existing)
84

SimonBlanke / Hyperactive

Push — master ( 7d2c3d...b4b259 )

DataIO._get_header() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like