knowyourdata.kyd - Code Metrics - Inspection of "Added Badges on Readme and updated display code, a..." - mubdi/knowyourdata - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 8492fe...523f19 )

by Mubdi

created 2018-03-12 19:50 UTC

knowyourdata.kyd A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	294
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	167
dl	0
loc	294
rs	10
c	0
b	0
f	0
wmc	23

8 Methods

Rating	Name	Size	Complexity
A	KYD.get_basic_stats()	16	1
A	KYD.clear_memory()	4	1
A	KYD.check_struct()	8	1
B	KYD.display_basic_stats()	91	1
A	KYD.__init__()	13	2
B	KYD.display_struct()	38	1
A	KYD.check_finite()	13	4
C	KYD.display()	38	7

2 Functions

Rating	Name	Duplication	Size	Complexity
A	kyd()	0	18	2
A	sizeof_fmt()	0	10	3

"""
KnowYourData
============

A rapid and lightweight module to describe the statistics and structure of
data arrays for interactive use.

The simplest use case is displaying the statistics of a numpy array 'x':

    >>> from knowyourdata import kyd
    >>> kyd(x)

"""

import sys
import numpy as np


class KYD(object):

    """The Central Class for KYD.

    Wraps a data vector, computes summary statistics over its finite
    elements together with structural information about the array, and
    renders both as a two-column text table.

    On construction the statistics are computed once and the instance's
    references to the data are then dropped, so only the summary values
    remain on the object.
    """

    # Variable for Data Vector
    data = None

    # Initial Flags
    f_allfinite = False
    f_hasnan = False
    f_hasinf = False

    # Display Settings
    col_width = 10
    precision = 4

    # ANSI escape sequences wrapped around the section headings (bold on/off)
    _BOLD = '\033[1m'
    _RESET = '\033[0m'

    def __init__(self, data):
        """Compute all statistics for ``data`` and release the array.

        data -- a numpy array, or anything ``np.array`` can convert
        """
        super(KYD, self).__init__()

        # Ensuring that the array is a numpy array
        if not isinstance(data, np.ndarray):
            data = np.array(data)

        self.data = data

        self.check_finite()
        self.check_struct()
        self.get_basic_stats()
        self.clear_memory()

    def check_finite(self):
        """Set the finiteness flags and store the finite elements.

        Sets ``f_allfinite``, ``f_hasnan`` and ``f_hasinf`` and stores in
        ``filt_data`` either the data itself (when everything is finite) or
        a 1-D array holding only its finite elements.
        """
        # Evaluate the mask once and reuse it for the flag and the filter
        finite_mask = np.isfinite(self.data)
        self.f_allfinite = bool(np.all(finite_mask))

        if self.f_allfinite:
            self.filt_data = self.data
            self.f_hasnan = False
            self.f_hasinf = False
        else:
            # Boolean-mask indexing also copes with 0-d input
            self.filt_data = self.data[finite_mask]
            self.f_hasnan = bool(np.any(np.isnan(self.data)))
            self.f_hasinf = bool(np.any(np.isinf(self.data)))

    def check_struct(self):
        """Determining the Structure of the Numpy Array.

        Stores ``dtype``, ``ndim``, ``shape``, ``size``, ``memsize`` (bytes)
        and ``human_memsize`` (formatted with ``sizeof_fmt``).
        """
        self.dtype = self.data.dtype
        self.ndim = self.data.ndim
        self.shape = self.data.shape
        self.size = self.data.size
        # sys.getsizeof leaves out the data buffer when the array is a view
        # that does not own its memory, so never report less than nbytes.
        self.memsize = max(sys.getsizeof(self.data), self.data.nbytes)
        self.human_memsize = sizeof_fmt(self.memsize)

    def get_basic_stats(self):
        """Get basic statistics about the finite elements of the array.

        Stores ``min``, ``max``, ``range``, ``mean``, ``std``, ``median``,
        ``firstquartile``, ``thirdquartile`` and the two-element percentile
        pairs ``cl_99`` (0.5, 99.5), ``cl_95`` (2.5, 97.5) and ``cl_68``
        (16, 84).  When there are no finite elements every statistic is NaN
        instead of an exception being raised.
        """
        values = self.filt_data

        if np.size(values) == 0:
            nan = np.float64('nan')
            self.min = self.max = self.range = nan
            self.mean = self.std = self.median = nan
            self.firstquartile = self.thirdquartile = nan
            self.cl_99 = np.array([np.nan, np.nan])
            self.cl_95 = np.array([np.nan, np.nan])
            self.cl_68 = np.array([np.nan, np.nan])
            return

        # np.float64 is used directly: the np.float_ alias no longer exists
        # in NumPy 2.0.
        self.min = np.float64(np.min(values))
        self.max = np.float64(np.max(values))
        self.range = self.max - self.min
        self.mean = np.mean(values)
        self.std = np.std(values)
        self.median = np.float64(np.median(values))
        self.firstquartile = np.float64(np.percentile(values, 25))
        self.thirdquartile = np.float64(np.percentile(values, 75))
        self.cl_99 = np.asarray(
            np.percentile(values, np.array([0.5, 99.5])), dtype=np.float64)
        self.cl_95 = np.asarray(
            np.percentile(values, np.array([2.5, 97.5])), dtype=np.float64)
        self.cl_68 = np.asarray(
            np.percentile(values, np.array([16.0, 84.0])), dtype=np.float64)

    def _centre_line(self, line):
        """Centre a finished line of text within ``col_width`` characters."""
        return "{0:^{1}}".format(line, self.col_width)

    def _label_row(self, labels, width):
        """Join text labels, each centred in a cell ``width`` wide."""
        cells = ["{0:^{1}}".format(label, width) for label in labels]
        return self._centre_line("".join(cells))

    def _stat_row(self, values, width):
        """Join numbers, each centred in ``width`` using ``precision``."""
        cells = [
            "{0:^{1}.{2}}".format(value, width, self.precision)
            for value in values
        ]
        return self._centre_line("".join(cells))

    def _visible_len(self, text):
        """Length of ``text`` ignoring the heading escape sequences."""
        return len(text.replace(self._BOLD, '').replace(self._RESET, ''))

    def _pad(self, text, width):
        """Right-pad ``text`` with spaces to ``width`` visible characters."""
        return text + ' ' * max(0, width - self._visible_len(text))

    def display_basic_stats(self):
        """Display basic statistics of array.

        Returns a list of strings, one per output line: the heading, then a
        label row and a value row for each of mean/standard deviation, the
        five-number summary and the confidence levels.
        """
        pstr_list = []

        # Heading for Section
        pstr_list.append(self._BOLD + "Basic Statistics  " + self._RESET)
        pstr_list.append('')

        # Mean and Standard Deviation
        pstr_list.append(self._label_row(("Mean", "Std Dev"), 15))
        pstr_list.append(self._stat_row((self.mean, self.std), 15))

        pstr_list.append("")

        # Three point statistics
        pstr_list.append(
            self._label_row(('Min', '1Q', 'Median', '3Q', 'Max'), 10))
        pstr_list.append(self._stat_row(
            (self.min, self.firstquartile, self.median,
             self.thirdquartile, self.max), 10))

        pstr_list.append("")

        # Confidence Levels
        pstr_list.append(self._label_row(
            ('-99 CL', '-95 CL', '-68 CL', '+68 CL', '+95 CL', '+99 CL'),
            10))
        pstr_list.append(self._stat_row(
            (self.cl_99[0], self.cl_95[0], self.cl_68[0],
             self.cl_68[1], self.cl_95[1], self.cl_99[1]), 10))

        return pstr_list

    def display_struct(self):
        """Display information about array structure.

        Returns a list of strings, one per output line: the heading followed
        by the number of dimensions, shape, data type and memory size.
        """
        pstr_list = []

        pstr_list.append(self._BOLD + "Array Structure  " + self._RESET)
        pstr_list.append("                 ")

        pstr_list.append(
            "Number of Dimensions:\t{self.ndim}".format(self=self))
        pstr_list.append(
            "Shape of Dimensions:\t{self.shape}".format(self=self))
        pstr_list.append(
            "Array Data Type:\t{self.dtype}".format(self=self))
        pstr_list.append(
            "Memory Size:\t\t{self.human_memsize}".format(self=self))

        return pstr_list

    def display(self, short=False):
        """Displaying all relevant statistics.

        Prints the basic statistics and the array structure side by side.

        short -- accepted for interface compatibility; it currently has no
                 effect on the output
        """
        left_lines = self.display_basic_stats()
        # Expand tabs before measuring so widths match what is printed
        right_lines = [line.expandtabs() for line in self.display_struct()]

        # Widths are based on visible characters so that the bold heading
        # rows line up with the plain rows beneath them.
        left_width = max(self._visible_len(line) for line in left_lines) + 1
        right_width = max(
            self._visible_len(line) for line in right_lines) + 2

        # Make both columns the same number of rows
        n_rows = max(len(left_lines), len(right_lines))
        left_lines += [''] * (n_rows - len(left_lines))
        right_lines += [''] * (n_rows - len(right_lines))

        print()
        for left, right in zip(left_lines, right_lines):
            print(
                '| ' + self._pad(left, left_width) +
                '\t| ' + self._pad(right, right_width) + '\t|')
        print()

    def clear_memory(self):
        """Ensuring the Numpy Array does not exist in memory.

        Drops this instance's references to the data and the filtered data.
        Safe to call more than once.
        """
        for name in ('data', 'filt_data'):
            self.__dict__.pop(name, None)


def sizeof_fmt(num, suffix='B'):
    """Format a size as a short string with a binary (1024-based) prefix.

    The value is divided by 1024 until its magnitude drops below 1024, then
    rendered with one decimal place, the matching prefix and ``suffix``.
    Anything too large for 'Zi' is reported in 'Yi'.

    Code from Fred Cirera from Stack Overflow:
    https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    scaled = num
    position = 0
    while position < len(prefixes):
        if abs(scaled) < 1024.0:
            return "%3.1f%s%s" % (scaled, prefixes[position], suffix)
        scaled = scaled / 1024.0
        position += 1
    # Ran out of prefixes: report whatever is left in the largest unit
    return "%.1f%s%s" % (scaled, 'Yi', suffix)


def kyd(data, full_statistics=False):
    """Summarise ``data`` on screen and hand back the summary object.

    data -- Numpy Array of Data

    Keyword arguments:
    full_statistics -- request the detailed statistics rather than the
    short form (Currently Not Implemented)

    Returns the KYD instance built from ``data``.
    """
    summary = KYD(data)
    # The short form is requested whenever full statistics are not
    summary.display(short=not full_statistics)
    return summary


1			"""
2			KnowYourData
3			============
4
5			A rapid and lightweight module to describe the statistics and structure of
6			data arrays for interactive use.
7
8			The most simple use case to display data is if you have a numpy array 'x':
9
10			>>> from knowyourdata import kyd
11			>>> kyd(x)
12
13			"""
14
15			import sys
16			import numpy as np
17
18
19			class KYD(object):
			0 ignored issues – show best-practice introduced 2018-03-12 18:01 UTC by Report Bug Copy Issue Report Too many instance attributes (22/7) Loading history... Unused Code introduced 2018-03-12 18:01 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
20			"""The Central Class for KYD"""
21
22			# Variable for Data Vector
23			data = None
24
25			# Initial Flags
26			f_allfinite = False
27			f_hasnan = False
28			f_hasinf = False
29
30			# Display Settings
31			col_width = 10
32			precision = 4
33
34			def check_finite(self):
35			"""Checking to see if all elements are finite and setting flags"""
36			if np.all(np.isfinite(self.data)):
37			self.filt_data = self.data
38			self.f_allfinite = True
39			else:
40			finite_inds = np.where(np.isfinite(self.data))
41			self.filt_data = self.data[finite_inds]
42
43			if np.any(np.isnan(self.data)):
44			self.f_hasnan = True
45			if np.any(np.isinf(self.data)):
46			self.f_hasinf = True
47
48			def check_struct(self):
49			"""Determining the Structure of the Numpy Array"""
50			self.dtype = self.data.dtype
51			self.ndim = self.data.ndim
52			self.shape = self.data.shape
53			self.size = self.data.size
54			self.memsize = sys.getsizeof(self.data)
55			self.human_memsize = sizeof_fmt(self.memsize)
56
57			def get_basic_stats(self):
58			"""Get basic statistics about array"""
59			self.min = np.float_(np.min(self.filt_data))
60			self.max = np.float_(np.max(self.filt_data))
61			self.range = self.max - self.min
62			self.mean = np.mean(self.filt_data)
63			self.std = np.std(self.filt_data)
64			self.median = np.float_(np.median(self.filt_data))
65			self.firstquartile = np.float_(np.percentile(self.filt_data, 25))
66			self.thirdquartile = np.float_(np.percentile(self.filt_data, 75))
67			self.cl_99 = np.float_(
68			np.percentile(self.filt_data, np.array([0.5, 99.5])))
69			self.cl_95 = np.float_(
70			np.percentile(self.filt_data, np.array([2.5, 97.5])))
71			self.cl_68 = np.float_(
72			np.percentile(self.filt_data, np.array([16.0, 84.0])))
73
74			def display_basic_stats(self):
75			"""Display basic statistics of array"""
76			pstr_list = []
77
78			# Heading for Section
79
80			pstr_struct_header1 = '\033[1m' + "Basic Statistics " + '\033[0m'
81			pstr_struct_header2 = ''
82
83			pstr_list.append(pstr_struct_header1)
84			pstr_list.append(pstr_struct_header2)
85
86			# Mean and Standard Deviation
87
88			pstr_meanstdhead = (
89			"{0:^15}"
90			"{1:^15}"
91			).format("Mean", "Std Dev")
92			pstr_meanstdhead = (
93			"{0:^{self.col_width}}"
94			).format(pstr_meanstdhead, self=self)
95			pstr_list.append(pstr_meanstdhead)
96
97			pstr_meanstdstat = (
98			"{self.mean:^15.{self.precision}}"
99			"{self.std:^15.{self.precision}}"
100			).format(self=self)
101			pstr_meanstdstat = (
102			"{0:^{self.col_width}}"
103			).format(pstr_meanstdstat, self=self)
104			pstr_list.append(pstr_meanstdstat)
105
106			pstr_list.append("")
107
108			# Three point statistics
109
110			pstr_3pthead = (
111			"{0:^10}"
112			"{1:^10}"
113			"{2:^10}"
114			"{3:^10}"
115			"{4:^10}"
116			).format('Min,', '1Q', 'Median', '3Q', 'Max')
117			pstr_3pthead = (
118			"{0:^{self.col_width}}"
119			).format(pstr_3pthead, self=self)
120			pstr_list.append(pstr_3pthead)
121
122			pstr_3ptstat = (
123			"{self.min:^10.{self.precision}}"
124			"{self.firstquartile:^10.{self.precision}}"
125			"{self.median:^10.{self.precision}}"
126			"{self.thirdquartile:^10.{self.precision}}"
127			"{self.max:^10.{self.precision}}"
128			).format(self=self)
129			pstr_3ptstat = (
130			"{0:^{self.col_width}}"
131			).format(pstr_3ptstat, self=self)
132			pstr_list.append(pstr_3ptstat)
133
134			pstr_list.append("")
135
136			# Confidence Levels
137
138			pstr_clhead = (
139			"{0:^10}"
140			"{1:^10}"
141			"{2:^10}"
142			"{3:^10}"
143			"{4:^10}"
144			"{5:^10}"
145			).format('-99 CL', '-95 CL', '-68 CL', '+68 CL', '+95 CL', '+99 CL')
146			pstr_clhead = (
147			"{0:^{self.col_width}}"
148			).format(pstr_clhead, self=self)
149			pstr_list.append(pstr_clhead)
150
151			pstr_clstat = (
152			"{self.cl_99[0]:^10.{self.precision}}"
153			"{self.cl_95[0]:^10.{self.precision}}"
154			"{self.cl_68[0]:^10.{self.precision}}"
155			"{self.cl_68[1]:^10.{self.precision}}"
156			"{self.cl_95[1]:^10.{self.precision}}"
157			"{self.cl_99[1]:^10.{self.precision}}"
158			).format(self=self)
159			pstr_clstat = (
160			"{0:^{self.col_width}}"
161			).format(pstr_clstat, self=self)
162			pstr_list.append(pstr_clstat)
163
164			return pstr_list
165
166			def display_struct(self):
167			"""Display information about array structure"""
168
169			pstr_list = []
170
171			# pstr_struct_header0 = "................."
172			pstr_struct_header1 = '\033[1m' + "Array Structure " + '\033[0m'
173			pstr_struct_header2 = " "
174
175			# pstr_list.append(pstr_struct_header0)
176			pstr_list.append(pstr_struct_header1)
177			pstr_list.append(pstr_struct_header2)
178
179			pstr_n_dim = (
180			"Number of Dimensions:\t"
181			"{self.ndim}").format(
182			self=self)
183			pstr_list.append(pstr_n_dim)
184
185			pstr_shape = (
186			"Shape of Dimensions:\t"
187			"{self.shape}").format(
188			self=self)
189			pstr_list.append(pstr_shape)
190
191			pstr_dtype = (
192			"Array Data Type:\t"
193			"{self.dtype}").format(
194			self=self)
195			pstr_list.append(pstr_dtype)
196
197			pstr_memsize = (
198			"Memory Size:\t\t"
199			"{self.human_memsize}").format(
200			self=self)
201			pstr_list.append(pstr_memsize)
202
203			return pstr_list
204
205			def display(self, short=False):
206			"""Displaying all relevant statistics"""
207
208			if short:
209			pass
210
211			print()
212			pstr_basic = self.display_basic_stats()
213			pstr_struct = self.display_struct()
214			n_basic = len(pstr_basic)
215			n_struct = len(pstr_struct)
216
217			l_colwidth = max([len(x) for x in pstr_basic]) + 1
218
219			r_colwidth = max([len(x) for x in pstr_struct]) + 2
220
221			# new_colwidth = self.col_width + 20
222
223			# Finding the longest string
224			len_list = max([n_basic, n_struct])
225
226			for i in range(len_list):
227			tmp_str = '\| '
228			if i < n_basic:
229			tmp_str += (pstr_basic[i].ljust(l_colwidth))
230			else:
231			tmp_str += ''.ljust(l_colwidth)
232			tmp_str += '\t\| '
233
234			if i < n_struct:
235			tmp_str += (pstr_struct[i].expandtabs().ljust(r_colwidth))
236			else:
237			tmp_str += ''.ljust(r_colwidth)
238			tmp_str += '\t\|'
239
240			print(tmp_str)
241
242			print()
243
244			def clear_memory(self):
245			"""Ensuring the Numpy Array does not exist in memory"""
246			del self.data
247			del self.filt_data
248
249			def __init__(self, data):
250			super(KYD, self).__init__()
251
252			# Ensuring that the array is a numpy array
253			if not isinstance(data, np.ndarray):
254			data = np.array(data)
255
256			self.data = data
257
258			self.check_finite()
259			self.check_struct()
260			self.get_basic_stats()
261			self.clear_memory()
262
263
264			def sizeof_fmt(num, suffix='B'):
265			"""Return human readable version of in-memory size.
266			Code from Fred Cirera from Stack Overflow:
267			https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
268			"""
269			for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
270			if abs(num) < 1024.0:
271			return "%3.1f%s%s" % (num, unit, suffix)
272			num /= 1024.0
273			return "%.1f%s%s" % (num, 'Yi', suffix)
274
275
276			def kyd(data, full_statistics=False):
277			"""Print statistics of any numpy array
278
279			data -- Numpy Array of Data
280
281			Keyword arguments:
282			full_statistics -- printing all detailed statistics of the sources
283			(Currently Not Implemented)
284
285			"""
286
287			data_kyd = KYD(data)
288			if full_statistics:
289			data_kyd.display()
290			else:
291			data_kyd.display(short=True)
292
293			return data_kyd
294

mubdi / knowyourdata

Push — master ( 8492fe...523f19 )

knowyourdata.kyd A

Complexity

Size/Duplication

Importance

8 Methods

2 Functions

Duplication Side-by-Side

Filter issues like