1 | # Copyright (c) 2014, Salesforce.com, Inc. All rights reserved. |
||
2 | # |
||
3 | # Redistribution and use in source and binary forms, with or without |
||
4 | # modification, are permitted provided that the following conditions |
||
5 | # are met: |
||
6 | # |
||
7 | # - Redistributions of source code must retain the above copyright |
||
8 | # notice, this list of conditions and the following disclaimer. |
||
9 | # - Redistributions in binary form must reproduce the above copyright |
||
10 | # notice, this list of conditions and the following disclaimer in the |
||
11 | # documentation and/or other materials provided with the distribution. |
||
12 | # - Neither the name of Salesforce.com nor the names of its contributors |
||
13 | # may be used to endorse or promote products derived from this |
||
14 | # software without specific prior written permission. |
||
15 | # |
||
16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||
17 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||
18 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||
19 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||
20 | # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||
21 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||
22 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS |
||
23 | # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||
24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR |
||
25 | # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE |
||
26 | # USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||
27 | |||
28 | import os |
||
29 | import bz2 |
||
30 | import gzip |
||
31 | import simplejson |
||
32 | import struct |
||
33 | |||
34 | |||
def mkdir_p(dirname):
    '''like mkdir -p: create dirname and any missing parent directories.

    Safe against concurrent creation: if makedirs fails but the
    directory exists afterwards (another process created it first),
    the error is swallowed.
    '''
    if not os.path.exists(dirname):
        try:
            os.makedirs(dirname)
        except OSError:
            # Re-raise only if the directory still does not exist.
            # Bare raise (not `raise e`) preserves the original traceback.
            if not os.path.exists(dirname):
                raise
||
43 | |||
44 | |||
def open_compressed(filename, mode='r'):
    '''Open filename, transparently (de)compressing based on extension.

    - '.bz2' -> bz2.BZ2File ('b' is stripped from mode; BZ2File is
      always binary)
    - '.gz'  -> gzip.GzipFile
    - anything else -> a plain file object

    When opening for writing, any missing parent directories of
    filename are created first.
    '''
    if 'w' in mode:
        dirname = os.path.dirname(filename)
        if dirname:
            mkdir_p(dirname)
    if filename.endswith('.bz2'):
        return bz2.BZ2File(filename, mode.replace('b', ''))
    elif filename.endswith('.gz'):
        return gzip.GzipFile(filename, mode)
    else:
        # open() instead of the Python-2-only file() builtin.
        return open(filename, mode)
||
56 | |||
57 | |||
def json_dump(data, filename, **kwargs):
    '''Serialize data as json to filename (compressed per extension).'''
    f = open_compressed(filename, 'w')
    try:
        simplejson.dump(data, f, **kwargs)
    finally:
        f.close()
||
61 | |||
62 | |||
def json_load(filename):
    '''Load and return json data from filename (compressed per extension).'''
    f = open_compressed(filename, 'rb')
    try:
        return simplejson.load(f)
    finally:
        f.close()
||
66 | |||
67 | |||
def json_stream_dump(stream, filename, **kwargs):
    '''Dump an iterable of json-serializable items to filename as a
    json list, one item per line, in the newline format expected by
    json_stream_load. An empty stream produces '[\\n]'.
    '''
    kwargs['separators'] = (',', ':')
    with open_compressed(filename, 'w') as f:
        f.write('[')
        first = True
        for item in iter(stream):
            # First item follows the opening '[' on its own line;
            # later items are comma-separated, one per line.
            f.write('\n' if first else ',\n')
            first = False
            simplejson.dump(item, f, **kwargs)
        f.write('\n]')
||
83 | |||
84 | |||
def json_costream_dump(filename, **kwargs):
    '''Coroutine version of json_stream_dump: items are pushed in via
    send(); closing the coroutine finalizes the json list. Output
    format matches json_stream_dump exactly.
    '''
    kwargs['separators'] = (',', ':')
    with open_compressed(filename, 'w') as f:
        f.write('[')
        prefix = '\n'
        try:
            while True:
                item = (yield)
                f.write(prefix)
                prefix = ',\n'
                simplejson.dump(item, f, **kwargs)
        except GeneratorExit:
            # close() before any send() still yields a valid '[\n]'.
            pass
        f.write('\n]')
||
100 | |||
101 | |||
class json_stream_load(object):
    '''
    Read json data that was created by json_stream_dump or json_costream_dump.

    Note that this exploits newline formatting in the above dumpers.
    In particular:
    - the first line is '['
    - intermediate lines are of the form '{},'.format(json_parsable_content)
    - the penultimate line is of the form '{}'.format(json_parsable_content)
    - the last line is ']'
    - there is no trailing whitespace

    An alternative would be to use ijson to streamingly load arbitrary json
    files, however in practice this is ~40x slower.
    '''
    def __init__(self, filename):
        self.fd = open_compressed(filename, 'rb')
        # Read at most 2 chars; the compatible dumpers always begin '[\n'.
        line = self.fd.readline(2)
        if line != '[\n':
            raise IOError(
                'Unhandled format for json_stream_load. '
                'Try recreating json file with the compatible '
                'json_stream_dump or json_costream_dump.')

    def __iter__(self):
        return self

    def next(self):
        # Strip the trailing ',' and newline so the line is bare json.
        line = self.fd.readline().rstrip(',\n')
        if line == ']':
            self.close()
            raise StopIteration
        else:
            return simplejson.loads(line)

    # Python 3 iterator protocol; next() is kept for Python 2 callers.
    __next__ = next

    def close(self):
        self.fd.close()
||
139 | |||
140 | |||
def protobuf_stream_write(item, fd):
    '''Write one serialized message to fd: a little-endian uint32
    length prefix followed by the raw payload.
    '''
    # bytes rather than str, so binary payloads work on Python 3 too
    # (on Python 2, bytes is an alias of str, so behavior is unchanged).
    assert isinstance(item, bytes), item
    fd.write(struct.pack('<I', len(item)))
    fd.write(item)
||
145 | |||
146 | |||
def protobuf_stream_read(fd):
    '''Read one length-prefixed message from fd and return its raw
    payload; raise StopIteration when fd is exhausted (short read on
    the 4-byte length header).
    '''
    header = fd.read(4)
    if len(header) != 4:
        raise StopIteration
    (length,) = struct.unpack('<I', header)
    return fd.read(length)
||
153 | |||
154 | |||
def protobuf_stream_dump(stream, filename):
    '''Write every message in stream to filename as length-prefixed
    records, compatible with protobuf_stream_load.
    '''
    with open_compressed(filename, 'wb') as out:
        for message in stream:
            protobuf_stream_write(message, out)
||
159 | |||
160 | |||
class protobuf_stream_load(object):
    '''Iterate over the length-prefixed messages in a file written by
    protobuf_stream_dump, yielding one raw payload at a time.
    '''
    def __init__(self, filename):
        self.fd = open_compressed(filename, 'rb')

    def __iter__(self):
        return self

    def next(self):
        # protobuf_stream_read raises StopIteration at end of file,
        # which terminates iteration.
        return protobuf_stream_read(self.fd)

    # Python 3 iterator protocol; next() is kept for Python 2 callers.
    __next__ = next

    def close(self):
        self.fd.close()
||
173 |