Passed
Push — master (fcc4be...d6f0a9)
by Dafne van
04:29 queued 10s

e2edutch.predict.main()   Rating: F

Complexity

Conditions 14

Size

Total Lines 86
Code Lines 71

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric  Value
eloc    71      (code lines)
dl      0       (duplicated lines)
loc     86      (total lines)
rs      3.469
c       0
b       0
f       0
cc      14      (cyclomatic complexity)
nop     1       (number of parameters)

How to fix

Long Method

Small methods make your code easier to understand, particularly when combined with a good name. And when a method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, that is usually a good sign that the commented part should be extracted into a new method, with the comment serving as a starting point for naming it.

Commonly applied refactorings include Extract Method.
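For instance, the extension-dispatch block at the top of main() could be pulled out with Extract Method. The sketch below rests on assumptions: the helper name read_docs is hypothetical (not part of e2edutch), and the .conll and .naf branches are omitted because they call into e2edutch internals (minimize.minimize_partition, naf.get_jsonlines) not reproduced here.

```python
import json
import os


def read_jsonlines(input_filename):
    """Yield one parsed example per line of a .jsonlines file."""
    with open(input_filename) as f:
        for line in f:
            yield json.loads(line)


def read_docs(input_filename):
    """Hypothetical Extract Method target: the extension-dispatch block
    from main(), now small enough to carry a descriptive name.

    The .conll and .naf branches are omitted in this sketch; the real code
    delegates them to minimize.minimize_partition and naf.get_jsonlines.
    For .txt the real code also wraps the text with util.create_example,
    which this sketch skips.
    """
    ext_input = os.path.splitext(input_filename)[-1]
    if ext_input == '.jsonlines':
        return read_jsonlines(input_filename)
    if ext_input == '.txt':
        with open(input_filename) as f:
            return [f.read()]
    raise ValueError(
        'Input file should be .naf, .conll, .txt or .jsonlines, but is {}.'
        .format(ext_input))
```

main() would then shrink to a single `docs = read_docs(args.input_filename)`, moving one branch cluster (and its labels/stats temporaries) out of the 30-variable body.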

Complexity

Complex methods like e2edutch.predict.main() often do a lot of different things. To break such a method down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields or methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
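Applied to this code: in main(), the names output_file, format_out, sentences and predictions travel together through the output logic, which suggests extracting a small writer component. The class below is purely illustrative (OutputWriter is not part of e2edutch), and only the jsonlines path writes immediately; the deferred formats just collect state, as main() does.

```python
import json


class OutputWriter:
    """Illustrative Extract Class target: groups the output_file/format_out
    pair and the per-doc buffers that main() threads through its loop."""

    def __init__(self, output_file, format_out):
        self.output_file = output_file
        self.format_out = format_out
        self.sentences = {}     # doc_key -> sentences, for deferred formats
        self.predictions = {}   # doc_key -> predicted clusters

    def write_example(self, example):
        if self.format_out == 'jsonlines':
            # jsonlines is streamed straight out, one JSON object per line
            self.output_file.write(json.dumps(example))
            self.output_file.write('\n')
        else:
            # conll/naf output is collected here and flushed at the end,
            # mirroring the sentences/predictions dicts in main()
            self.predictions[example['doc_key']] = example['predicted_clusters']
            self.sentences[example['doc_key']] = example['sentences']
```

The prediction loop then only ever talks to one object, and the format-specific flushing at the end of main() becomes a method on the same class.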

  1  import sys
         [issue] Missing module docstring
  2  import json
  3  import os
  4  import io
  5  import collections
  6  import argparse
  7  import logging
  8
  9  from e2edutch import conll
 10  from e2edutch import minimize
 11  from e2edutch import util
 12  from e2edutch import coref_model as cm
 13  from e2edutch import naf
 14
 15  import tensorflow.compat.v1 as tf
         [issue] Unable to import 'tensorflow.compat.v1'
         [issue] Third-party import "import tensorflow.compat.v1 as tf" should be placed before "from e2edutch import conll"
 16  tf.disable_v2_behavior()
 17
 18
 19  def get_parser():
         [issue] Missing function or method docstring
 20      parser = argparse.ArgumentParser()
 21      parser.add_argument('config')
 22      parser.add_argument('input_filename')
 23      parser.add_argument('-o', '--output_file',
 24                          type=argparse.FileType('w'), default=sys.stdout)
 25      parser.add_argument('-f', '--format_out', default='conll',
 26                          choices=['conll', 'jsonlines', 'naf'])
 27      parser.add_argument('-c', '--word_col', type=int, default=2)
 28      parser.add_argument('--cfg_file',
 29                          type=str,
 30                          default=None,
 31                          help="config file")
 32      parser.add_argument('-v', '--verbose', action='store_true')
 33      return parser
 34
 35
 36  def read_jsonlines(input_filename):
         [issue] Missing function or method docstring
 37      for line in open(input_filename).readlines():
 38          example = json.loads(line)
 39          yield example
 40
 41
 42  def main(args=None):
         [issue] Missing function or method docstring
         [issue] Comprehensibility: this function exceeds the maximum number of variables (30/15)
 43      parser = get_parser()
 44      args = parser.parse_args()
 45      if args.verbose:
 46          logging.basicConfig(level=logging.DEBUG)
 47      config = util.initialize_from_env(args.config, args.cfg_file)
 48
 49      # Input file in .jsonlines format or .conll.
 50      input_filename = args.input_filename
 51
 52      ext_input = os.path.splitext(input_filename)[-1]
 53      if ext_input not in ['.conll', '.jsonlines', '.txt', '.naf']:
 54          raise Exception(
 55              'Input file should be .naf, .conll, .txt or .jsonlines, but is {}.'
 56              .format(ext_input))
 57
 58      if ext_input == '.conll':
 59          labels = collections.defaultdict(set)
 60          stats = collections.defaultdict(int)
 61          docs = minimize.minimize_partition(
 62              input_filename, labels, stats, args.word_col)
 63      elif ext_input == '.jsonlines':
 64          docs = read_jsonlines(input_filename)
 65      elif ext_input == '.naf':
 66          naf_obj = naf.get_naf(input_filename)
 67          jsonlines_obj, term_ids, tok_ids = naf.get_jsonlines(naf_obj)
         [issue] Unused code: the variable tok_ids seems to be unused
 68          docs = [jsonlines_obj]
 69      else:
 70          text = open(input_filename).read()
 71          docs = [util.create_example(text)]
 72
 73      output_file = args.output_file
 74      model = cm.CorefModel(config)
 75      sentences = {}
 76      predictions = {}
 77      with tf.Session() as session:
 78          model.restore(session)
 79          for example_num, example in enumerate(docs):
 80              # logging.info(example['doc_key'])
 81              tensorized_example = model.tensorize_example(
 82                  example, is_training=False)
 83              feed_dict = {i: t for i, t in zip(
         [issue] Unused code: unnecessary use of a comprehension
 84                  model.input_tensors, tensorized_example)}
 85              _, _, _, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores = session.run(
         [issue] Coding style: this line is too long as per the coding-style (106/100)
 86                  model.predictions, feed_dict=feed_dict)
 87              predicted_antecedents = model.get_predicted_antecedents(
 88                  top_antecedents, top_antecedent_scores)
 89              example["predicted_clusters"], _ = model.get_predicted_clusters(
 90                  top_span_starts, top_span_ends, predicted_antecedents)
 91              if args.format_out == 'jsonlines':
 92                  output_file.write(json.dumps(example))
 93                  output_file.write("\n")
 94              else:
 95                  predictions[example['doc_key']] = example["predicted_clusters"]
 96                  sentences[example['doc_key']] = example["sentences"]
 97              if example_num % 100 == 0:
 98                  logging.info("Decoded {} examples.".format(example_num + 1))
         [issue] Use lazy % formatting in logging functions
 99          if args.format_out == 'conll':
100              conll.output_conll(output_file, sentences, predictions)
101          elif args.format_out == 'naf':
102              # Check number of docs - what to do if multiple?
103              # Create naf obj if input format was not naf
104              if ext_input != '.naf':
105                  # To do: add linguistic processing layers for terms and tokens
106                  logging.warn(
         [issue] Using deprecated method warn()
         [issue] Use lazy % formatting in logging functions
107                      'Outputting NAF when input was not naf,'
108                      + 'no dependency information available')
109                  for doc_key in sentences:
110                      naf_obj, term_ids = naf.get_naf_from_sentences(
111                          sentences[doc_key])
112                      naf_obj = naf.create_coref_layer(
113                          naf_obj, predictions[doc_key], term_ids)
114                      naf_obj = naf.add_linguistic_processors(naf_obj)
115                      buffer = io.BytesIO()
116                      naf_obj.dump(buffer)
117                      output_file.write(buffer.getvalue().decode('utf-8'))
118                      # To do, make sepearate outputs?
119                      # To do, use dependency information from conll?
120              else:
121                  # We only have one input doc
122                  naf_obj = naf.create_coref_layer(
123                      naf_obj, example["predicted_clusters"], term_ids)
         [issue] The variable naf_obj does not seem to be defined for all execution paths
         [issue] The variable term_ids does not seem to be defined for all execution paths
         [issue] The variable example does not seem to be defined in case the for loop on line 79 is not entered. Are you sure this can never be the case?
         [issue] Bug: the loop variable example might not be defined here
124                  naf_obj = naf.add_linguistic_processors(naf_obj)
125                  buffer = io.BytesIO()
126                  naf_obj.dump(buffer)
127                  output_file.write(buffer.getvalue().decode('utf-8'))
128
129
130  if __name__ == "__main__":
131      main()
132
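Several of the individual findings above have mechanical fixes. A sketch, detached from the e2edutch context (the function names here are ours, and the TensorFlow feed-dict is stood in for by plain lists):

```python
import logging


def build_feed_dict(input_tensors, tensorized_example):
    """Fix for 'Unnecessary use of a comprehension':
    dict(zip(...)) replaces {i: t for i, t in zip(...)}."""
    return dict(zip(input_tensors, tensorized_example))


def log_progress(example_num):
    """Fix for 'Use lazy % formatting in logging functions': the message
    is only interpolated when the INFO level is actually enabled."""
    logging.info("Decoded %d examples.", example_num + 1)


def warn_no_dependencies():
    """Fix for 'Using deprecated method warn()': logging.warning() is the
    supported name. The original '+' concatenation on lines 107-108 also
    lacked a space after the comma; implicit string concatenation with the
    space restored avoids both problems."""
    logging.warning(
        'Outputting NAF when input was not naf, '
        'no dependency information available')
```

The remaining findings (missing docstrings, the 30-variable main(), the naf_obj/term_ids/example variables undefined on some paths) are structural, and are what the Long Method and Complexity refactorings above address.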