e2edutch.predict - Code Metrics - Inspection of "Merge branch 'master' of github.com:Filter-Bubble/..." - Filter-Bubble/e2e-Dutch - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( fcc4be...d6f0a9 )

by Dafne van

created 2020-11-23 10:51 UTC

e2edutch.predict A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	132
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	17
eloc	108
dl	0
loc	132
rs	10
c	0
b	0
f	0

3 Functions

Rating	Name	Size	Complexity
A	get_parser()	15	1
A	read_jsonlines()	4	2
F	main()	86	14

import sys

import json
import os
import io
import collections
import argparse
import logging

from e2edutch import conll
from e2edutch import minimize
from e2edutch import util
from e2edutch import coref_model as cm
from e2edutch import naf

import tensorflow.compat.v1 as tf

# Switch TensorFlow to v1 behaviour at import time; main() below drives the
# model through the v1-style ``tf.Session`` API.
tf.disable_v2_behavior()


def get_parser():
    """Build the command-line parser for the prediction script.

    Positional arguments are ``config`` and ``input_filename``. Optional
    arguments are ``-o/--output_file`` (opened for writing, defaults to
    standard output), ``-f/--format_out`` (``conll``, ``jsonlines`` or
    ``naf``; defaults to ``conll``), ``-c/--word_col`` (int, defaults to 2),
    ``--cfg_file`` (defaults to ``None``) and the ``-v/--verbose`` flag.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    # (flags, keyword options) pairs, listed in registration order.
    argument_specs = (
        (('config',), {}),
        (('input_filename',), {}),
        (('-o', '--output_file'),
         {'type': argparse.FileType('w'), 'default': sys.stdout}),
        (('-f', '--format_out'),
         {'default': 'conll', 'choices': ['conll', 'jsonlines', 'naf']}),
        (('-c', '--word_col'), {'type': int, 'default': 2}),
        (('--cfg_file',),
         {'type': str, 'default': None, 'help': "config file"}),
        (('-v', '--verbose'), {'action': 'store_true'}),
    )

    cli = argparse.ArgumentParser()
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)
    return cli


def read_jsonlines(input_filename):
    """Lazily yield one parsed JSON value per non-blank line of a file.

    The file is read line by line inside a ``with`` block, so it is closed
    once the generator is exhausted or closed, instead of being left open.
    Blank lines (for example a trailing empty line) are skipped rather than
    handed to ``json.loads``.

    Args:
        input_filename: path of the ``.jsonlines`` file to read.

    Yields:
        The object decoded from each non-blank line.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(input_filename) as jsonlines_file:
        for line in jsonlines_file:
            if not line.strip():
                continue
            yield json.loads(line)

def _load_documents(input_filename, ext_input, word_col):
    """Load the documents to run prediction on, based on the file extension.

    Args:
        input_filename: path of the input file.
        ext_input: extension of ``input_filename`` including the dot.
            ``.conll``, ``.jsonlines`` and ``.naf`` get dedicated readers;
            any other value is read as plain text.
        word_col: column index forwarded to ``minimize.minimize_partition``
            for ``.conll`` input.

    Returns:
        A tuple ``(docs, naf_obj, term_ids)``. ``docs`` is an iterable of
        examples. ``naf_obj`` and ``term_ids`` are only set for ``.naf``
        input and are ``None`` for every other input type.
    """
    naf_obj = None
    term_ids = None
    if ext_input == '.conll':
        labels = collections.defaultdict(set)
        stats = collections.defaultdict(int)
        docs = minimize.minimize_partition(
            input_filename, labels, stats, word_col)
    elif ext_input == '.jsonlines':
        docs = read_jsonlines(input_filename)
    elif ext_input == '.naf':
        naf_obj = naf.get_naf(input_filename)
        # The third returned value (token ids) is not needed here.
        jsonlines_obj, term_ids, _ = naf.get_jsonlines(naf_obj)
        docs = [jsonlines_obj]
    else:
        # Close the text file as soon as its content has been read.
        with open(input_filename) as text_file:
            text = text_file.read()
        docs = [util.create_example(text)]
    return docs, naf_obj, term_ids


def _write_naf(naf_obj, clusters, term_ids, output_file):
    """Add a coreference layer to ``naf_obj`` and write it to ``output_file``.

    The NAF object is dumped into an in-memory bytes buffer and decoded as
    UTF-8, because ``output_file`` is a text stream.
    """
    naf_obj = naf.create_coref_layer(naf_obj, clusters, term_ids)
    naf_obj = naf.add_linguistic_processors(naf_obj)
    buffer = io.BytesIO()
    naf_obj.dump(buffer)
    output_file.write(buffer.getvalue().decode('utf-8'))


def _predict(args):
    """Run the model on the input in ``args`` and write the predictions.

    Args:
        args: parsed command-line namespace as produced by ``get_parser()``.

    Raises:
        Exception: if the input file extension is not one of ``.naf``,
            ``.conll``, ``.txt`` or ``.jsonlines``.
    """
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    config = util.initialize_from_env(args.config, args.cfg_file)

    # Input file in .naf, .conll, .txt or .jsonlines format.
    input_filename = args.input_filename
    ext_input = os.path.splitext(input_filename)[-1]
    if ext_input not in ['.conll', '.jsonlines', '.txt', '.naf']:
        raise Exception(
            'Input file should be .naf, .conll, .txt or .jsonlines, but is {}.'
            .format(ext_input))

    docs, naf_obj, term_ids = _load_documents(
        input_filename, ext_input, args.word_col)

    output_file = args.output_file
    model = cm.CorefModel(config)
    sentences = {}
    predictions = {}
    # Clusters of the most recently decoded example; used for NAF input,
    # which always consists of exactly one document.
    clusters = None
    with tf.Session() as session:
        model.restore(session)
        for example_num, example in enumerate(docs):
            tensorized_example = model.tensorize_example(
                example, is_training=False)
            feed_dict = dict(zip(model.input_tensors, tensorized_example))
            (_, _, _, top_span_starts, top_span_ends,
             top_antecedents, top_antecedent_scores) = session.run(
                model.predictions, feed_dict=feed_dict)
            predicted_antecedents = model.get_predicted_antecedents(
                top_antecedents, top_antecedent_scores)
            clusters, _ = model.get_predicted_clusters(
                top_span_starts, top_span_ends, predicted_antecedents)
            example["predicted_clusters"] = clusters
            if args.format_out == 'jsonlines':
                # jsonlines output is streamed one example at a time.
                output_file.write(json.dumps(example))
                output_file.write("\n")
            else:
                # Other formats are written after all examples are decoded.
                predictions[example['doc_key']] = clusters
                sentences[example['doc_key']] = example["sentences"]
            if example_num % 100 == 0:
                logging.info("Decoded %d examples.", example_num + 1)

        if args.format_out == 'conll':
            conll.output_conll(output_file, sentences, predictions)
        elif args.format_out == 'naf':
            if ext_input != '.naf':
                # No NAF object exists yet, so build one per document.
                # TODO: add linguistic processing layers for terms and tokens
                logging.warning(
                    'Outputting NAF when input was not naf, '
                    'no dependency information available')
                for doc_key in sentences:
                    doc_naf, doc_term_ids = naf.get_naf_from_sentences(
                        sentences[doc_key])
                    _write_naf(doc_naf, predictions[doc_key],
                               doc_term_ids, output_file)
                    # TODO: make separate outputs?
                    # TODO: use dependency information from conll?
            else:
                # We only have one input doc
                _write_naf(naf_obj, clusters, term_ids, output_file)


def main(args=None):
    """Command-line entry point: predict coreference clusters for a file.

    Args:
        args: optional list of command-line argument strings. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.

    Raises:
        Exception: if the input file extension is not one of ``.naf``,
            ``.conll``, ``.txt`` or ``.jsonlines``.
    """
    parser = get_parser()
    # Pass the explicit argument list through so the ``args`` parameter is
    # honoured; ``None`` keeps the command-line behaviour.
    args = parser.parse_args(args)
    try:
        _predict(args)
    finally:
        # argparse opened the output file; close it unless it is stdout.
        if args.output_file is not sys.stdout:
            args.output_file.close()


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()


1			import sys
			0 ignored issues – show introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report Missing module docstring Loading history...
2			import json
3			import os
4			import io
5			import collections
6			import argparse
7			import logging
8
9			from e2edutch import conll
10			from e2edutch import minimize
11			from e2edutch import util
12			from e2edutch import coref_model as cm
13			from e2edutch import naf
14
15			import tensorflow.compat.v1 as tf
			0 ignored issues – show introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report Unable to import 'tensorflow.compat.v1' Loading history... introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report third party import "import tensorflow.compat.v1 as tf" should be placed before "from e2edutch import conll" Loading history...
16			tf.disable_v2_behavior()
17
18
19			def get_parser():
			0 ignored issues – show introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
20			parser = argparse.ArgumentParser()
21			parser.add_argument('config')
22			parser.add_argument('input_filename')
23			parser.add_argument('-o', '--output_file',
24			type=argparse.FileType('w'), default=sys.stdout)
25			parser.add_argument('-f', '--format_out', default='conll',
26			choices=['conll', 'jsonlines', 'naf'])
27			parser.add_argument('-c', '--word_col', type=int, default=2)
28			parser.add_argument('--cfg_file',
29			type=str,
30			default=None,
31			help="config file")
32			parser.add_argument('-v', '--verbose', action='store_true')
33			return parser
34
35
36			def read_jsonlines(input_filename):
			0 ignored issues – show introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
37			for line in open(input_filename).readlines():
38			example = json.loads(line)
39			yield example
40
41
42			def main(args=None):
			0 ignored issues – show introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history... Comprehensibility introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report This function exceeds the maximum number of variables (30/15). Loading history...
43			parser = get_parser()
44			args = parser.parse_args()
45			if args.verbose:
46			logging.basicConfig(level=logging.DEBUG)
47			config = util.initialize_from_env(args.config, args.cfg_file)
48
49			# Input file in .jsonlines format or .conll.
50			input_filename = args.input_filename
51
52			ext_input = os.path.splitext(input_filename)[-1]
53			if ext_input not in ['.conll', '.jsonlines', '.txt', '.naf']:
54			raise Exception(
55			'Input file should be .naf, .conll, .txt or .jsonlines, but is {}.'
56			.format(ext_input))
57
58			if ext_input == '.conll':
59			labels = collections.defaultdict(set)
60			stats = collections.defaultdict(int)
61			docs = minimize.minimize_partition(
62			input_filename, labels, stats, args.word_col)
63			elif ext_input == '.jsonlines':
64			docs = read_jsonlines(input_filename)
65			elif ext_input == '.naf':
66			naf_obj = naf.get_naf(input_filename)
67			jsonlines_obj, term_ids, tok_ids = naf.get_jsonlines(naf_obj)
			0 ignored issues – show Unused Code introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report The variable `tok_ids` seems to be unused. Loading history...
68			docs = [jsonlines_obj]
69			else:
70			text = open(input_filename).read()
71			docs = [util.create_example(text)]
72
73			output_file = args.output_file
74			model = cm.CorefModel(config)
75			sentences = {}
76			predictions = {}
77			with tf.Session() as session:
78			model.restore(session)
79			for example_num, example in enumerate(docs):
80			# logging.info(example['doc_key'])
81			tensorized_example = model.tensorize_example(
82			example, is_training=False)
83			feed_dict = {i: t for i, t in zip(
			0 ignored issues – show Unused Code introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report Unnecessary use of a comprehension Loading history...
84			model.input_tensors, tensorized_example)}
85			_, _, _, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores = session.run(
			0 ignored issues – show Coding Style introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (106/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
86			model.predictions, feed_dict=feed_dict)
87			predicted_antecedents = model.get_predicted_antecedents(
88			top_antecedents, top_antecedent_scores)
89			example["predicted_clusters"], _ = model.get_predicted_clusters(
90			top_span_starts, top_span_ends, predicted_antecedents)
91			if args.format_out == 'jsonlines':
92			output_file.write(json.dumps(example))
93			output_file.write("\n")
94			else:
95			predictions[example['doc_key']] = example["predicted_clusters"]
96			sentences[example['doc_key']] = example["sentences"]
97			if example_num % 100 == 0:
98			logging.info("Decoded {} examples.".format(example_num + 1))
			0 ignored issues – show introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report Use lazy % formatting in logging functions Loading history...
99			if args.format_out == 'conll':
100			conll.output_conll(output_file, sentences, predictions)
101			elif args.format_out == 'naf':
102			# Check number of docs - what to do if multiple?
103			# Create naf obj if input format was not naf
104			if ext_input != '.naf':
105			# To do: add linguistic processing layers for terms and tokens
106			logging.warn(
			0 ignored issues – show introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report Using deprecated method warn() Loading history... Coding Style Best Practice introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report Use lazy % formatting in logging functions Loading history...
107			'Outputting NAF when input was not naf,'
108			+ 'no dependency information available')
109			for doc_key in sentences:
110			naf_obj, term_ids = naf.get_naf_from_sentences(
111			sentences[doc_key])
112			naf_obj = naf.create_coref_layer(
113			naf_obj, predictions[doc_key], term_ids)
114			naf_obj = naf.add_linguistic_processors(naf_obj)
115			buffer = io.BytesIO()
116			naf_obj.dump(buffer)
117			output_file.write(buffer.getvalue().decode('utf-8'))
118			# To do, make separate outputs?
119			# TO do, use dependency information from conll?
120			else:
121			# We only have one input doc
122			naf_obj = naf.create_coref_layer(
123			naf_obj, example["predicted_clusters"], term_ids)
			0 ignored issues – show introduced 2020-10-27 15:20 UTC by Report Bug Copy Issue Report The variable `naf_obj` does not seem to be defined for all execution paths. Loading history... introduced 2020-10-27 15:20 UTC by Report Bug Copy Issue Report The variable `term_ids` does not seem to be defined for all execution paths. Loading history... introduced 2020-10-27 15:20 UTC by Report Bug Copy Issue Report The variable `example` does not seem to be defined in case the `for` loop on line `79` is not entered. Are you sure this can never be the case? Loading history... Bug introduced 2020-11-23 10:55 UTC by Report Bug Copy Issue Report The loop variable `example` might not be defined here. Loading history...
124			naf_obj = naf.add_linguistic_processors(naf_obj)
125			buffer = io.BytesIO()
126			naf_obj.dump(buffer)
127			output_file.write(buffer.getvalue().decode('utf-8'))
128
129
130			if __name__ == "__main__":
131			main()
132

Filter-Bubble / e2e-Dutch

Push — master ( fcc4be...d6f0a9 )

e2edutch.predict A

Complexity

Size/Duplication

Importance

3 Functions

Duplication Side-by-Side

Filter issues like