e2edutch.stanza.clusterSetter() - Code Metrics - Inspection of "Merge pull request #26 from Filter-Bubble/processo..." - Filter-Bubble/e2e-Dutch - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( cc3c01...072717 )

by Dafne van

created 2021-01-28 09:51 UTC

e2edutch.stanza.clusterSetter() A

↳ Parent: e2edutch.stanza

Complexity

Conditions

Size

Total Lines	5
Code Lines	4

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	4
dl	0
loc	5
rs	10
c	0
b	0
f	0
cc	2
nop	2

import os

import stanza
import logging


from pathlib import Path


from e2edutch import util
from e2edutch import coref_model as cm

from e2edutch.download import download_data
from e2edutch.predict import Predictor

from stanza.pipeline.processor import Processor, register_processor

from stanza.models.common.doc import Document, Span



# Add a Clusters property to documents as a List of List of Span:
# Clusters is a List of cluster, cluster is a List of Span
def clusterSetter(self, value):

    if isinstance(value, type([])):
        self._clusters = value
class MyParent:
    """Example base class with one protected and one public attribute."""

    def __init__(self):
        # ``_x`` carries the leading underscore (protected by convention);
        # ``y`` is a plain public attribute.
        self._x, self.y = 1, 2

class MyChild(MyParent):
    """Example subclass that reads the protected attribute of its parent."""

    def some_method(self):
        """Return the ``_x`` attribute of this instance."""
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    """Example of an unrelated class reading another object's ``_x``."""

    def some_method(self, instance_of_my_child):
        """Return the ``_x`` attribute of ``instance_of_my_child``."""
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
    else:
        logger.error('Clusters must be a List')

stanza.models.common.doc.Document.add_property('clusters', default='[]', setter=clusterSetter)


import tensorflow.compat.v1 as tf


logger = logging.getLogger('e2edutch')
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())


@register_processor('coref')
class CorefProcessor(Processor):
    ''' Processor that appends coreference information '''
    _requires = set(['tokenize'])
    _provides = set(['coref'])
    

    def __init__(self, config, pipeline, use_gpu):
class SomeParent:
    """Example base class whose initializer sets ``x`` to 1."""

    def __init__(self):
        # State that a subclass only gets if it calls this initializer.
        self.x = 1

class SomeChild(SomeParent):
    def __init__(self):
        # Initialize the super class
        SomeParent.__init__(self)
        # Make e2edutch follow Stanza's GPU settings:
        # set the environment value for GPU, so that initialize_from_env picks it up.
        #if use_gpu:
        #    os.environ['GPU'] = ' '.join(tf.config.experimental.list_physical_devices('GPU'))
        #else:
        #    if 'GPU' in os.environ['GPU'] :
        #        os.environ.pop('GPU')

        self.e2econfig = util.initialize_from_env(model_name='final')

        # Override datapath and log_root:
        # store e2edata with the Stanza resources, ie. a 'stanza_resources/nl/coref' directory
        self.e2econfig['datapath'] = Path(config['model_path']).parent
        self.e2econfig['log_root'] = Path(config['model_path']).parent

        # Download data files if not present
        download_data(self.e2econfig)

        # Start and stop a session to cache all models
        predictor = Predictor(config=self.e2econfig)
        predictor.end_session()

    def _set_up_model(self, *args):
class Foo:
    def some_method(self, x, y):
        return x + y;
        print ('_set_up_model')

        pass


    def process(self, doc):
        """Predict coreference clusters for ``doc`` and store them on it.

        The word texts of every sentence are passed to a ``Predictor`` built
        from ``self.e2econfig``. Each predicted mention is a ``(start, end)``
        pair of document-level word indices (``end`` inclusive). It is turned
        into a ``Span`` of type ``'COREF'`` and the resulting list of clusters
        (a list of lists of ``Span``) is assigned to ``doc.clusters``.

        Args:
            doc: document whose ``sentences`` and their ``words`` are read.

        Returns:
            The same ``doc`` object, with ``doc.clusters`` set.

        Raises:
            IndexError: if a predicted ``start`` index lies beyond the last
                word of the last sentence.
        """
        predictor = Predictor(config=self.e2econfig)

        # The session is ended in ``finally`` so that it is released even
        # when prediction or span construction raises.
        try:
            # build the example argument for predict:
            #   example (dict): dict with the following fields:
            #                     sentences ([[str]])
            #                     doc_id (str)
            #                     clusters ([[(int, int)]]) (optional)
            example = {
                'sentences': [
                    [word.text for word in sentence.words]
                    for sentence in doc.sentences
                ],
                'doc_id': 'document_from_stanza',  # TODO check what this should be
                'doc_key': 'undocumented',  # TODO check what this should be
            }

            predicted_clusters = predictor.predict(example)  # a list of tuples

            # Add the predicted clusters back to the Stanza document
            clusters = []
            for predicted_cluster in predicted_clusters:  # a tuple of entities
                cluster = []
                for start, end in predicted_cluster:  # (start, end) word indices
                    # find the sentence_id of the sentence containing this reference
                    sentence_id = 0
                    sentence = doc.sentences[0]
                    sentence_start_word = 0
                    sentence_end_word = len(sentence.words) - 1

                    while sentence_end_word < start:
                        sentence_start_word = sentence_end_word + 1

                        # move to the next sentence
                        sentence_id += 1
                        sentence = doc.sentences[sentence_id]

                        sentence_end_word = sentence_start_word + len(sentence.words) - 1

                    # start counting words from the start of this sentence
                    start -= sentence_start_word
                    end -= sentence_start_word

                    # Only words of the located sentence are sliced, so a
                    # mention running past its end is cut off at that boundary.
                    span = Span(  # a list of Tokens
                        tokens=[word.parent for word in sentence.words[start:end + 1]],
                        doc=doc,
                        type='COREF',
                        sent=sentence,
                    )
                    cluster.append(span)

                clusters.append(cluster)

            doc.clusters = clusters
        finally:
            predictor.end_session()

        return doc


1			import os
			0 ignored issues – show introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report Missing module docstring Loading history... Unused Code introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report The import `os` seems to be unused. Loading history...
2			import stanza
3			import logging
			0 ignored issues – show introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report standard import "import logging" should be placed before "import stanza" Loading history...
4
5			from pathlib import Path
			0 ignored issues – show introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report standard import "from pathlib import Path" should be placed before "import stanza" Loading history...
6
7			from e2edutch import util
8			from e2edutch import coref_model as cm
			0 ignored issues – show Unused Code introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report Unused coref_model imported from e2edutch as cm Loading history...
9			from e2edutch.download import download_data
10			from e2edutch.predict import Predictor
11
12			from stanza.pipeline.processor import Processor, register_processor
			0 ignored issues – show introduced 2021-01-25 13:55 UTC by Report Bug Copy Issue Report third party import "from stanza.pipeline.processor import Processor, register_processor" should be placed before "from e2edutch import util" Loading history...
13			from stanza.models.common.doc import Document, Span
			0 ignored issues – show Unused Code introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report Unused Document imported from stanza.models.common.doc Loading history... introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report third party import "from stanza.models.common.doc import Document, Span" should be placed before "from e2edutch import util" Loading history...
14
15
16			# Add a Clusters property to documents as a List of List of Span:
17			# Clusters is a List of cluster, cluster is a List of Span
18			def clusterSetter(self, value):
			0 ignored issues – show Coding Style Naming introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report Function name "clusterSetter" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
19			if isinstance(value, type([])):
20			self._clusters = value
			0 ignored issues – show Coding Style Best Practice introduced 2021-01-26 15:58 UTC by Report Bug Copy Issue Report It seems like `_clusters` was declared protected and should not be accessed from this context. Prefixing a member variable `_` is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class: class MyParent: def __init__(self): self._x = 1; self.y = 2; class MyChild(MyParent): def some_method(self): return self._x # Ok, since accessed from a child class class AnotherClass: def some_method(self, instance_of_my_child): return instance_of_my_child._x # Would be flagged as AnotherClass is not # a child class of MyParent Loading history...
21			else:
22			logger.error('Clusters must be a List')
23
24			stanza.models.common.doc.Document.add_property('clusters', default='[]', setter=clusterSetter)
25
26
27			import tensorflow.compat.v1 as tf
			0 ignored issues – show introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report Unable to import 'tensorflow.compat.v1' Loading history... introduced 2020-12-17 15:16 UTC by Report Bug Copy Issue Report Import "import tensorflow.compat.v1 as tf" should be placed at the top of the module Loading history... Unused Code introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report Unused tensorflow.compat.v1 imported as tf Loading history... introduced 2021-01-25 13:55 UTC by Report Bug Copy Issue Report third party import "import tensorflow.compat.v1 as tf" should be placed before "from e2edutch import util" Loading history...
28
29			logger = logging.getLogger('e2edutch')
30			logger.setLevel(logging.INFO)
31			logger.addHandler(logging.StreamHandler())
32
33
34			@register_processor('coref')
35			class CorefProcessor(Processor):
36			''' Processor that appends coreference information '''
37			_requires = set(['tokenize'])
38			_provides = set(['coref'])
39
			0 ignored issues – show Coding Style introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
40			def __init__(self, config, pipeline, use_gpu):
			0 ignored issues – show Bug introduced 2021-01-25 13:55 UTC by Report Bug Copy Issue Report The `__init__` method of the super-class `Processor` is not called. It is generally advisable to initialize the super-class by calling its `__init__` method: class SomeParent: def __init__(self): self.x = 1 class SomeChild(SomeParent): def __init__(self): # Initialize the super class SomeParent.__init__(self) Loading history...
41			# Make e2edutch follow Stanza's GPU settings:
42			# set the environment value for GPU, so that initialize_from_env picks it up.
43			#if use_gpu:
44			# os.environ['GPU'] = ' '.join(tf.config.experimental.list_physical_devices('GPU'))
45			#else:
46			# if 'GPU' in os.environ['GPU'] :
47			# os.environ.pop('GPU')
48
49			self.e2econfig = util.initialize_from_env(model_name='final')
50
51			# Override datapath and log_root:
52			# store e2edata with the Stanza resources, ie. a 'stanza_resources/nl/coref' directory
53			self.e2econfig['datapath'] = Path(config['model_path']).parent
54			self.e2econfig['log_root'] = Path(config['model_path']).parent
55
56			# Download data files if not present
57			download_data(self.e2econfig)
58
59			# Start and stop a session to cache all models
60			predictor = Predictor(config=self.e2econfig)
61			predictor.end_session()
62
63			def _set_up_model(self, *args):
			0 ignored issues – show Unused Code introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report The argument `args` seems to be unused. Loading history... Coding Style introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
64			print ('_set_up_model')
			0 ignored issues – show Coding Style introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report No space allowed before bracket Loading history...
65			pass
			0 ignored issues – show Unused Code introduced 2021-01-25 09:19 UTC by Report Bug Copy Issue Report Unnecessary pass statement Loading history...
66
67			def process(self, doc):
			0 ignored issues – show Comprehensibility introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report This function exceeds the maximum number of variables (18/15). Loading history...
68
69			predictor = Predictor(config=self.e2econfig)
70
71			# build the example argument for predict:
72			# example (dict): dict with the following fields:
73			# sentences ([[str]])
74			# doc_id (str)
75			# clusters ([[(int, int)]]) (optional)
76			example = {}
77			example['sentences'] = []
78			example['doc_id'] = 'document_from_stanza' # TODO check what this should be
			0 ignored issues – show Coding Style introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report `TODO` and `FIXME` comments should generally be avoided. Loading history...
79			example['doc_key'] = 'undocumented' # TODO check what this should be
			0 ignored issues – show Coding Style introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report `TODO` and `FIXME` comments should generally be avoided. Loading history...
80
81			for sentence in doc.sentences:
82			s = []
			0 ignored issues – show Coding Style Naming introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report Variable name "s" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
83			for word in sentence.words:
84			s.append(word.text)
85			example['sentences'].append(s)
86
87			predicted_clusters = predictor.predict(example) # a list of tuples
88
89			# Add the predicted clusters back to the Stanza document
90
91			clusters = []
92			for predicted_cluster in predicted_clusters: # a tuple of entities
93			cluster = []
94			for predicted_reference in predicted_cluster: # a tuple of (start, end) word
95			start, end = predicted_reference
96
97			# find the sentence_id of the sentence containing this reference
98			sentence_id = 0
99			sentence = doc.sentences[0]
100			sentence_start_word = 0
101			sentence_end_word = len(sentence.words) - 1
102
103			while sentence_end_word < start:
104			sentence_start_word = sentence_end_word + 1
105
106			# move to the next sentence
107			sentence_id += 1
108			sentence = doc.sentences[sentence_id]
109
110			sentence_end_word = sentence_start_word + len(sentence.words) - 1
111
112			# start counting words from the start of this sentence
113			start -= sentence_start_word
114			end -= sentence_start_word
115
116			span = Span( # a list of Tokens
117			tokens=[word.parent for word in sentence.words[start:end + 1]],
			0 ignored issues – show Coding Style introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation (remove 4 spaces). Loading history...
118			doc=doc,
			0 ignored issues – show Coding Style introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation (remove 4 spaces). Loading history...
119			type='COREF',
			0 ignored issues – show Coding Style introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation (remove 4 spaces). Loading history...
120			sent=doc.sentences[sentence_id]
			0 ignored issues – show Coding Style introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation (remove 4 spaces). Loading history...
121			)
			0 ignored issues – show Coding Style introduced 2021-01-25 19:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation. Loading history...
122			cluster.append(span)
123
124			clusters.append(cluster)
125
126			doc.clusters = clusters
127
128			predictor.end_session()
129
130			return doc
131

Filter-Bubble / e2e-Dutch

Push — master ( cc3c01...072717 )

e2edutch.stanza.clusterSetter() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like