DependencyUtils.retrieve_edges() - Code Metrics - Inspection of "Version 3.0.0" - myedibleenso/py-processors - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 352884...922d0d )

by Gus

created 2017-03-26 10:12 UTC

DependencyUtils.retrieve_edges() B

↳ Parent: DependencyUtils

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	5
dl	0
loc	31
rs	8.0894
c	0
b	0
f	0

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from processors.utils import LabelManager
import networkx as nx
import collections


class DependencyUtils(object):
    """
    A set of utilities for analyzing syntactic dependency graphs.

    Methods
    -------
    build_networkx_graph(roots, edges, name)
        Constructs a networkx.Graph
    shortest_path(g, start, end)
        Finds the shortest path in a `networkx.Graph` between any element in a list of start nodes and any element in a list of end nodes.
    retrieve_edges(dep_graph, path)
        Converts output of `shortest_path` into a list of triples that include the grammatical relation (and direction) for each node-node "hop" in the syntactic dependency graph.
    simplify_tag(tag)
        Maps part of speech (PoS) tag to a subset of PoS tags to better consolidate categorical labels.
    lexicalize_path(sentence, path, words=False, lemmas=False, tags=False, simple_tags=False, entities=False)
        Lexicalizes path in syntactic dependency graph using Odin-style token constraints.
    pagerank(networkx_graph, alpha=0.85, personalization=None, max_iter=1000, tol=1e-06, nstart=None, weight='weight', dangling=None)
        Measures node activity in a `networkx.Graph` using a thin wrapper around `networkx` implementation of pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`).  Use with `processors.ds.DirectedGraph.graph`.
    """

    UNKNOWN = LabelManager.UNKNOWN

    @staticmethod
    def build_networkx_graph(roots, edges, name):
        """
        Converts a `processors` dependency graph into a networkx graph
        """
        G = nx.Graph()
        graph_name = name
        # store roots
        G.graph["roots"] = roots
        edges = [(edge.source, edge.destination, {"relation": edge.relation}) for edge in edges]
        G.add_edges_from(edges)
        return G

    @staticmethod
    def shortest_path(g, start, end):
        """
        Find the shortest path between two nodes.

        Parameters
        ----------
        start : int or [int]
            A single token index or list of token indices serving as the start of the graph traversal.
        end : int or [int]
            A single token index or list of token indices serving as the end of the graph traversal.
        """
        start = start if isinstance(start, collections.Iterable) else [start]
        end = end if isinstance(end, collections.Iterable) else [end]
        # node list -> edges (i.e., (source, dest) pairs)
        def path_to_edges(g, path):
            return [(path[i], path[i+1]) for i in range(len(path) - 1)]

        shortest_paths = []
        for s in start:
            for e in end:
                try:
                    path = nx.algorithms.shortest_path(g, s, e)
                    shortest_paths.append(path_to_edges(g, path))
                # no path found...
                except:
                    #print("No path found between '{}' and '{}'".format(s, e))
                    continue
        return None if len(shortest_paths) == 0 else min(shortest_paths, key=lambda x: len(x))

    @staticmethod
    def retrieve_edges(dep_graph, path):
        """
        Converts output of Converts output of `DependencyUtils.shortest_path`
        into a list of triples that include the grammatical relation (and direction)
        for each node-node "hop" in the syntactic dependency graph.

        Parameters
        ----------
        dep_graph : processors.ds.DirectedGraph
            The `DirectedGraph` used to retrieve the grammatical relations for each edge in the `path`.
        path : [(int, int)]
            A list of tuples representing the shortest path from A to B in `dep_graph`.

        Returns
        -------
        [(int, str, int)]
            the shortest path (`path`) enhanced with the directed grammatical relations
            (ex. `>nsubj` for `predicate` to `subject` vs. `<nsubj` for `subject` to `predicate`).
        """

        shortest_path = []
        for (s, d) in path:
            # build dictionaries from incoming/outgoing
            outgoing = dep_graph.outgoing[s]
            outgoing = dict(outgoing) if len(outgoing) > 0 else dict()
            incoming = dep_graph.incoming[s]
            incoming = dict(incoming) if len(incoming) > 0 else dict()
            relation = ">{}".format(outgoing[d]) if d in outgoing else "<{}".format(incoming[d])
            shortest_path.append((s, relation, d))
        return shortest_path

    @staticmethod
    def simplify_tag(tag):
        """
        Maps part of speech (PoS) tag to a subset of PoS tags to better consolidate categorical labels.

        Parameters
        ----------
        tag : str
            The Penn-style PoS tag to be mapped to a simplified form.

        Returns
        -------
        str
            A simplified form of `tag`.  In some cases, the returned form may be identical to `tag`.
        """
        simple_tag = "\"{}\"".format(tag)
        # collapse plurals
        if tag.startswith("NNP"):
            simple_tag = "/^NNP/"
        # collapse plurals
        elif tag.startswith("NN"):
            simple_tag = "/^N/"
        elif tag.startswith("VB"):
            simple_tag = "/^V/"
        # collapse comparative, superlatives, etc.
        elif tag.startswith("JJ"):
            simple_tag = "/^J/"
        # collapse comparative, superlatives, etc.
        elif tag.startswith("RB"):
            simple_tag = "/^RB/"
        # collapse possessive/non-possesive pronouns
        elif tag.startswith("PRP"):
            simple_tag = "/^PRP/"
        # treat WH determiners as DT
        elif tag == "WDT":
            simple_tag = "/DT$/"
        # treat DT the same as WDT
        elif tag == "DT":
            simple_tag = "/DT$/"
        return simple_tag

    @staticmethod
    def lexicalize_path(sentence,
                        path,
                        words=False,
                        lemmas=False,
                        tags=False,
                        simple_tags=False,
                        entities=False):
        """
        Lexicalizes path in syntactic dependency graph using Odin-style token constraints.  Operates on output of `DependencyUtils.retrieve_edges`

        Parameters
        ----------
        sentence : processors.ds.Sentence
            The `Sentence` from which the `path` was found.  Used to lexicalize the `path`.
        words : bool
            Whether or not to encode nodes in the `path` with a token constraint constructed from `Sentence.words`
        lemmas : bool
            Whether or not to encode nodes in the `path` with a token constraint constructed from `Sentence.lemmas`
        tags : bool
            Whether or not to encode nodes in the `path` with a token constraint constructed from `Sentence.tags`
        simple_tags : bool
            Whether or not to encode nodes in the `path` with a token constraint constructed from `DependencyUtils.simplify_tag` applied to `Sentence.tags`
        entities : bool
            Whether or not to encode nodes in the `path` with a token constraint constructed from `Sentence._entities`

        Returns
        -------
        [str]
            The lexicalized form of `path`, encoded according to the specified parameters.
        """
        UNKNOWN = LabelManager.UNKNOWN
        lexicalized_path = []
        relations = []
        nodes = []
        # gather edges and nodes
        for edge in path:
            relations.append(edge[1])
            nodes.append(edge[0])
        nodes.append(path[-1][-1])

        for (i, node) in enumerate(nodes):
            # build token constraints
            token_constraints = []
            # words
            if words:
                token_constraints.append("word=\"{}\"".format(sentence.words[node]))
            # PoS tags
            if tags and sentence.tags[node] != UNKNOWN:
                token_constraints.append("tag=\"{}\"".format(sentence.tags[node]))
            # lemmas
            if lemmas and sentence.lemmas[node] != UNKNOWN:
                token_constraints.append("lemma=\"{}\"".format(sentence.lemmas[node]))
            # NE labels
            if entities and sentence._entities[node] != UNKNOWN:
                token_constraints.append("entity=\"{}\"".format(sentence.entity[node]))
            # simple tags
            if simple_tags and sentence.tags[node] != UNKNOWN:
                token_constraints.append("tag={}".format(DependencyUtils.simplify_tag(sentence.tags[node])))
            # build node pattern
            if len(token_constraints) > 0:
                node_pattern = "[{}]".format(" & ".join(token_constraints))
                # store lexicalized representation of node
                lexicalized_path.append(node_pattern)
            # append next edge
            if i < len(relations):
                lexicalized_path.append(relations[i])
        return lexicalized_path

    @staticmethod
    def pagerank(networkx_graph,
                 alpha=0.85,
                 personalization=None,
                 max_iter=1000,
                 tol=1e-06,
                 nstart=None,
                 weight='weight',
                 dangling=None):
        """
        Measures node activity in a `networkx.Graph` using a thin wrapper around `networkx` implementation of pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`).  Use with `processors.ds.DirectedGraph.graph`.

        Parameters
        ----------
        networkx_graph : networkx.Graph
            Corresponds to `G` parameter of `networkx.algorithms.link_analysis.pagerank`.

        See Also
        --------
        Method parameters correspond to those of [`networkx.algorithms.link_analysis.pagerank`](https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank)
        """
        return nx.algorithms.link_analysis.pagerank(G=networkx_graph, alpha=alpha, personalization=personalization, max_iter=max_iter, tol=tol, nstart=nstart, weight=weight, dangling=dangling)


1			#!/usr/bin/env python
2			# -- coding: utf-8 --
3			from __future__ import unicode_literals
4			from processors.utils import LabelManager
5			import networkx as nx
6			import collections
7
8
9			class DependencyUtils(object):
10			"""
11			A set of utilities for analyzing syntactic dependency graphs.
12
13			Methods
14			-------
15			build_networkx_graph(roots, edges, name)
16			Constructs a networkx.Graph
17			shortest_path(g, start, end)
18			Finds the shortest path in a `networkx.Graph` between any element in a list of start nodes and any element in a list of end nodes.
19			retrieve_edges(dep_graph, path)
20			Converts output of `shortest_path` into a list of triples that include the grammatical relation (and direction) for each node-node "hop" in the syntactic dependency graph.
21			simplify_tag(tag)
22			Maps part of speech (PoS) tag to a subset of PoS tags to better consolidate categorical labels.
23			lexicalize_path(sentence, path, words=False, lemmas=False, tags=False, simple_tags=False, entities=False)
24			Lexicalizes path in syntactic dependency graph using Odin-style token constraints.
25			pagerank(networkx_graph, alpha=0.85, personalization=None, max_iter=1000, tol=1e-06, nstart=None, weight='weight', dangling=None)
26			Measures node activity in a `networkx.Graph` using a thin wrapper around `networkx` implementation of pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`). Use with `processors.ds.DirectedGraph.graph`.
27			"""
28
29			UNKNOWN = LabelManager.UNKNOWN
30
31			@staticmethod
32			def build_networkx_graph(roots, edges, name):
33			"""
34			Converts a `processors` dependency graph into a networkx graph
35			"""
36			G = nx.Graph()
37			graph_name = name
38			# store roots
39			G.graph["roots"] = roots
40			edges = [(edge.source, edge.destination, {"relation": edge.relation}) for edge in edges]
41			G.add_edges_from(edges)
42			return G
43
44			@staticmethod
45			def shortest_path(g, start, end):
46			"""
47			Find the shortest path between two nodes.
48
49			Parameters
50			----------
51			start : int or [int]
52			A single token index or list of token indices serving as the start of the graph traversal.
53			end : int or [int]
54			A single token index or list of token indices serving as the end of the graph traversal.
55			"""
56			start = start if isinstance(start, collections.Iterable) else [start]
57			end = end if isinstance(end, collections.Iterable) else [end]
58			# node list -> edges (i.e., (source, dest) pairs)
59			def path_to_edges(g, path):
60			return [(path[i], path[i+1]) for i in range(len(path) - 1)]
61
62			shortest_paths = []
63			for s in start:
64			for e in end:
65			try:
66			path = nx.algorithms.shortest_path(g, s, e)
67			shortest_paths.append(path_to_edges(g, path))
68			# no path found...
69			except:
70			#print("No path found between '{}' and '{}'".format(s, e))
71			continue
72			return None if len(shortest_paths) == 0 else min(shortest_paths, key=lambda x: len(x))
73
74			@staticmethod
75			def retrieve_edges(dep_graph, path):
76			"""
77			Converts output of Converts output of `DependencyUtils.shortest_path`
78			into a list of triples that include the grammatical relation (and direction)
79			for each node-node "hop" in the syntactic dependency graph.
80
81			Parameters
82			----------
83			dep_graph : processors.ds.DirectedGraph
84			The `DirectedGraph` used to retrieve the grammatical relations for each edge in the `path`.
85			path : [(int, int)]
86			A list of tuples representing the shortest path from A to B in `dep_graph`.
87
88			Returns
89			-------
90			[(int, str, int)]
91			the shortest path (`path`) enhanced with the directed grammatical relations
92			(ex. `>nsubj` for `predicate` to `subject` vs. `<nsubj` for `subject` to `predicate`).
93			"""
94
95			shortest_path = []
96			for (s, d) in path:
97			# build dictionaries from incoming/outgoing
98			outgoing = dep_graph.outgoing[s]
99			outgoing = dict(outgoing) if len(outgoing) > 0 else dict()
100			incoming = dep_graph.incoming[s]
101			incoming = dict(incoming) if len(incoming) > 0 else dict()
102			relation = ">{}".format(outgoing[d]) if d in outgoing else "<{}".format(incoming[d])
103			shortest_path.append((s, relation, d))
104			return shortest_path
105
106			@staticmethod
107			def simplify_tag(tag):
108			"""
109			Maps part of speech (PoS) tag to a subset of PoS tags to better consolidate categorical labels.
110
111			Parameters
112			----------
113			tag : str
114			The Penn-style PoS tag to be mapped to a simplified form.
115
116			Returns
117			-------
118			str
119			A simplified form of `tag`. In some cases, the returned form may be identical to `tag`.
120			"""
121			simple_tag = "\"{}\"".format(tag)
122			# collapse plurals
123			if tag.startswith("NNP"):
124			simple_tag = "/^NNP/"
125			# collapse plurals
126			elif tag.startswith("NN"):
127			simple_tag = "/^N/"
128			elif tag.startswith("VB"):
129			simple_tag = "/^V/"
130			# collapse comparative, superlatives, etc.
131			elif tag.startswith("JJ"):
132			simple_tag = "/^J/"
133			# collapse comparative, superlatives, etc.
134			elif tag.startswith("RB"):
135			simple_tag = "/^RB/"
136			# collapse possessive/non-possesive pronouns
137			elif tag.startswith("PRP"):
138			simple_tag = "/^PRP/"
139			# treat WH determiners as DT
140			elif tag == "WDT":
141			simple_tag = "/DT$/"
142			# treat DT the same as WDT
143			elif tag == "DT":
144			simple_tag = "/DT$/"
145			return simple_tag
146
147			@staticmethod
148			def lexicalize_path(sentence,
149			path,
150			words=False,
151			lemmas=False,
152			tags=False,
153			simple_tags=False,
154			entities=False):
155			"""
156			Lexicalizes path in syntactic dependency graph using Odin-style token constraints. Operates on output of `DependencyUtils.retrieve_edges`
157
158			Parameters
159			----------
160			sentence : processors.ds.Sentence
161			The `Sentence` from which the `path` was found. Used to lexicalize the `path`.
162			words : bool
163			Whether or not to encode nodes in the `path` with a token constraint constructed from `Sentence.words`
164			lemmas : bool
165			Whether or not to encode nodes in the `path` with a token constraint constructed from `Sentence.lemmas`
166			tags : bool
167			Whether or not to encode nodes in the `path` with a token constraint constructed from `Sentence.tags`
168			simple_tags : bool
169			Whether or not to encode nodes in the `path` with a token constraint constructed from `DependencyUtils.simplify_tag` applied to `Sentence.tags`
170			entities : bool
171			Whether or not to encode nodes in the `path` with a token constraint constructed from `Sentence._entities`
172
173			Returns
174			-------
175			[str]
176			The lexicalized form of `path`, encoded according to the specified parameters.
177			"""
178			UNKNOWN = LabelManager.UNKNOWN
179			lexicalized_path = []
180			relations = []
181			nodes = []
182			# gather edges and nodes
183			for edge in path:
184			relations.append(edge[1])
185			nodes.append(edge[0])
186			nodes.append(path[-1][-1])
187
188			for (i, node) in enumerate(nodes):
189			# build token constraints
190			token_constraints = []
191			# words
192			if words:
193			token_constraints.append("word=\"{}\"".format(sentence.words[node]))
194			# PoS tags
195			if tags and sentence.tags[node] != UNKNOWN:
196			token_constraints.append("tag=\"{}\"".format(sentence.tags[node]))
197			# lemmas
198			if lemmas and sentence.lemmas[node] != UNKNOWN:
199			token_constraints.append("lemma=\"{}\"".format(sentence.lemmas[node]))
200			# NE labels
201			if entities and sentence._entities[node] != UNKNOWN:
202			token_constraints.append("entity=\"{}\"".format(sentence.entity[node]))
203			# simple tags
204			if simple_tags and sentence.tags[node] != UNKNOWN:
205			token_constraints.append("tag={}".format(DependencyUtils.simplify_tag(sentence.tags[node])))
206			# build node pattern
207			if len(token_constraints) > 0:
208			node_pattern = "[{}]".format(" & ".join(token_constraints))
209			# store lexicalized representation of node
210			lexicalized_path.append(node_pattern)
211			# append next edge
212			if i < len(relations):
213			lexicalized_path.append(relations[i])
214			return lexicalized_path
215
216			@staticmethod
217			def pagerank(networkx_graph,
218			alpha=0.85,
219			personalization=None,
220			max_iter=1000,
221			tol=1e-06,
222			nstart=None,
223			weight='weight',
224			dangling=None):
225			"""
226			Measures node activity in a `networkx.Graph` using a thin wrapper around `networkx` implementation of pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`). Use with `processors.ds.DirectedGraph.graph`.
227
228			Parameters
229			----------
230			networkx_graph : networkx.Graph
231			Corresponds to `G` parameter of `networkx.algorithms.link_analysis.pagerank`.
232
233			See Also
234			--------
235			Method parameters correspond to those of [`networkx.algorithms.link_analysis.pagerank`](https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank)
236			"""
237			return nx.algorithms.link_analysis.pagerank(G=networkx_graph, alpha=alpha, personalization=personalization, max_iter=max_iter, tol=tol, nstart=nstart, weight=weight, dangling=dangling)
238

myedibleenso / py-processors

Push — master ( 352884...922d0d )

DependencyUtils.retrieve_edges() B

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like