
Sentence.bag_of_unlabeled_dependencies_using()    Rating: A

Complexity:   Conditions 2
Size:         Total Lines 7
Duplication:  Lines 0, Ratio 0 %
Importance:   Changes 0

Metric   Value
cc       2
dl       0
loc      7
rs       9.4285
c        0
b        0
f        0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Gus Hahn-Powell 2015
# data structures for storing processors-server output
# based on conventions from the CLU lab's processors library (https://github.com/clulab/processors)
from __future__ import unicode_literals
from itertools import chain
from collections import defaultdict, Counter
from processors.paths import DependencyUtils, HeadFinder
from processors.utils import LabelManager
#from six import text_type
import networkx as nx
import json
import re


class Document(object):

    """
    Storage class for annotated text. Based on [`org.clulab.processors.Document`](https://github.com/clulab/processors/blob/master/main/src/main/scala/org/clulab/processors/Document.scala)

    Parameters
    ----------
    sentences : [processors.ds.Sentence]
        The sentences comprising the `Document`.

    Attributes
    ----------
    id : str or None
        A unique ID for the `Document`.

    size : int
        The number of `sentences`.

    sentences : [processors.ds.Sentence]
        The sentences comprising the `Document`.

    words : [str]
        A list of the `Document`'s tokens.

    tags : [str]
        A list of the `Document`'s tokens represented using part of speech (PoS) tags.

    lemmas : [str]
        A list of the `Document`'s tokens represented using lemmas.

    _entities : [str]
        A list of the `Document`'s tokens represented using IOB-style named entity (NE) labels.

    nes : dict
        A dictionary mapping each NE label found in the `Document` to a list of corresponding text spans.

    bag_of_labeled_deps : [str]
        The labeled dependencies from all sentences in the `Document`.

    bag_of_unlabeled_deps : [str]
        The unlabeled dependencies from all sentences in the `Document`.

    text : str or None
        The original text of the `Document`.

    Methods
    -------
    bag_of_labeled_dependencies_using(form)
        Produces a list of syntactic dependencies where each edge is labeled with its grammatical relation.

    bag_of_unlabeled_dependencies_using(form)
        Produces a list of syntactic dependencies where each edge is stripped of its grammatical relation label.
    """

    def __init__(self, sentences):
        self.id = None
        self.size = len(sentences)
        self.sentences = sentences
        # easily access token attributes from all sentences
        self.words = list(chain(*[s.words for s in self.sentences]))
        self.tags = list(chain(*[s.tags for s in self.sentences]))
        self.lemmas = list(chain(*[s.lemmas for s in self.sentences]))
        self._entities = list(chain(*[s._entities for s in self.sentences]))
        self.nes = self._merge_ne_dicts()
        self.bag_of_labeled_deps = list(chain(*[s.dependencies.labeled for s in self.sentences]))
        self.bag_of_unlabeled_deps = list(chain(*[s.dependencies.unlabeled for s in self.sentences]))
        self.text = None

    def __hash__(self):
        return hash(self.to_JSON())

    def __unicode__(self):
        return self.text

    def __str__(self):
        return "Document w/ {} Sentence{}".format(self.size, "" if self.size == 1 else "s")

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def bag_of_labeled_dependencies_using(self, form):
        return list(chain(*[s.labeled_dependencies_from_tokens(s._get_tokens(form)) for s in self.sentences]))

    def bag_of_unlabeled_dependencies_using(self, form):
        return list(chain(*[s.unlabeled_dependencies_from_tokens(s._get_tokens(form)) for s in self.sentences]))
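
    # A minimal usage sketch for the two bag-of-dependencies methods (assumes
    # an annotated `Document` named `doc`; the sentence and output below are
    # illustrative, not real library output):
    #
    #   >>> doc.bag_of_labeled_dependencies_using("lemmas")
    #   [('eat', 'nsubj', 'cat'), ('eat', 'dobj', 'fish'), ...]
    #   >>> doc.bag_of_unlabeled_dependencies_using("words")
    #   [('ate', 'cat'), ('ate', 'fish'), ...]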

    def _merge_ne_dicts(self):
        # Get the set of all NE labels found in the Doc's sentences
        entity_labels = set(chain(*[s.nes.keys() for s in self.sentences]))
        # Do we have any labels?
        if not entity_labels:
            return None
        # If we have labels, consolidate the NEs under the appropriate label
        else:
            nes_dict = dict()
            for e in entity_labels:
                entities = []
                for s in self.sentences:
                    entities += s.nes[e]
                nes_dict[e] = entities
            return nes_dict

    def to_JSON_dict(self):
        doc_dict = dict()
        doc_dict["sentences"] = [s.to_JSON_dict() for s in self.sentences]
        doc_dict["text"] = self.text
        # can the ID be set?
        if self.id is not None:
            doc_dict["id"] = self.id
        return doc_dict

    def to_JSON(self, pretty=True):
        """
        Returns the `Document` as a JSON string.
        """
        num_spaces = 4 if pretty else 0
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=num_spaces)

    @staticmethod
    def load_from_JSON(json_dict):
        sentences = []
        for s in json_dict["sentences"]:
            kwargs = {
                "words": s["words"],
                "startOffsets": s["startOffsets"],
                "endOffsets": s["endOffsets"],
                "tags": s.get("tags", None),
                "lemmas": s.get("lemmas", None),
                "chunks": s.get("chunks", None),
                "entities": s.get("entities", None),
                "graphs": s.get("graphs", None)
            }
            sent = Sentence(**kwargs)
            sentences.append(sent)
        doc = Document(sentences)
        # set id and text
        doc.text = json_dict.get("text", None)
        doc.id = json_dict.get("id", None)
        return doc
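
    # Round-trip sketch (assumes `doc` is a populated `Document`; since `__eq__`
    # compares `to_JSON()` output, the reloaded copy should compare equal):
    #
    #   >>> js = doc.to_JSON()
    #   >>> doc2 = Document.load_from_JSON(json.loads(js))
    #   >>> doc2 == doc
    #   True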


class Sentence(object):

    """
    Storage class for an annotated sentence. Based on [`org.clulab.processors.Sentence`](https://github.com/clulab/processors/blob/master/main/src/main/scala/org/clulab/processors/Sentence.scala)

    Parameters
    ----------
    text : str or None
        The text of the `Sentence`.

    words : [str]
        A list of the `Sentence`'s tokens.

    startOffsets : [int]
        The character offsets starting each token (inclusive).

    endOffsets : [int]
        The character offsets marking the end of each token (exclusive).

    tags : [str]
        A list of the `Sentence`'s tokens represented using part of speech (PoS) tags.

    lemmas : [str]
        A list of the `Sentence`'s tokens represented using lemmas.

    chunks : [str]
        A list of the `Sentence`'s tokens represented using IOB-style phrase labels (ex. `B-NP`, `I-NP`, `B-VP`, etc.).

    entities : [str]
        A list of the `Sentence`'s tokens represented using IOB-style named entity (NE) labels.

    graphs : dict
        A dictionary of {graph-name -> {edges: [{source, destination, relation}], roots: [int]}}

    Attributes
    ----------
    text : str
        The text of the `Sentence`.

    startOffsets : [int]
        The character offsets starting each token (inclusive).

    endOffsets : [int]
        The character offsets marking the end of each token (exclusive).

    length : int
        The number of tokens in the `Sentence`.

    graphs : dict
        A dictionary (str -> `processors.ds.DirectedGraph`) mapping the graph type/name to a `processors.ds.DirectedGraph`.

    basic_dependencies : processors.ds.DirectedGraph
        A `processors.ds.DirectedGraph` using basic Stanford dependencies.

    collapsed_dependencies : processors.ds.DirectedGraph
        A `processors.ds.DirectedGraph` using collapsed Stanford dependencies.

    dependencies : processors.ds.DirectedGraph
        A pointer to the preferred syntactic dependency graph type for this `Sentence`.

    _entities : [str]
        The IOB-style Named Entity (NE) labels corresponding to each token.

    _chunks : [str]
        The IOB-style chunk labels corresponding to each token.

    nes : dict
        A dictionary mapping each NE label found in the `Sentence` to a list of corresponding text spans (ex. {"PERSON": [phrase 1, ..., phrase n]}). Built from `Sentence._entities`.

    phrases : dict
        A dictionary mapping each chunk label found in the `Sentence` to a list of corresponding text spans (ex. {"NP": [phrase 1, ..., phrase n]}). Built from `Sentence._chunks`.

    Methods
    -------
    bag_of_labeled_dependencies_using(form)
        Produces a list of syntactic dependencies where each edge is labeled with its grammatical relation.

    bag_of_unlabeled_dependencies_using(form)
        Produces a list of syntactic dependencies where each edge is stripped of its grammatical relation label.
    """

    UNKNOWN = LabelManager.UNKNOWN
    # the O in IOB notation
    O = LabelManager.O

    def __init__(self, **kwargs):
        self.words = kwargs["words"]
        self.startOffsets = kwargs["startOffsets"]
        self.endOffsets = kwargs["endOffsets"]
        self.length = len(self.words)
        self.tags = self._set_toks(kwargs.get("tags", None))
        self.lemmas = self._set_toks(kwargs.get("lemmas", None))
        self._chunks = self._set_toks(kwargs.get("chunks", None))
        self._entities = self._set_toks(kwargs.get("entities", None))
        self.text = kwargs.get("text", None) or " ".join(self.words)
        self.graphs = self._build_directed_graph_from_dict(kwargs.get("graphs", None))
        self.basic_dependencies = self.graphs.get(DirectedGraph.STANFORD_BASIC_DEPENDENCIES, None)
        self.collapsed_dependencies = self.graphs.get(DirectedGraph.STANFORD_COLLAPSED_DEPENDENCIES, None)
        self.dependencies = self.collapsed_dependencies if self.collapsed_dependencies is not None else self.basic_dependencies
        # IOB tokens -> {label: [phrase 1, ..., phrase n]}
        self.nes = self._handle_iob(self._entities)
        self.phrases = self._handle_iob(self._chunks)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash(self.to_JSON())

    def _get_tokens(self, form):
        f = form.lower()
        if f == "words":
            tokens = self.words
        elif f == "tags":
            tokens = self.tags
        elif f == "lemmas":
            tokens = self.lemmas
        elif f == "entities":
            tokens = self.nes
        elif f == "index":
            tokens = list(range(self.length))
        # unrecognized form
        else:
            raise Exception("form must be 'words', 'tags', 'lemmas', 'entities', or 'index'")
        return tokens
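
    # Sketch of the accepted `form` values (sentence contents are illustrative;
    # note that "entities" returns the `nes` dict rather than a token list):
    #
    #   >>> s._get_tokens("words")    # ['The', 'cat', 'ate']
    #   >>> s._get_tokens("lemmas")   # ['the', 'cat', 'eat']
    #   >>> s._get_tokens("index")    # [0, 1, 2]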

    def _set_toks(self, toks):
        return toks if toks else [Sentence.UNKNOWN]*self.length

    def _handle_iob(self, iob):
        """
        Consolidates consecutive tokens in IOB notation under the appropriate label.
        The regex substitutions strip the B-/I- prefixes used by IOB-style annotators (ex. the bionlp annotator).
        """
        entity_dict = defaultdict(list)
        # initialize to empty label
        current = Sentence.O
        start = None
        end = None
        for i, tok in enumerate(iob):
            # we have an O (no entity tag)
            if tok == Sentence.O:
                # did we have an entity with the last token?
                current = re.sub('(B-|I-)', '', str(current))
                if current == Sentence.O:
                    continue
                else:
                    # the last sequence has ended
                    end = i
                    # store the entity
                    named_entity = ' '.join(self.words[start:end])
                    entity_dict[current].append(named_entity)
                    # reset our book-keeping vars
                    current = Sentence.O
                    start = None
                    end = None
            # we have a tag!
            else:
                # our old sequence continues
                current = re.sub('(B-|I-)', '', str(current))
                tok = re.sub('(B-|I-)', '', str(tok))
                if tok == current:
                    end = i
                # our old sequence has ended
                else:
                    # do we have a previous NE?
                    if current != Sentence.O:
                        end = i
                        named_entity = ' '.join(self.words[start:end])
                        entity_dict[current].append(named_entity)
                    # update our book-keeping vars
                    current = tok
                    start = i
                    end = None
        # store an entity that runs through the final token
        if current != Sentence.O and start is not None:
            entity_dict[current].append(' '.join(self.words[start:]))
        # this might be empty
        return entity_dict
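
    # Sketch of how `_handle_iob` consolidates spans (words and labels below
    # are illustrative):
    #
    #   words:  ['Barack', 'Obama', 'visited', 'Paris']
    #   iob:    ['B-PER',  'I-PER', 'O',       'B-LOC']
    #   result: {'PER': ['Barack Obama'], 'LOC': ['Paris']}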

    def _build_directed_graph_from_dict(self, graphs):
        deps_dict = dict()
        if graphs:
            # process each stored graph
            for (kind, deps) in graphs.items():
                deps_dict[kind] = DirectedGraph(kind, deps, self.words)
        # an empty dict (rather than None) keeps lookups like `self.graphs.get(...)` safe
        return deps_dict

    def __unicode__(self):
        return self.text

    def to_string(self):
        return ' '.join("{w}__{p}".format(w=self.words[i], p=self.tags[i]) for i in range(self.length))

    def bag_of_labeled_dependencies_using(self, form):
        """
        Produces a list of syntactic dependencies
        where each edge is labeled with its grammatical relation.
        """
        tokens = self._get_tokens(form)
        return self.labeled_dependencies_from_tokens(tokens) if tokens else None

    def bag_of_unlabeled_dependencies_using(self, form):
        """
        Produces a list of syntactic dependencies
        where each edge is stripped of its grammatical relation label.
        """
        tokens = self._get_tokens(form)
        return self.unlabeled_dependencies_from_tokens(tokens) if tokens else None
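
    # Usage sketch for the two `bag_of_*` methods (assumes an annotated
    # `Sentence` named `s`; output is illustrative):
    #
    #   >>> s.bag_of_unlabeled_dependencies_using("lemmas")
    #   [('eat', 'cat'), ('eat', 'fish')]
    #   >>> s.bag_of_labeled_dependencies_using("tags")
    #   [('VBD', 'nsubj', 'NN'), ('VBD', 'dobj', 'NN')]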

    def labeled_dependencies_from_tokens(self, tokens):
        """
        Generates a list of labeled dependencies for a sentence
        using the provided tokens.
        """
        deps = self.dependencies
        return [(tokens[out], rel, tokens[dest])
                for out in deps.outgoing
                for (dest, rel) in deps.outgoing[out]]

    def unlabeled_dependencies_from_tokens(self, tokens):
        """
        Generates a list of unlabeled dependencies for a sentence
        using the provided tokens.
        """
        return [(head, dep) for (head, rel, dep) in self.labeled_dependencies_from_tokens(tokens)]

    def semantic_head(self, graph_name="stanford-collapsed", valid_tags={r"^N", "VBG"}, valid_indices=None):
        return HeadFinder.semantic_head(self, graph_name, valid_tags, valid_indices)

    def to_JSON_dict(self):
        sentence_dict = dict()
        sentence_dict["words"] = self.words
        sentence_dict["startOffsets"] = self.startOffsets
        sentence_dict["endOffsets"] = self.endOffsets
        sentence_dict["tags"] = self.tags
        sentence_dict["lemmas"] = self.lemmas
        sentence_dict["entities"] = self._entities
        # add graphs
        sentence_dict["graphs"] = dict()
        for (kind, graph) in self.graphs.items():
            sentence_dict["graphs"][kind] = graph._graph_to_JSON_dict()
        return sentence_dict

    def to_JSON(self):
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)

    @staticmethod
    def load_from_JSON(json_dict):
        sent = Sentence(
            words=json_dict["words"],
            startOffsets=json_dict["startOffsets"],
            endOffsets=json_dict["endOffsets"],
            lemmas=json_dict.get("lemmas", None),
            tags=json_dict.get("tags", None),
            chunks=json_dict.get("chunks", None),
            entities=json_dict.get("entities", None),
            text=json_dict.get("text", None),
            graphs=json_dict.get("graphs", None)
        )
        return sent


class Edge(object):

    def __init__(self, source, destination, relation):
        self.source = source
        self.destination = destination
        self.relation = relation

    def __unicode__(self):
        return self.to_string()

    def to_string(self):
        return "Edge(source: {}, destination: {}, relation: {})".format(self.source, self.destination, self.relation)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def to_JSON_dict(self):
        edge_dict = dict()
        edge_dict["source"] = self.source
        edge_dict["destination"] = self.destination
        edge_dict["relation"] = self.relation
        return edge_dict

    def to_JSON(self):
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)


class DirectedGraph(object):

    """
    Storage class for directed graphs.

    Parameters
    ----------
    kind : str
        The name of the directed graph.

    deps : dict
        A dictionary of {edges: [{source, destination, relation}], roots: [int]}

    words : [str]
        A list of the word form of the tokens from the originating `Sentence`.

    Attributes
    ----------
    _words : [str]
        A lowercased list of the word form of the tokens from the originating `Sentence`.

    roots : [int]
        A list of indices for the syntactic dependency graph's roots.  Generally this is a single token index.

    edges : [processors.ds.Edge]
        A list of `processors.ds.Edge`

    incoming : dict
        A dictionary of {int -> [(int, str)]} mapping each node to the (source, relation) pairs of its incoming edges.

    outgoing : dict
        A dictionary of {int -> [(int, str)]} mapping each node to the (destination, relation) pairs of its outgoing edges.

    labeled : [str]
        A list of strings where each element represents an edge encoded as its source word, uppercased relation, and destination word ("source_RELATION_destination").

    unlabeled : [str]
        A list of strings where each element represents an edge encoded as its source word and destination word ("source_destination").

    directed_graph : networkx.DiGraph
        A `networkx` representation of the `DirectedGraph`.  Used by `shortest_path`, the centrality methods, and `pagerank`.

    undirected_graph : networkx.Graph
        An undirected view of `directed_graph`.

    Methods
    -------
    shortest_paths(start, end)
        Finds the shortest paths between the provided start and end nodes.

    shortest_path(start, end, scoring_func)
        Finds the shortest path between the provided start and end nodes, using `scoring_func` to choose among ties.

    pagerank(...)
        Scores node activity via a thin wrapper around the `networkx` pagerank implementation.
    """

    STANFORD_BASIC_DEPENDENCIES = "stanford-basic"
    STANFORD_COLLAPSED_DEPENDENCIES = "stanford-collapsed"

    def __init__(self, kind, deps, words):
        self._words = [w.lower() for w in words]
        self.kind = kind
        self.roots = deps.get("roots", [])
        self.edges = [Edge(e["source"], e["destination"], e["relation"]) for e in deps["edges"]]
        self.incoming = self._build_incoming(self.edges)
        self.outgoing = self._build_outgoing(self.edges)
        self.labeled = self._build_labeled()
        self.unlabeled = self._build_unlabeled()
        self.directed_graph = DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=False)
        self.undirected_graph = self.directed_graph.to_undirected()

    def __unicode__(self):
        return "\n".join(e.to_string() for e in self.edges)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash(self.to_JSON())

    def shortest_paths(self, start, end):
        """
        Find the shortest paths in the syntactic dependency graph
        between the provided start and end nodes.

        Parameters
        ----------
        start : int or [int]
            A single token index or list of token indices serving as the start of the graph traversal.

        end : int or [int]
            A single token index or list of token indices serving as the end of the graph traversal.

        See Also
        --------
        `processors.paths.DependencyUtils.shortest_path`
        """
        paths = DependencyUtils.shortest_paths(self.undirected_graph, start, end)
        return None if not paths else [DependencyUtils.retrieve_edges(self, path) for path in paths]

    def shortest_path(self, start, end, scoring_func=lambda path: -len(path)):
        """
        Find the shortest path in the syntactic dependency graph
        between the provided start and end nodes.

        Parameters
        ----------
        start : int or [int]
            A single token index or list of token indices serving as the start of the graph traversal.

        end : int or [int]
            A single token index or list of token indices serving as the end of the graph traversal.

        scoring_func : function
            A function that scores each path in a list of paths, where each path has the form [(source index, relation, destination index)].
            The path with the maximum score will be returned.

        See Also
        --------
        `processors.paths.DependencyUtils.shortest_path`
        """
        paths = self.shortest_paths(start, end)
        return None if not paths else max(paths, key=scoring_func)
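
    # Sketch: choosing among equally short paths with a custom `scoring_func`
    # (assumes a `DirectedGraph` named `dg`; token indices are illustrative):
    #
    #   >>> dg.shortest_path(0, 4)  # default scoring prefers fewer hops
    #   >>> dg.shortest_path(0, 4, scoring_func=lambda path: sum(1 for (_, rel, _) in path if rel == "nsubj"))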

    def degree_centrality(self):
        """
        Compute the degree centrality for nodes.

        See Also
        --------
        https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
        """
        return Counter(nx.degree_centrality(self.directed_graph))

    def in_degree_centrality(self):
        """
        Compute the in-degree centrality for nodes.

        See Also
        --------
        https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
        """
        return Counter(nx.in_degree_centrality(self.directed_graph))

    def out_degree_centrality(self):
        """
        Compute the out-degree centrality for nodes.

        See Also
        --------
        https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
        """
        return Counter(nx.out_degree_centrality(self.directed_graph))

    def pagerank(self,
                 alpha=0.85,
                 personalization=None,
                 max_iter=1000,
                 tol=1e-06,
                 nstart=None,
                 weight='weight',
                 dangling=None,
                 use_directed=True,
                 reverse=True):
        """
        Measures node activity in a `networkx.Graph` using a thin wrapper around the `networkx` implementation of the pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`).  Use with `processors.ds.DirectedGraph.directed_graph`.
        Note that by default, the directed graph is reversed in order to highlight predicate-argument nodes (refer to the pagerank algorithm to understand why).

        See Also
        --------
        `processors.paths.DependencyUtils.pagerank`
        Method parameters correspond to those of [`networkx.algorithms.link_analysis.pagerank`](https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank)
        """
        # check whether or not to reverse the directed graph
        dg = self.directed_graph if not reverse else DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=True)
        # determine which graph to use
        graph = dg if use_directed else self.undirected_graph
        return DependencyUtils.pagerank(graph, alpha=alpha, personalization=personalization, max_iter=max_iter, tol=tol, nstart=nstart, weight=weight, dangling=dangling)
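
    # Usage sketch (assumes a `DirectedGraph` named `dg`; scores are illustrative):
    #
    #   >>> dg.pagerank()                 # reversed graph highlights predicate-argument nodes
    #   >>> dg.pagerank(reverse=False)    # score the dependency graph as-is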

    def _build_incoming(self, edges):
        dep_dict = defaultdict(list)
        for edge in edges:
            dep_dict[edge.destination].append((edge.source, edge.relation))
        return dep_dict

    def _build_outgoing(self, edges):
        dep_dict = defaultdict(list)
        for edge in edges:
            dep_dict[edge.source].append((edge.destination, edge.relation))
        return dep_dict

    def _build_labeled(self):
        labeled = []
        for out in self.outgoing:
            for (dest, rel) in self.outgoing[out]:
                labeled.append("{}_{}_{}".format(self._words[out], rel.upper(), self._words[dest]))
        return labeled

    def _build_unlabeled(self):
        unlabeled = []
        for out in self.outgoing:
            for (dest, _) in self.outgoing[out]:
                unlabeled.append("{}_{}".format(self._words[out], self._words[dest]))
        return unlabeled
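
    # Sketch of the encodings built above (words and relations are illustrative;
    # note `_words` is lowercased and relations are uppercased):
    #
    #   labeled:   ['ate_NSUBJ_cat', 'ate_DOBJ_fish']
    #   unlabeled: ['ate_cat', 'ate_fish']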

    def _graph_to_JSON_dict(self):
        dg_dict = dict()
        dg_dict["edges"] = [e.to_JSON_dict() for e in self.edges]
        dg_dict["roots"] = self.roots
        return dg_dict

    def to_JSON_dict(self):
        return {self.kind: self._graph_to_JSON_dict()}

    def to_JSON(self):
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)


class Interval(object):
    """
    Defines a token or character span.

    Parameters
    ----------
    start : int
        The token or character index where the interval begins.

    end : int
        One past the index of the last token/character in the span.
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def to_JSON_dict(self):
        return {"start": self.start, "end": self.end}

    def to_JSON(self):
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)

    @staticmethod
    def load_from_JSON(json_dict):
        return Interval(start=json_dict["start"], end=json_dict["end"])
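
# Interval round-trip sketch (values are illustrative):
#
#   >>> span = Interval(start=0, end=2)
#   >>> Interval.load_from_JSON(json.loads(span.to_JSON())).to_JSON_dict()
#   {'start': 0, 'end': 2}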