Completed
Pull Request — master (#11)
by Gus
24s
created

NLPDatum.to_JSON_dict()   A

Complexity: Conditions 1
Size: Total Lines 2
Duplication: Lines 0, Ratio 0%
Importance: Changes 1, Bugs 0, Features 0
Metric Value
cc 1
c 1
b 0
f 0
dl 0
loc 2
rs 10
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Gus Hahn-Powell 2015
# data structures for storing processors-server output
# based on conventions from the CLU lab's processors library (https://github.com/clulab/processors)
from __future__ import unicode_literals
from itertools import chain
from collections import defaultdict, Counter
from processors.paths import DependencyUtils, HeadFinder
from processors.utils import LabelManager
#from six import text_type
import networkx as nx
import json
import re


class NLPDatum(object):

    def to_JSON_dict(self):
        return dict()

    def to_JSON(self, pretty=True):
        """
        Returns the datum serialized as a JSON string.
        """
        num_spaces = 4 if pretty else None
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=num_spaces)


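# --- Illustrative usage sketch: the helper below is hypothetical, not part of the library API ---
# A minimal, hedged example of the serialization contract above: subclasses
# override to_JSON_dict(), and the shared to_JSON() turns that dict into a
# string (4-space indentation when pretty=True, compact output otherwise).
def _example_nlpdatum_serialization():
    datum = NLPDatum()
    # the base class serializes to an empty JSON object in either mode
    return datum.to_JSON(pretty=False), datum.to_JSON(pretty=True)

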
class Document(NLPDatum):

    """
    Storage class for annotated text. Based on [`org.clulab.processors.Document`](https://github.com/clulab/processors/blob/master/main/src/main/scala/org/clulab/processors/Document.scala)

    Parameters
    ----------
    sentences : [processors.ds.Sentence]
        The sentences comprising the `Document`.

    Attributes
    ----------
    id : str or None
        A unique ID for the `Document`.

    size : int
        The number of `sentences`.

    sentences : [processors.ds.Sentence]
        The sentences comprising the `Document`.

    words : [str]
        A list of the `Document`'s tokens.

    tags : [str]
        A list of the `Document`'s tokens represented using part of speech (PoS) tags.

    lemmas : [str]
        A list of the `Document`'s tokens represented using lemmas.

    _entities : [str]
        A list of the `Document`'s tokens represented using IOB-style named entity (NE) labels.

    nes : dict
        A dictionary mapping each NE label found in the `Document` to a list of its corresponding text spans.

    bag_of_labeled_deps : [str]
        The labeled dependencies from all sentences in the `Document`.

    bag_of_unlabeled_deps : [str]
        The unlabeled dependencies from all sentences in the `Document`.

    text : str or None
        The original text of the `Document`.

    Methods
    -------
    bag_of_labeled_dependencies_using(form)
        Produces a list of syntactic dependencies where each edge is labeled with its grammatical relation.

    bag_of_unlabeled_dependencies_using(form)
        Produces a list of syntactic dependencies where each edge omits its grammatical relation.
    """

    def __init__(self, sentences):
        NLPDatum.__init__(self)
        self.id = None
        self.size = len(sentences)
        self.sentences = sentences
        # easily access token attributes from all sentences
        self.words = list(chain(*[s.words for s in self.sentences]))
        self.tags = list(chain(*[s.tags for s in self.sentences]))
        self.lemmas = list(chain(*[s.lemmas for s in self.sentences]))
        self._entities = list(chain(*[s._entities for s in self.sentences]))
        self.nes = self._merge_ne_dicts()
        self.bag_of_labeled_deps = list(chain(*[s.dependencies.labeled for s in self.sentences]))
        self.bag_of_unlabeled_deps = list(chain(*[s.dependencies.unlabeled for s in self.sentences]))
        self.text = None

    def __hash__(self):
        return hash(self.to_JSON())

    def __unicode__(self):
        return self.text

    def __str__(self):
        return "Document w/ {} Sentence{}".format(self.size, "" if self.size == 1 else "s")

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def bag_of_labeled_dependencies_using(self, form):
        return list(chain(*[s.labeled_dependencies_from_tokens(s._get_tokens(form)) for s in self.sentences]))

    def bag_of_unlabeled_dependencies_using(self, form):
        return list(chain(*[s.unlabeled_dependencies_from_tokens(s._get_tokens(form)) for s in self.sentences]))

    def _merge_ne_dicts(self):
        # Get the set of all NE labels found in the Doc's sentences
        entity_labels = set(chain(*[s.nes.keys() for s in self.sentences]))
        # Do we have any labels?
        if not entity_labels:
            return None
        # If we have labels, consolidate the NEs under the appropriate label
        else:
            nes_dict = dict()
            for e in entity_labels:
                entities = []
                for s in self.sentences:
                    entities += s.nes[e]
                nes_dict[e] = entities
            return nes_dict

    def to_JSON_dict(self):
        doc_dict = dict()
        doc_dict["sentences"] = [s.to_JSON_dict() for s in self.sentences]
        doc_dict["text"] = self.text
        # only include the ID if it has been set
        if self.id is not None:
            doc_dict["id"] = self.id
        return doc_dict

    @staticmethod
    def load_from_JSON(json_dict):
        sentences = []
        for s in json_dict["sentences"]:
            kwargs = {
                "words": s["words"],
                "startOffsets": s["startOffsets"],
                "endOffsets": s["endOffsets"],
                "tags": s.get("tags", None),
                "lemmas": s.get("lemmas", None),
                "chunks": s.get("chunks", None),
                "entities": s.get("entities", None),
                "graphs": s.get("graphs", None)
            }
            sent = Sentence(**kwargs)
            sentences.append(sent)
        doc = Document(sentences)
        # set id and text
        doc.text = json_dict.get("text", None)
        doc.id = json_dict.get("id", None)
        return doc


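# --- Illustrative usage sketch: the helper and sample values below are hypothetical, not part of the library API ---
# A hedged example of round-tripping a Document through the JSON conventions
# used by Document.load_from_JSON and Document.to_JSON_dict above.
def _example_document_round_trip():
    json_dict = {
        "text": "Rain falls.",
        "sentences": [
            {
                "words": ["Rain", "falls", "."],
                "startOffsets": [0, 5, 10],
                "endOffsets": [4, 10, 11],
                "tags": ["NN", "VBZ", "."],
                "graphs": {
                    "stanford-collapsed": {
                        "edges": [{"source": 1, "destination": 0, "relation": "nsubj"}],
                        "roots": [1]
                    }
                }
            }
        ]
    }
    doc = Document.load_from_JSON(json_dict)
    # one sentence, with its labeled dependencies pooled at the document level
    assert doc.size == 1
    return doc.to_JSON_dict()

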
class Sentence(NLPDatum):

    """
    Storage class for an annotated sentence. Based on [`org.clulab.processors.Sentence`](https://github.com/clulab/processors/blob/master/main/src/main/scala/org/clulab/processors/Sentence.scala)

    Parameters
    ----------
    text : str or None
        The text of the `Sentence`.

    words : [str]
        A list of the `Sentence`'s tokens.

    startOffsets : [int]
        The character offsets starting each token (inclusive).

    endOffsets : [int]
        The character offsets marking the end of each token (exclusive).

    tags : [str]
        A list of the `Sentence`'s tokens represented using part of speech (PoS) tags.

    lemmas : [str]
        A list of the `Sentence`'s tokens represented using lemmas.

    chunks : [str]
        A list of the `Sentence`'s tokens represented using IOB-style phrase labels (ex. `B-NP`, `I-NP`, `B-VP`, etc.).

    entities : [str]
        A list of the `Sentence`'s tokens represented using IOB-style named entity (NE) labels.

    graphs : dict
        A dictionary of {graph-name -> {edges: [{source, destination, relation}], roots: [int]}}

    Attributes
    ----------
    text : str
        The text of the `Sentence`.

    startOffsets : [int]
        The character offsets starting each token (inclusive).

    endOffsets : [int]
        The character offsets marking the end of each token (exclusive).

    length : int
        The number of tokens in the `Sentence`.

    graphs : dict
        A dictionary (str -> `processors.ds.DirectedGraph`) mapping the graph type/name to a `processors.ds.DirectedGraph`.

    basic_dependencies : processors.ds.DirectedGraph
        A `processors.ds.DirectedGraph` using basic Stanford dependencies.

    collapsed_dependencies : processors.ds.DirectedGraph
        A `processors.ds.DirectedGraph` using collapsed Stanford dependencies.

    dependencies : processors.ds.DirectedGraph
        A pointer to the preferred syntactic dependency graph for this `Sentence` (collapsed if available, otherwise basic).

    _entities : [str]
        The IOB-style named entity (NE) labels corresponding to each token.

    _chunks : [str]
        The IOB-style chunk labels corresponding to each token.

    nes : dict
        A dictionary mapping each NE label found in the `Sentence` to a list of its corresponding text spans (ex. {"PERSON": [phrase 1, ..., phrase n]}). Built from `Sentence._entities`.

    phrases : dict
        A dictionary mapping each chunk label found in the `Sentence` to a list of its corresponding text spans (ex. {"NP": [phrase 1, ..., phrase n]}). Built from `Sentence._chunks`.


    Methods
    -------
    bag_of_labeled_dependencies_using(form)
        Produces a list of syntactic dependencies where each edge is labeled with its grammatical relation.

    bag_of_unlabeled_dependencies_using(form)
        Produces a list of syntactic dependencies where each edge omits its grammatical relation.
    """

    UNKNOWN = LabelManager.UNKNOWN
    # the O in IOB notation
    O = LabelManager.O

    def __init__(self, **kwargs):
        NLPDatum.__init__(self)
        self.words = kwargs["words"]
        self.startOffsets = kwargs["startOffsets"]
        self.endOffsets = kwargs["endOffsets"]
        self.length = len(self.words)
        self.tags = self._set_toks(kwargs.get("tags", None))
        self.lemmas = self._set_toks(kwargs.get("lemmas", None))
        self._chunks = self._set_toks(kwargs.get("chunks", None))
        self._entities = self._set_toks(kwargs.get("entities", None))
        self.text = kwargs.get("text", None) or " ".join(self.words)
        self.graphs = self._build_directed_graph_from_dict(kwargs.get("graphs", None))
        self.basic_dependencies = self.graphs.get(DirectedGraph.STANFORD_BASIC_DEPENDENCIES, None)
        self.collapsed_dependencies = self.graphs.get(DirectedGraph.STANFORD_COLLAPSED_DEPENDENCIES, None)
        self.dependencies = self.collapsed_dependencies if self.collapsed_dependencies is not None else self.basic_dependencies
        # IOB tokens -> {label: [phrase 1, ..., phrase n]}
        self.nes = self._handle_iob(self._entities)
        self.phrases = self._handle_iob(self._chunks)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash(self.to_JSON())

    def _get_tokens(self, form):
        f = form.lower()
        if f == "words":
            tokens = self.words
        elif f == "tags":
            tokens = self.tags
        elif f == "lemmas":
            tokens = self.lemmas
        elif f == "entities":
            tokens = self.nes
        elif f == "index":
            tokens = list(range(self.length))
        # unrecognized form
        else:
            raise Exception("form must be 'words', 'tags', 'lemmas', 'entities', or 'index'")
        return tokens

    def _set_toks(self, toks):
        return toks if toks else [Sentence.UNKNOWN]*self.length

    def _handle_iob(self, iob):
        """
        Consolidates consecutive tokens in IOB notation under the appropriate label.
        The regexes strip the B-/I- prefixes produced by annotators (ex. the bionlp annotator) that use IOB notation.
        """
        entity_dict = defaultdict(list)
        # initialize to empty label
        current = Sentence.O
        start = None
        end = None
        for i, tok in enumerate(iob):
            # we don't have a B or I
            if tok == Sentence.O:
                # did we have an entity with the last token?
                current = re.sub('(B-|I-)','', str(current))
                if current == Sentence.O:
                    continue
                else:
                    # the last sequence has ended
                    end = i
                    # store the entity
                    named_entity = ' '.join(self.words[start:end])
                    entity_dict[current].append(named_entity)
                    # reset our book-keeping vars
                    current = Sentence.O
                    start = None
                    end = None
            # we have a tag!
            else:
                # our old sequence continues
                current = re.sub('(B-|I-)','', str(current))
                tok = re.sub('(B-|I-)','', str(tok))
                if tok == current:
                    end = i
                # our old sequence has ended
                else:
                    # do we have a previous NE?
                    if current != Sentence.O:
                        end = i
                        named_entity = ' '.join(self.words[start:end])
                        entity_dict[current].append(named_entity)
                    # update our book-keeping vars
                    current = tok
                    start = i
                    end = None
        # flush an entity that runs through the end of the sentence
        if current != Sentence.O and start is not None:
            named_entity = ' '.join(self.words[start:self.length])
            entity_dict[current].append(named_entity)
        # this might be empty
        return entity_dict

    def _build_directed_graph_from_dict(self, graphs):
        deps_dict = dict()
        if graphs:
            # process each stored graph
            for (kind, deps) in graphs.items():
                deps_dict[kind] = DirectedGraph(kind, deps, self.words)
        # empty when no graphs were provided
        return deps_dict

    def __unicode__(self):
        return self.text

    def to_string(self):
        return ' '.join("{w}__{p}".format(w=self.words[i], p=self.tags[i]) for i in range(self.length))

    def bag_of_labeled_dependencies_using(self, form):
        """
        Produces a list of syntactic dependencies
        where each edge is labeled with its grammatical relation.
        """
        tokens = self._get_tokens(form)
        return self.labeled_dependencies_from_tokens(tokens) if tokens else None

    def bag_of_unlabeled_dependencies_using(self, form):
        """
        Produces a list of syntactic dependencies
        where each edge omits its grammatical relation.
        """
        tokens = self._get_tokens(form)
        return self.unlabeled_dependencies_from_tokens(tokens) if tokens else None

    def labeled_dependencies_from_tokens(self, tokens):
        """
        Generates a list of labeled dependencies for a sentence
        using the provided tokens.
        """
        deps = self.dependencies
        return [(tokens[out], rel, tokens[dest])
                for out in deps.outgoing
                for (dest, rel) in deps.outgoing[out]]

    def unlabeled_dependencies_from_tokens(self, tokens):
        """
        Generates a list of unlabeled dependencies for a sentence
        using the provided tokens.
        """
        return [(head, dep) for (head, rel, dep) in self.labeled_dependencies_from_tokens(tokens)]

    def semantic_head(self, graph_name="stanford-collapsed", valid_tags={r"^N", "VBG"}, valid_indices=None):
        return HeadFinder.semantic_head(self, graph_name, valid_tags, valid_indices)

    def to_JSON_dict(self):
        sentence_dict = dict()
        sentence_dict["words"] = self.words
        sentence_dict["startOffsets"] = self.startOffsets
        sentence_dict["endOffsets"] = self.endOffsets
        sentence_dict["tags"] = self.tags
        sentence_dict["lemmas"] = self.lemmas
        sentence_dict["entities"] = self._entities
        # add graphs
        sentence_dict["graphs"] = dict()
        for (kind, graph) in self.graphs.items():
            sentence_dict["graphs"][kind] = graph._graph_to_JSON_dict()
        return sentence_dict

    @staticmethod
    def load_from_JSON(json_dict):
        sent = Sentence(
                    words=json_dict["words"],
                    startOffsets=json_dict["startOffsets"],
                    endOffsets=json_dict["endOffsets"],
                    lemmas=json_dict.get("lemmas", None),
                    tags=json_dict.get("tags", None),
                    entities=json_dict.get("entities", None),
                    text=json_dict.get("text", None),
                    graphs=json_dict.get("graphs", None)
                    )
        return sent


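# --- Illustrative usage sketch: the helper and sample values below are hypothetical, not part of the library API ---
# A hedged example of building a Sentence directly and inspecting the IOB
# consolidation (`nes`) and the word-level dependency "bags" described above.
# Labels follow the conventional IOB scheme ("O" for tokens outside any entity).
def _example_sentence_usage():
    sent = Sentence(
        words=["Rain", "falls", "in", "Tucson", "."],
        startOffsets=[0, 5, 11, 14, 20],
        endOffsets=[4, 10, 13, 20, 21],
        tags=["NN", "VBZ", "IN", "NNP", "."],
        entities=["O", "O", "O", "B-LOCATION", "O"],
        graphs={
            "stanford-collapsed": {
                "edges": [
                    {"source": 1, "destination": 0, "relation": "nsubj"},
                    {"source": 1, "destination": 3, "relation": "prep_in"}
                ],
                "roots": [1]
            }
        }
    )
    # IOB labels are consolidated into {label -> [phrases]}, e.g. {"LOCATION": ["Tucson"]}
    location_spans = sent.nes["LOCATION"]
    # dependency edges expressed over word forms,
    # e.g. [("falls", "nsubj", "Rain"), ("falls", "prep_in", "Tucson")]
    labeled_deps = sent.bag_of_labeled_dependencies_using("words")
    return location_spans, labeled_deps

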
class Edge(NLPDatum):

    def __init__(self, source, destination, relation):
        NLPDatum.__init__(self)
        self.source = source
        self.destination = destination
        self.relation = relation

    def __unicode__(self):
        return self.to_string()

    def to_string(self):
        return "Edge(source: {}, destination: {}, relation: {})".format(self.source, self.destination, self.relation)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        # mirrors the other NLPDatum subclasses
        return not self.__eq__(other)

    def to_JSON_dict(self):
        edge_dict = dict()
        edge_dict["source"] = self.source
        edge_dict["destination"] = self.destination
        edge_dict["relation"] = self.relation
        return edge_dict

class DirectedGraph(NLPDatum):

    """
    Storage class for directed graphs.


    Parameters
    ----------
    kind : str
        The name of the directed graph.

    deps : dict
        A dictionary of {edges: [{source, destination, relation}], roots: [int]}

    words : [str]
        A list of the word form of the tokens from the originating `Sentence`.

    Attributes
    ----------
    _words : [str]
        A lowercased list of the word form of the tokens from the originating `Sentence`.

    roots : [int]
        A list of indices for the syntactic dependency graph's roots.  Generally this is a single token index.

    edges : list[processors.ds.Edge]
        A list of `processors.ds.Edge`

    incoming : dict
        A dictionary of {destination index -> [(source index, relation)]} encoding the incoming edges for each node in the graph.

    outgoing : dict
        A dictionary of {source index -> [(destination index, relation)]} encoding the outgoing edges for each node in the graph.

    labeled : [str]
        A list of strings where each element represents an edge encoded as its lowercased source word, uppercased relation, and lowercased destination word ("source_RELATION_destination").

    unlabeled : [str]
        A list of strings where each element represents an edge encoded as its lowercased source and destination words ("source_destination").

    directed_graph : networkx.Graph
        A `networkx` representation of the `DirectedGraph`.  Used by the centrality methods and `pagerank`.

    undirected_graph : networkx.Graph
        An undirected copy of `directed_graph`.  Used by `shortest_paths` and `shortest_path`.

    Methods
    -------
    shortest_paths(start, end)
        Finds the shortest paths between the provided start and end nodes.
    shortest_path(start, end, scoring_func)
        Finds the single best-scoring shortest path between the provided start and end nodes.
    degree_centrality()
        Computes the degree centrality for nodes.
    in_degree_centrality()
        Computes the in-degree centrality for nodes.
    out_degree_centrality()
        Computes the out-degree centrality for nodes.
    pagerank(...)
        Measures node activity using a thin wrapper around the `networkx` pagerank implementation.
    """
    STANFORD_BASIC_DEPENDENCIES = "stanford-basic"
    STANFORD_COLLAPSED_DEPENDENCIES = "stanford-collapsed"

    def __init__(self, kind, deps, words):
        NLPDatum.__init__(self)
        self._words = [w.lower() for w in words]
        self.kind = kind
        self.roots = deps.get("roots", [])
        self.edges = [Edge(e["source"], e["destination"], e["relation"]) for e in deps["edges"]]
        self.incoming = self._build_incoming(self.edges)
        self.outgoing = self._build_outgoing(self.edges)
        self.labeled = self._build_labeled()
        self.unlabeled = self._build_unlabeled()
        self.directed_graph = DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=False)
        self.undirected_graph = self.directed_graph.to_undirected()

    def __unicode__(self):
        # return a text representation of the graph's edges
        return ", ".join(e.to_string() for e in self.edges)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash(self.to_JSON())

    def shortest_paths(self, start, end):
        """
        Find the shortest paths in the syntactic dependency graph
        between the provided start and end nodes.

        Parameters
        ----------
        start : int or [int]
            A single token index or list of token indices serving as the start of the graph traversal.

        end : int or [int]
            A single token index or list of token indices serving as the end of the graph traversal.

        See Also
        --------
        `processors.paths.DependencyUtils.shortest_paths`
        """
        paths = DependencyUtils.shortest_paths(self.undirected_graph, start, end)
        return None if not paths else [DependencyUtils.retrieve_edges(self, path) for path in paths]

    def shortest_path(self, start, end, scoring_func=lambda path: -len(path)):
        """
        Find the shortest path in the syntactic dependency graph
        between the provided start and end nodes.

        Parameters
        ----------
        start : int or [int]
            A single token index or list of token indices serving as the start of the graph traversal.

        end : int or [int]
            A single token index or list of token indices serving as the end of the graph traversal.

        scoring_func : function
            A function that scores each path, where each path has the form [(source index, relation, destination index)].
            The path with the maximum score will be returned.

        See Also
        --------
        `processors.paths.DependencyUtils.shortest_path`
        """
        paths = self.shortest_paths(start, end)
        return None if not paths else max(paths, key=scoring_func)

    def degree_centrality(self):
        """
        Compute the degree centrality for nodes.

        See Also
        --------
        https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
        """
        return Counter(nx.degree_centrality(self.directed_graph))

    def in_degree_centrality(self):
        """
        Compute the in-degree centrality for nodes.

        See Also
        --------
        https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
        """
        return Counter(nx.in_degree_centrality(self.directed_graph))

    def out_degree_centrality(self):
        """
        Compute the out-degree centrality for nodes.

        See Also
        --------
        https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
        """
        return Counter(nx.out_degree_centrality(self.directed_graph))

    def pagerank(self,
                 alpha=0.85,
                 personalization=None,
                 max_iter=1000,
                 tol=1e-06,
                 nstart=None,
                 weight='weight',
                 dangling=None,
                 use_directed=True,
                 reverse=True):
        """
        Measures node activity in a `networkx.Graph` using a thin wrapper around the `networkx` implementation of the pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`).  Use with `processors.ds.DirectedGraph.directed_graph`.
        Note that by default the directed graph is reversed: syntactic heads have many outgoing edges in the original graph, so reversing gives them the incoming edges that pagerank rewards, highlighting predicate-argument nodes.

        See Also
        --------
        `processors.paths.DependencyUtils.pagerank`
        Method parameters correspond to those of [`networkx.algorithms.link_analysis.pagerank`](https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank)
        """
        # check whether or not to reverse the directed graph
        dg = self.directed_graph if not reverse else DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=True)
        # determine which graph to use
        graph = dg if use_directed else self.undirected_graph
        return DependencyUtils.pagerank(graph, alpha=alpha, personalization=personalization, max_iter=max_iter, tol=tol, nstart=nstart, weight=weight, dangling=dangling)

    def _build_incoming(self, edges):
        dep_dict = defaultdict(list)
        for edge in edges:
            dep_dict[edge.destination].append((edge.source, edge.relation))
        return dep_dict

    def _build_outgoing(self, edges):
        dep_dict = defaultdict(list)
        for edge in edges:
            dep_dict[edge.source].append((edge.destination, edge.relation))
        return dep_dict

    def _build_labeled(self):
        labeled = []
        for out in self.outgoing:
            for (dest, rel) in self.outgoing[out]:
                labeled.append("{}_{}_{}".format(self._words[out], rel.upper(), self._words[dest]))
        return labeled

    def _build_unlabeled(self):
        unlabeled = []
        for out in self.outgoing:
            for (dest, _) in self.outgoing[out]:
                unlabeled.append("{}_{}".format(self._words[out], self._words[dest]))
        return unlabeled

    def _graph_to_JSON_dict(self):
        dg_dict = dict()
        dg_dict["edges"] = [e.to_JSON_dict() for e in self.edges]
        dg_dict["roots"] = self.roots
        return dg_dict

    def to_JSON_dict(self):
        return {self.kind: self._graph_to_JSON_dict()}


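# --- Illustrative usage sketch: the helper and sample values below are hypothetical, not part of the library API ---
# A hedged example of constructing a DirectedGraph from the {edges, roots}
# dict format described above and querying it (edge views, shortest path, pagerank).
def _example_directed_graph_usage():
    deps = {
        "edges": [
            {"source": 1, "destination": 0, "relation": "nsubj"},
            {"source": 1, "destination": 3, "relation": "prep_in"}
        ],
        "roots": [1]
    }
    dg = DirectedGraph(DirectedGraph.STANFORD_COLLAPSED_DEPENDENCIES, deps,
                       ["Rain", "falls", "in", "Tucson"])
    # word-level edge views, e.g. dg.labeled -> ["falls_NSUBJ_rain", "falls_PREP_IN_tucson"]
    labeled = dg.labeled
    # shortest path between token 0 ("Rain") and token 3 ("Tucson") over the undirected graph
    path = dg.shortest_path(0, 3)
    # node activity scores (graph reversed by default to highlight predicates)
    ranks = dg.pagerank()
    return labeled, path, ranks

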
class Interval(NLPDatum):
    """
    Defines a token or character span.

    Parameters
    ----------
    start : int
        The token or character index where the interval begins (inclusive).

    end : int
        One past the index of the last token/character in the span (exclusive).
    """

    def __init__(self, start, end):
        NLPDatum.__init__(self)
        self.start = start
        self.end = end

    def to_JSON_dict(self):
        return {"start": self.start, "end": self.end}

    @staticmethod
    def load_from_JSON(json_dict):
        return Interval(start=json_dict["start"], end=json_dict["end"])
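

# --- Illustrative usage sketch: the helper below is hypothetical, not part of the library API ---
# A hedged example of the Interval span convention above: `start` is inclusive
# and `end` is exclusive (one past the last index).
def _example_interval_usage():
    span = Interval(start=3, end=5)                 # covers token indices 3 and 4
    as_json = span.to_JSON(pretty=False)            # '{"end": 5, "start": 3}'
    restored = Interval.load_from_JSON(span.to_JSON_dict())
    return as_json, restored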