Completed
Push — master ( 8c21e2...b98938 )
by Gus
01:05
created

DirectedGraph.shortest_paths()   A

Complexity

Conditions 3

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
dl 0
loc 19
rs 9.4285
c 0
b 0
f 0
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
# Gus Hahn-Powell 2015
5
# data structures for storing processors-server output
6
# based on conventions from the CLU lab's processors library (https://github.com/clulab/processors)
7
from __future__ import unicode_literals
8
from itertools import chain
9
from collections import defaultdict, Counter
10
from processors.paths import DependencyUtils
11
from processors.utils import LabelManager
12
#from six import text_type
13
import networkx as nx
14
import json
15
import re
16
17
18
class Document(object):

    """
    Storage class for annotated text. Based on [`org.clulab.processors.Document`](https://github.com/clulab/processors/blob/master/main/src/main/scala/org/clulab/processors/Document.scala)

    Parameters
    ----------
    sentences : [processors.ds.Sentence]
        The sentences comprising the `Document`.

    Attributes
    ----------
    id : str or None
        A unique ID for the `Document`.

    size : int
        The number of `sentences`.

    sentences : [processors.ds.Sentence]
        The sentences comprising the `Document`.

    words : [str]
        A list of the `Document`'s tokens.

    tags : [str]
        A list of the `Document`'s tokens represented using part of speech (PoS) tags.

    lemmas : [str]
        A list of the `Document`'s tokens represented using lemmas.

    _entities : [str]
        A list of the `Document`'s tokens represented using IOB-style named entity (NE) labels.

    nes : dict
        A dictionary of NE labels represented in the `Document` -> a list of corresponding text spans (possibly empty).

    bag_of_labeled_deps : [str]
        The labeled dependencies from all sentences in the `Document`.

    bag_of_unlabeled_deps : [str]
        The unlabeled dependencies from all sentences in the `Document`.

    text : str or None
        The original text of the `Document`.

    Methods
    -------
    bag_of_labeled_dependencies_using(form)
        Produces a list of syntactic dependencies where each edge is labeled with its grammatical relation.

    bag_of_unlabeled_dependencies_using(form)
        Produces a list of syntactic dependencies where each edge is left unlabeled without its grammatical relation.
    """

    def __init__(self, sentences):
        self.id = None
        self.size = len(sentences)
        self.sentences = sentences
        # easily access token attributes from all sentences
        self.words = list(chain(*[s.words for s in self.sentences]))
        self.tags = list(chain(*[s.tags for s in self.sentences]))
        self.lemmas = list(chain(*[s.lemmas for s in self.sentences]))
        self._entities = list(chain(*[s._entities for s in self.sentences]))
        # NOTE: the original also bound this to a stray throwaway name
        # (`merge_entity_dicts`); only the instance attribute is needed
        self.nes = self._merge_ne_dicts()
        self.bag_of_labeled_deps = list(chain(*[s.dependencies.labeled for s in self.sentences]))
        self.bag_of_unlabeled_deps = list(chain(*[s.dependencies.unlabeled for s in self.sentences]))
        self.text = None

    def __hash__(self):
        return hash(self.to_JSON())

    def __unicode__(self):
        return self.text

    def __str__(self):
        return "Document w/ {} Sentence{}".format(self.size, "" if self.size == 1 else "s")

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def bag_of_labeled_dependencies_using(self, form):
        """
        Produces a list of labeled syntactic dependencies from every sentence,
        with each token rendered according to `form` (ex. "words", "lemmas").
        """
        return list(chain(*[s.labeled_dependencies_using(s._get_tokens(form)) for s in self.sentences]))

    def bag_of_unlabeled_dependencies_using(self, form):
        """
        Produces a list of unlabeled syntactic dependencies from every sentence,
        with each token rendered according to `form` (ex. "words", "lemmas").
        """
        return list(chain(*[s.unlabeled_dependencies_using(s._get_tokens(form)) for s in self.sentences]))

    def _merge_ne_dicts(self):
        """
        Consolidates the NEs of all sentences under the appropriate label.
        Returns a (possibly empty) dict of {NE label -> [text spans]}.
        """
        # the set of all NE labels found in the Doc's sentences.
        # NOTE: the original guarded against `entity_labels == None`, but a
        # value built by set(chain(...)) can only be empty, never None, so
        # that branch was dead code; an empty dict is returned either way.
        entity_labels = set(chain(*[s.nes.keys() for s in self.sentences]))
        nes_dict = dict()
        for label in entity_labels:
            entities = []
            for s in self.sentences:
                entities += s.nes[label]
            nes_dict[label] = entities
        return nes_dict

    def to_JSON_dict(self):
        doc_dict = dict()
        doc_dict["sentences"] = [s.to_JSON_dict() for s in self.sentences]
        doc_dict["text"] = self.text
        # only serialize the ID if it has been set
        if self.id is not None:
            doc_dict["id"] = self.id
        return doc_dict

    def to_JSON(self, pretty=True):
        """
        Returns JSON as String.
        """
        num_spaces = 4 if pretty else 0
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=num_spaces)

    @staticmethod
    def load_from_JSON(json_dict):
        """
        Builds a `Document` from its JSON-derived dict representation.
        """
        sentences = []
        for s in json_dict["sentences"]:
            kwargs = {
                "words": s["words"],
                "startOffsets": s["startOffsets"],
                "endOffsets": s["endOffsets"],
                "tags": s.get("tags", None),
                "lemmas": s.get("lemmas", None),
                "chunks": s.get("chunks", None),
                "entities": s.get("entities", None),
                "graphs": s.get("graphs", None)
            }
            sent = Sentence(**kwargs)
            sentences.append(sent)
        doc = Document(sentences)
        # set id and text
        doc.text = json_dict.get("text", None)
        # BUG FIX: the ID lives on the document-level dict; the original read
        # `kwargs.get("id")` (the *last sentence's* kwargs, which never holds
        # an id, and which is unbound when "sentences" is empty)
        doc.id = json_dict.get("id", None)
        return doc
163
164
165
class Sentence(object):

    """
    Storage class for an annotated sentence. Based on [`org.clulab.processors.Sentence`](https://github.com/clulab/processors/blob/master/main/src/main/scala/org/clulab/processors/Sentence.scala)

    Parameters
    ----------
    text : str or None
        The text of the `Sentence`.

    words : [str]
        A list of the `Sentence`'s tokens.

    startOffsets : [int]
        The character offsets starting each token (inclusive).

    endOffsets : [int]
        The character offsets marking the end of each token (exclusive).

    tags : [str]
        A list of the `Sentence`'s tokens represented using part of speech (PoS) tags.

    lemmas : [str]
        A list of the `Sentence`'s tokens represented using lemmas.

    chunks : [str]
        A list of the `Sentence`'s tokens represented using IOB-style phrase labels (ex. `B-NP`, `I-NP`, `B-VP`, etc.).

    entities : [str]
        A list of the `Sentence`'s tokens represented using IOB-style named entity (NE) labels.

    graphs : dict
        A dictionary of {graph-name -> {edges: [{source, destination, relation}], roots: [int]}}

    Attributes
    ----------
    text : str
        The text of the `Sentence`.

    startOffsets : [int]
        The character offsets starting each token (inclusive).

    endOffsets : [int]
        The character offsets marking the end of each token (exclusive).

    length : int
        The number of tokens in the `Sentence`

    graphs : dict
        A dictionary (str -> `processors.ds.DirectedGraph`) mapping the graph type/name to a `processors.ds.DirectedGraph`.  Empty when no graphs were provided.

    basic_dependencies : processors.ds.DirectedGraph or None
        A `processors.ds.DirectedGraph` using basic Stanford dependencies.

    collapsed_dependencies : processors.ds.DirectedGraph or None
        A `processors.ds.DirectedGraph` using collapsed Stanford dependencies.

    dependencies : processors.ds.DirectedGraph or None
        A pointer to the preferred syntactic dependency graph type for this `Sentence`.

    _entities : [str]
        The IOB-style Named Entity (NE) labels corresponding to each token.

    _chunks : [str]
        The IOB-style chunk labels corresponding to each token.

    nes : dict
        A dictionary of NE labels represented in the `Document` -> a list of corresponding text spans (ex. {"PERSON": [phrase 1, ..., phrase n]}). Built from `Sentence._entities`

    phrases : dict
        A dictionary of chunk labels represented in the `Document` -> a list of corresponding text spans (ex. {"NP": [phrase 1, ..., phrase n]}). Built from `Sentence._chunks`


    Methods
    -------
    labeled_dependencies_using(tokens)
        Produces a list of syntactic dependencies where each edge is labeled with its grammatical relation.

    unlabeled_dependencies_using(tokens)
        Produces a list of syntactic dependencies where each edge is left unlabeled without its grammatical relation.
    """

    UNKNOWN = LabelManager.UNKNOWN
    # the O in IOB notation
    O = LabelManager.O

    def __init__(self, **kwargs):
        self.words = kwargs["words"]
        self.startOffsets = kwargs["startOffsets"]
        self.endOffsets = kwargs["endOffsets"]
        self.length = len(self.words)
        self.tags = self._set_toks(kwargs.get("tags", None))
        self.lemmas = self._set_toks(kwargs.get("lemmas", None))
        self._chunks = self._set_toks(kwargs.get("chunks", None))
        self._entities = self._set_toks(kwargs.get("entities", None))
        self.text = kwargs.get("text", None) or " ".join(self.words)
        self.graphs = self._build_directed_graph_from_dict(kwargs.get("graphs", None))
        self.basic_dependencies = self.graphs.get(DirectedGraph.STANFORD_BASIC_DEPENDENCIES, None)
        self.collapsed_dependencies = self.graphs.get(DirectedGraph.STANFORD_COLLAPSED_DEPENDENCIES, None)
        # prefer collapsed dependencies when available
        self.dependencies = self.collapsed_dependencies if self.collapsed_dependencies is not None else self.basic_dependencies
        # IOB tokens -> {label: [phrase 1, ..., phrase n]}
        self.nes = self._handle_iob(self._entities)
        self.phrases = self._handle_iob(self._chunks)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def _get_tokens(self, form):
        """
        Returns this sentence's tokens rendered as `form`
        ("words", "tags", "lemmas", "entities", or "index").

        Raises
        ------
        ValueError
            If `form` is not a recognized representation.
        """
        f = form.lower()
        if f == "words":
            tokens = self.words
        elif f == "tags":
            tokens = self.tags
        elif f == "lemmas":
            tokens = self.lemmas
        elif f == "entities":
            tokens = self.nes
        elif f == "index":
            tokens = list(range(self.length))
        else:
            # BUG FIX: previously fell through and raised UnboundLocalError;
            # the intended raise was left commented out downstream
            raise ValueError('form must be "words", "tags", "lemmas", "entities", or "index"')
        return tokens

    def _set_toks(self, toks):
        # fall back to a sentence-length run of UNKNOWN labels
        return toks if toks else [Sentence.UNKNOWN]*self.length

    def _handle_iob(self, iob):
        """
        Consolidates consecutive tokens in IOB notation under the appropriate label.
        Regexs control for bionlp annotator, which uses IOB notation.
        """
        entity_dict = defaultdict(list)
        # initialize to empty label
        current = Sentence.O
        start = None
        end = None
        for i, tok in enumerate(iob):
            # we don't have an I or O
            if tok == Sentence.O:
                # did we have an entity with the last token?
                current = re.sub('(B-|I-)','', str(current))
                if current == Sentence.O:
                    continue
                else:
                    # the last sequence has ended
                    end = i
                    # store the entity
                    named_entity = ' '.join(self.words[start:end])
                    entity_dict[current].append(named_entity)
                    # reset our book-keeping vars
                    current = Sentence.O
                    start = None
                    end = None
            # we have a tag!
            else:
                # our old sequence continues
                current = re.sub('(B-|I-)','', str(current))
                tok = re.sub('(B-|I-)','', str(tok))
                if tok == current:
                    end = i
                # our old sequence has ended
                else:
                    # do we have a previous NE?
                    if current != Sentence.O:
                        end = i
                        named_entity = ' '.join(self.words[start:end])
                        entity_dict[current].append(named_entity)
                    # update our book-keeping vars
                    current = tok
                    start = i
                    end = None
        # BUG FIX: flush a sequence that runs to the end of the sentence
        # (previously a trailing NE/chunk was silently dropped)
        current = re.sub('(B-|I-)','', str(current))
        if current != Sentence.O and start is not None:
            entity_dict[current].append(' '.join(self.words[start:]))
        # this might be empty
        return entity_dict

    def _build_directed_graph_from_dict(self, graphs):
        """
        Builds a dict of {graph name -> DirectedGraph} from the serialized form.
        """
        deps_dict = dict()
        if graphs:
            # process each stored graph
            for (kind, deps) in graphs.items():
                deps_dict[kind] = DirectedGraph(kind, deps, self.words)
        # BUG FIX: previously returned None when no graphs were provided,
        # which crashed __init__ on `self.graphs.get(...)`
        return deps_dict

    def __unicode__(self):
        return self.text

    def to_string(self):
        return ' '.join("{w}__{p}".format(w=self.words[i],p=self.tags[i]) for i in range(self.length))

    def labeled_dependencies_using(self, tokens):
        """
        Generates a list of labeled dependencies for a sentence
        using the provided tokens
        """
        deps = self.dependencies
        labeled = []
        for out in deps.outgoing:
            for (dest, rel) in deps.outgoing[out]:
                labeled.append("{}_{}_{}".format(tokens[out], rel.upper(), tokens[dest]))
        return labeled

    def unlabeled_dependencies_using(self, tokens):
        """
        Generate a list of unlabeled dependencies for a sentence
        using the provided tokens
        """
        unlabeled = []
        for sd in self.labeled_dependencies_using(tokens):
            (head, _, dep) = sd.split("_")
            unlabeled.append("{}_{}".format(head, dep))
        return unlabeled

    def to_JSON_dict(self):
        # NOTE(review): `text` and `chunks` are not serialized here, so a
        # to_JSON/load_from_JSON round trip loses them — kept as-is to
        # preserve the existing wire format; confirm against consumers
        sentence_dict = dict()
        sentence_dict["words"] = self.words
        sentence_dict["startOffsets"] = self.startOffsets
        sentence_dict["endOffsets"] = self.endOffsets
        sentence_dict["tags"] = self.tags
        sentence_dict["lemmas"] = self.lemmas
        sentence_dict["entities"] = self._entities
        # add graphs
        sentence_dict["graphs"] = dict()
        for (kind, graph) in self.graphs.items():
            sentence_dict["graphs"][kind] = graph._graph_to_JSON_dict()
        return sentence_dict

    def to_JSON(self):
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)

    @staticmethod
    def load_from_JSON(json_dict):
        """
        Builds a `Sentence` from its JSON-derived dict representation.
        """
        sent = Sentence(
                    words=json_dict["words"],
                    startOffsets=json_dict["startOffsets"],
                    endOffsets=json_dict["endOffsets"],
                    lemmas=json_dict.get("lemmas", None),
                    tags=json_dict.get("tags", None),
                    # consistency fix: Document.load_from_JSON passes chunks,
                    # but this loader previously discarded them
                    chunks=json_dict.get("chunks", None),
                    entities=json_dict.get("entities", None),
                    text=json_dict.get("text", None),
                    graphs=json_dict.get("graphs", None)
                    )
        return sent
413
414
415
class Edge(object):

    """
    A labeled, directed edge between two token indices in a `DirectedGraph`.

    Parameters
    ----------
    source : int
        The token index of the edge's source node.

    destination : int
        The token index of the edge's destination node.

    relation : str
        The grammatical relation labeling the edge.
    """

    def __init__(self, source, destination, relation):
        self.source = source
        self.destination = destination
        self.relation = relation

    def __unicode__(self):
        return self.to_string()

    def to_string(self):
        return "Edge(source: {}, destination: {}, relation: {})".format(self.source, self.destination, self.relation)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        # consistency fix: every other storage class in this module defines
        # __ne__ alongside __eq__; Edge previously omitted it
        return not self.__eq__(other)

    def to_JSON_dict(self):
        edge_dict = dict()
        edge_dict["source"] = self.source
        edge_dict["destination"] = self.destination
        edge_dict["relation"] = self.relation
        return edge_dict

    def to_JSON(self):
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)
443
444
445
class DirectedGraph(object):

    """
    Storage class for directed graphs.


    Parameters
    ----------
    kind : str
        The name of the directed graph.

    deps : dict
        A dictionary of {edges: [{source, destination, relation}], roots: [int]}

    words : [str]
        A list of the word form of the tokens from the originating `Sentence`.

    Attributes
    ----------
    _words : [str]
        A lowercased list of the word form of the tokens from the originating `Sentence`.

    roots : [int]
        A list of indices for the syntactic dependency graph's roots.  Generally this is a single token index.

    edges: list[processors.ds.Edge]
        A list of `processors.ds.Edge`

    incoming : dict
        A dictionary of {int -> [(int, str)]} encoding the incoming edges (source, relation) for each node in the graph.

    outgoing : dict
        A dictionary of {int -> [(int, str)]} encoding the outgoing edges (destination, relation) for each node in the graph.

    labeled : [str]
        A list of strings where each element in the list represents an edge encoded as source word, relation, and destination word ("source_RELATION_destination").

    unlabeled : [str]
        A list of strings where each element in the list represents an edge encoded as source word and destination word ("source_destination").

    directed_graph : networkx.Graph
        A directed `networkx` representation of the `DirectedGraph`.  Used by `shortest_paths`/`shortest_path`.

    undirected_graph : networkx.Graph
        The undirected counterpart of `directed_graph`.

    Methods
    -------
    shortest_paths(start, end)
        Finds all shortest paths between the provided start and end nodes.
    shortest_path(start, end, scoring_func)
        Finds the shortest path (per `scoring_func`) between the provided start and end nodes.
    degree_centrality() / in_degree_centrality() / out_degree_centrality()
        Node centrality measures (thin wrappers around `networkx`).
    pagerank(...)
        Node activity measure (thin wrapper around `networkx` pagerank).
    """
    STANFORD_BASIC_DEPENDENCIES = "stanford-basic"
    STANFORD_COLLAPSED_DEPENDENCIES = "stanford-collapsed"

    def __init__(self, kind, deps, words):
        self._words = [w.lower() for w in words]
        self.kind = kind
        self.roots = deps.get("roots", [])
        self.edges = [Edge(e["source"], e["destination"], e["relation"]) for e in deps["edges"]]
        self.incoming = self._build_incoming(self.edges)
        self.outgoing = self._build_outgoing(self.edges)
        self.labeled = self._build_labeled()
        self.unlabeled = self._build_unlabeled()
        self.directed_graph = DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=False)
        self.undirected_graph = self.directed_graph.to_undirected()

    def __unicode__(self):
        # BUG FIX: previously returned self.edges (a list); __unicode__
        # must return text
        return "\n".join(e.to_string() for e in self.edges)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def shortest_paths(self, start, end):
        """
        Find the shortest paths in the syntactic dependency graph
        between the provided start and end nodes.

        Parameters
        ----------
        start : int or [int]
            A single token index or list of token indices serving as the start of the graph traversal.

        end : int or [int]
            A single token index or list of token indices serving as the end of the graph traversal.

        See Also
        --------
        `processors.paths.DependencyUtils.shortest_path`
        """
        paths = DependencyUtils.shortest_paths(self.undirected_graph, start, end)
        return None if not paths else [DependencyUtils.retrieve_edges(self, path) for path in paths]

    def shortest_path(self, start, end, scoring_func=lambda path: -len(path)):
        """
        Find the shortest path in the syntactic dependency graph
        between the provided start and end nodes.

        Parameters
        ----------
        start : int or [int]
            A single token index or list of token indices serving as the start of the graph traversal.

        end : int or [int]
            A single token index or list of token indices serving as the end of the graph traversal.

        scoring_func : function
            A function that scores each path in a list of [(source index, directed relation, destination index)] paths.  Each path has the form [(source index, relation, destination index)].
            The path with the maximum score will be returned.

        See Also
        --------
        `processors.paths.DependencyUtils.shortest_path`
        """
        paths = self.shortest_paths(start, end)
        return None if not paths else max(paths, key=scoring_func)

    def degree_centrality(self):
        """
        Compute the degree centrality for nodes.

        See Also
        --------
        https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
        """
        return Counter(nx.degree_centrality(self.directed_graph))

    def in_degree_centrality(self):
        """
        Compute the in-degree centrality for nodes.

        See Also
        --------
        https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
        """
        return Counter(nx.in_degree_centrality(self.directed_graph))

    def out_degree_centrality(self):
        """
        Compute the out-degree centrality for nodes.

        See Also
        --------
        https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
        """
        return Counter(nx.out_degree_centrality(self.directed_graph))

    def pagerank(self,
                 alpha=0.85,
                 personalization=None,
                 max_iter=1000,
                 tol=1e-06,
                 nstart=None,
                 weight='weight',
                 dangling=None,
                 use_directed=True,
                 reverse=True):
        """
        Measures node activity in a `networkx.Graph` using a thin wrapper around `networkx` implementation of pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`).  Use with `processors.ds.DirectedGraph.directed_graph`.
        Note that by default, the directed graph is reversed in order to highlight predicate-argument nodes (refer to pagerank algorithm to understand why).

        See Also
        --------
        `processors.paths.DependencyUtils.pagerank`
        Method parameters correspond to those of [`networkx.algorithms.link_analysis.pagerank`](https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank)
        """
        # check whether or not to reverse directed graph
        dg = self.directed_graph if not reverse else DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, is_directed=True, reverse=True)
        # determine graph to use
        graph = dg if use_directed else self.undirected_graph
        return DependencyUtils.pagerank(graph, alpha=alpha, personalization=personalization, max_iter=max_iter, tol=tol, nstart=nstart, weight=weight, dangling=dangling)

    def _build_incoming(self, edges):
        # {destination -> [(source, relation)]}
        dep_dict = defaultdict(list)
        for edge in edges:
            dep_dict[edge.destination].append((edge.source, edge.relation))
        return dep_dict

    def _build_outgoing(self, edges):
        # {source -> [(destination, relation)]}
        dep_dict = defaultdict(list)
        for edge in edges:
            dep_dict[edge.source].append((edge.destination, edge.relation))
        return dep_dict

    def _build_labeled(self):
        # "sourceword_RELATION_destword" for every edge
        labeled = []
        for out in self.outgoing:
            for (dest, rel) in self.outgoing[out]:
                labeled.append("{}_{}_{}".format(self._words[out], rel.upper(), self._words[dest]))
        return labeled

    def _build_unlabeled(self):
        # "sourceword_destword" for every edge
        unlabeled = []
        for out in self.outgoing:
            for (dest, _) in self.outgoing[out]:
                unlabeled.append("{}_{}".format(self._words[out], self._words[dest]))
        return unlabeled

    def _graph_to_JSON_dict(self):
        dg_dict = dict()
        dg_dict["edges"] = [e.to_JSON_dict() for e in self.edges]
        dg_dict["roots"] = self.roots
        return dg_dict

    def to_JSON_dict(self):
        return {self.kind:self._graph_to_JSON_dict()}

    def to_JSON(self):
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)
656
657
658
class Interval(object):
    """
    Defines a token or character span.

    Parameters
    ----------
    start : int
        The token or character index where the interval begins (inclusive).

    end : int
        1 + the index of the last token/character in the span (i.e. exclusive).
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def __eq__(self, other):
        # value equality, consistent with the other storage classes in this module
        if isinstance(other, self.__class__):
            return self.to_JSON() == other.to_JSON()
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash(self.to_JSON())

    def to_JSON_dict(self):
        return {"start":self.start, "end":self.end}

    def to_JSON(self):
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)

    @staticmethod
    def load_from_JSON(json):
        # NOTE(review): the parameter name shadows the module-level `json`
        # import; harmless here (json the module is unused in this scope),
        # and kept for backward compatibility with keyword callers
        return Interval(start=json["start"], end=json["end"])
684