Code Duplication: length = 12-13 lines, in 2 locations

processors/ds.py: 2 locations

@@ 346-358 (lines=13) @@
343
                # our old sequence continues
344
                current = re.sub('(B-|I-)','', str(current))
345
                tok = re.sub('(B-|I-)','', str(tok))
346
                if tok == current:
347
                    end = i
348
                # our old sequence has ended
349
                else:
350
                    # do we have a previous NE?
351
                    if current != Sentence.O:
352
                        end = i
353
                        named_entity = ' '.join(self.words[start:end])
354
                        entity_dict[current].append(named_entity)
355
                    # update our book-keeping vars
356
                    current = tok
357
                    start = i
358
                    end = None
359
        # this might be empty
360
        return entity_dict
361
@@ 329-340 (lines=12) @@
326
            if tok == Sentence.O:
327
                # did we have an entity with the last token?
328
                current = re.sub('(B-|I-)','', str(current))
329
                if current == Sentence.O:
330
                    continue
331
                else:
332
                    # the last sequence has ended
333
                    end = i
334
                    # store the entity
335
                    named_entity = ' '.join(self.words[start:end])
336
                    entity_dict[current].append(named_entity)
337
                    # reset our book-keeping vars
338
                    current = Sentence.O
339
                    start = None
340
                    end = None
341
            # we have a tag!
342
            else:
343
                # our old sequence continues