| @@ 346-358 (lines=13) @@ | ||
| 343 | # our old sequence continues |
|
| 344 | current = re.sub('(B-|I-)','', str(current)) |
|
| 345 | tok = re.sub('(B-|I-)','', str(tok)) |
|
| 346 | if tok == current: |
|
| 347 | end = i |
|
| 348 | # our old sequence has ended |
|
| 349 | else: |
|
| 350 | # do we have a previous named entity (NE)? |
|
| 351 | if current != Sentence.O: |
|
| 352 | end = i |
|
| 353 | named_entity = ' '.join(self.words[start:end]) |
|
| 354 | entity_dict[current].append(named_entity) |
|
| 355 | # update our book-keeping vars |
|
| 356 | current = tok |
|
| 357 | start = i |
|
| 358 | end = None |
|
| 359 | # note: entity_dict might be empty |
|
| 360 | return entity_dict |
|
| 361 | ||
| @@ 329-340 (lines=12) @@ | ||
| 326 | if tok == Sentence.O: |
|
| 327 | # did we have an entity with the last token? |
|
| 328 | current = re.sub('(B-|I-)','', str(current)) |
|
| 329 | if current == Sentence.O: |
|
| 330 | continue |
|
| 331 | else: |
|
| 332 | # the last sequence has ended |
|
| 333 | end = i |
|
| 334 | # store the entity |
|
| 335 | named_entity = ' '.join(self.words[start:end]) |
|
| 336 | entity_dict[current].append(named_entity) |
|
| 337 | # reset our book-keeping vars |
|
| 338 | current = Sentence.O |
|
| 339 | start = None |
|
| 340 | end = None |
|
| 341 | # we have a tag! |
|
| 342 | else: |
|
| 343 | # our old sequence continues |
|