| @@ 1594-1686 (lines=93) @@ | ||
| 1591 | return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) |
|
| 1592 | ||
| 1593 | ||
class CleansedLines(object):
  """Keeps four parallel views of the same source lines.

  Every view is a list and all of them have the same number of entries:

    raw_lines: the lines exactly as they were passed in.
    lines_without_raw_strings: result of CleanseRawStrings(raw_lines),
      i.e. the raw lines with C++11 raw strings removed.
    lines: each entry of lines_without_raw_strings run through
      CleanseComments.
    elided: each entry of lines_without_raw_strings with its string and
      character literals collapsed and then run through CleanseComments.
  """

  def __init__(self, lines):
    """Builds all four views from the given lines.

    Args:
      lines: The unprocessed lines of the file.
    """
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    # One pass per line: record the comment-free form first, then the form
    # with literals collapsed and comments removed.
    for stripped_line in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(stripped_line))
      without_literals = self._CollapseStrings(stripped_line)
      self.elided.append(CleanseComments(without_literals))

  def NumLines(self):
    """Returns how many lines this object holds."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Reduces string and char literals on a line to bare "" or '' pairs.

    Strings are handled before anything else so that text such as
    '"http://"' cannot be mistaken for something it is not.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    # Lines matching the include pattern are returned untouched.
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided

    # Dropping escape sequences up front keeps the quote matching below
    # simple. Things that look like escapes should not appear outside of
    # strings and chars anyway.
    remaining = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)

    # Double and single quotes are consumed by the same loop, otherwise
    # nested quotes would not be handled. Finished pieces are collected
    # here and joined once at the end.
    pieces = []
    while True:
      # Split at the first quote character of either kind.
      first_quote = Match(r'^([^\'"]*)([\'"])(.*)$', remaining)
      if not first_quote:
        pieces.append(remaining)
        break
      head, quote, tail = first_quote.groups()

      # A single quote directly after the start of a numeric literal is a
      # digit separator, not a char literal. Floating point needs no
      # special handling: the integer/fractional/exponent parts all parse
      # correctly as long as digits sit on both sides of the separator,
      # so only something like "0.'3" would confuse this (gcc 4.9.0 does
      # not allow that literal).
      if quote == "'" and Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
        literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
        pieces.append(head + literal.group(1).replace("'", ''))
        remaining = literal.group(2)
        continue

      closing = tail.find(quote)
      if closing < 0:
        # Unmatched quote: keep the rest of the line as is. For a double
        # quote this is probably a multiline string.
        pieces.append(remaining)
        break

      # Keep only the empty pair of quotes and continue after the literal.
      pieces.append(head + quote * 2)
      remaining = tail[closing + 1:]

    return ''.join(pieces)
|
| 1687 | ||
| 1688 | ||
| 1689 | def FindEndOfExpressionInLine(line, startpos, stack): |
|
| @@ 1594-1686 (lines=93) @@ | ||
| 1591 | return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) |
|
| 1592 | ||
| 1593 | ||
class CleansedLines(object):
  """Keeps four parallel views of the same source lines.

  Every view is a list and all of them have the same number of entries:

    raw_lines: the lines exactly as they were passed in.
    lines_without_raw_strings: result of CleanseRawStrings(raw_lines),
      i.e. the raw lines with C++11 raw strings removed.
    lines: each entry of lines_without_raw_strings run through
      CleanseComments.
    elided: each entry of lines_without_raw_strings with its string and
      character literals collapsed and then run through CleanseComments.
  """

  def __init__(self, lines):
    """Builds all four views from the given lines.

    Args:
      lines: The unprocessed lines of the file.
    """
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    # One pass per line: record the comment-free form first, then the form
    # with literals collapsed and comments removed.
    for stripped_line in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(stripped_line))
      without_literals = self._CollapseStrings(stripped_line)
      self.elided.append(CleanseComments(without_literals))

  def NumLines(self):
    """Returns how many lines this object holds."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Reduces string and char literals on a line to bare "" or '' pairs.

    Strings are handled before anything else so that text such as
    '"http://"' cannot be mistaken for something it is not.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    # Lines matching the include pattern are returned untouched.
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided

    # Dropping escape sequences up front keeps the quote matching below
    # simple. Things that look like escapes should not appear outside of
    # strings and chars anyway.
    remaining = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)

    # Double and single quotes are consumed by the same loop, otherwise
    # nested quotes would not be handled. Finished pieces are collected
    # here and joined once at the end.
    pieces = []
    while True:
      # Split at the first quote character of either kind.
      first_quote = Match(r'^([^\'"]*)([\'"])(.*)$', remaining)
      if not first_quote:
        pieces.append(remaining)
        break
      head, quote, tail = first_quote.groups()

      # A single quote directly after the start of a numeric literal is a
      # digit separator, not a char literal. Floating point needs no
      # special handling: the integer/fractional/exponent parts all parse
      # correctly as long as digits sit on both sides of the separator,
      # so only something like "0.'3" would confuse this (gcc 4.9.0 does
      # not allow that literal).
      if quote == "'" and Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
        literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
        pieces.append(head + literal.group(1).replace("'", ''))
        remaining = literal.group(2)
        continue

      closing = tail.find(quote)
      if closing < 0:
        # Unmatched quote: keep the rest of the line as is. For a double
        # quote this is probably a multiline string.
        pieces.append(remaining)
        break

      # Keep only the empty pair of quotes and continue after the literal.
      pieces.append(head + quote * 2)
      remaining = tail[closing + 1:]

    return ''.join(pieces)
|
| 1687 | ||
| 1688 | ||
| 1689 | def FindEndOfExpressionInLine(line, startpos, stack): |
|