@@ 1594-1686 (lines=93) @@ | ||
1591 | return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) |
|
1592 | ||
1593 | ||
class CleansedLines(object):
    """Keeps four parallel views of the same sequence of source lines.

    Attributes:
      raw_lines: The `lines` argument exactly as it was passed in.
      lines_without_raw_strings: Result of CleanseRawStrings(lines);
        per the original documentation, the raw lines with C++11 raw
        strings removed.
      lines: One entry per entry of lines_without_raw_strings, each passed
        through CleanseComments (comments removed).
      elided: One entry per entry of lines_without_raw_strings, each passed
        through _CollapseStrings and then CleanseComments (strings
        collapsed, comments removed).
      num_lines: len() of the `lines` argument.

    `lines` and `elided` are lists built entry by entry from
    lines_without_raw_strings, so those three have the same length.
    """

    def __init__(self, lines):
        """Builds every cleansed view of `lines` up front.

        Args:
          lines: Sequence of source lines; stored as-is in raw_lines.
        """
        self.raw_lines = lines
        self.num_lines = len(lines)
        self.lines_without_raw_strings = CleanseRawStrings(lines)
        self.lines = []
        self.elided = []
        for text in self.lines_without_raw_strings:
            # Comment-free view first, then the string-collapsed view; both
            # start from the same raw-string-free line.
            self.lines.append(CleanseComments(text))
            self.elided.append(CleanseComments(self._CollapseStrings(text)))

    def NumLines(self):
        """Returns the line count recorded at construction time."""
        return self.num_lines

    @staticmethod
    def _CollapseStrings(elided):
        """Reduces string and char literals on a line to bare "" or ''.

        Strings are collapsed before comments are stripped (by the caller)
        so that text such as '"http://"' is not mistaken for a comment.

        Args:
          elided: The line being processed.

        Returns:
          The line with each quoted region replaced by an empty pair of the
          same quote character, and with the single quotes used as digit
          separators inside numeric literals removed.  A line matched by
          _RE_PATTERN_INCLUDE is returned unchanged.
        """
        # Lines matched by the include pattern (presumably #include
        # directives -- the pattern is defined elsewhere) are left alone.
        if _RE_PATTERN_INCLUDE.match(elided):
            return elided

        # Drop escape sequences first so that finding the closing quote is a
        # plain character search.  Things that look like escapes should not
        # occur outside of strings and chars.
        remaining = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)

        # Single and double quotes are handled in one left-to-right scan;
        # otherwise a quote of one kind nested inside a literal of the other
        # kind would be misread.
        pieces = []
        while True:
            # Split at the first quote character of either kind.
            found = Match(r'^([^\'"]*)([\'"])(.*)$', remaining)
            if not found:
                # No quotes left: keep the rest of the line verbatim.
                pieces.append(remaining)
                break
            head, quote, tail = found.groups()

            # A single quote right after the start of a numeric literal is a
            # digit separator, not the start of a char literal.  Floating
            # point needs no special handling: integer/fractional/exponent
            # parts are all consumed as long as digits sit on both sides of
            # the separator (something like "0.'3" is not a valid literal).
            if quote == "'" and Search(
                    r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
                literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
                pieces.append(head + literal.group(1).replace("'", ''))
                remaining = literal.group(2)
                continue

            closing = tail.find(quote)
            if closing < 0:
                # Unmatched quote: leave the rest untouched.  For a double
                # quote this is probably a multiline string.
                pieces.append(remaining)
                break

            # Replace the whole literal with an empty pair of its quotes and
            # keep scanning after the closing quote.
            pieces.append(head + quote * 2)
            remaining = tail[closing + 1:]

        return ''.join(pieces)
|
1687 | ||
1688 | ||
1689 | def FindEndOfExpressionInLine(line, startpos, stack): |
@@ 1594-1686 (lines=93) @@ | ||
1591 | return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) |
|
1592 | ||
1593 | ||
class CleansedLines(object):
    """Keeps four parallel views of the same sequence of source lines.

    Attributes:
      raw_lines: The `lines` argument exactly as it was passed in.
      lines_without_raw_strings: Result of CleanseRawStrings(lines);
        per the original documentation, the raw lines with C++11 raw
        strings removed.
      lines: One entry per entry of lines_without_raw_strings, each passed
        through CleanseComments (comments removed).
      elided: One entry per entry of lines_without_raw_strings, each passed
        through _CollapseStrings and then CleanseComments (strings
        collapsed, comments removed).
      num_lines: len() of the `lines` argument.

    `lines` and `elided` are lists built entry by entry from
    lines_without_raw_strings, so those three have the same length.
    """

    def __init__(self, lines):
        """Builds every cleansed view of `lines` up front.

        Args:
          lines: Sequence of source lines; stored as-is in raw_lines.
        """
        self.raw_lines = lines
        self.num_lines = len(lines)
        self.lines_without_raw_strings = CleanseRawStrings(lines)
        self.lines = []
        self.elided = []
        for text in self.lines_without_raw_strings:
            # Comment-free view first, then the string-collapsed view; both
            # start from the same raw-string-free line.
            self.lines.append(CleanseComments(text))
            self.elided.append(CleanseComments(self._CollapseStrings(text)))

    def NumLines(self):
        """Returns the line count recorded at construction time."""
        return self.num_lines

    @staticmethod
    def _CollapseStrings(elided):
        """Reduces string and char literals on a line to bare "" or ''.

        Strings are collapsed before comments are stripped (by the caller)
        so that text such as '"http://"' is not mistaken for a comment.

        Args:
          elided: The line being processed.

        Returns:
          The line with each quoted region replaced by an empty pair of the
          same quote character, and with the single quotes used as digit
          separators inside numeric literals removed.  A line matched by
          _RE_PATTERN_INCLUDE is returned unchanged.
        """
        # Lines matched by the include pattern (presumably #include
        # directives -- the pattern is defined elsewhere) are left alone.
        if _RE_PATTERN_INCLUDE.match(elided):
            return elided

        # Drop escape sequences first so that finding the closing quote is a
        # plain character search.  Things that look like escapes should not
        # occur outside of strings and chars.
        remaining = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)

        # Single and double quotes are handled in one left-to-right scan;
        # otherwise a quote of one kind nested inside a literal of the other
        # kind would be misread.
        pieces = []
        while True:
            # Split at the first quote character of either kind.
            found = Match(r'^([^\'"]*)([\'"])(.*)$', remaining)
            if not found:
                # No quotes left: keep the rest of the line verbatim.
                pieces.append(remaining)
                break
            head, quote, tail = found.groups()

            # A single quote right after the start of a numeric literal is a
            # digit separator, not the start of a char literal.  Floating
            # point needs no special handling: integer/fractional/exponent
            # parts are all consumed as long as digits sit on both sides of
            # the separator (something like "0.'3" is not a valid literal).
            if quote == "'" and Search(
                    r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
                literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
                pieces.append(head + literal.group(1).replace("'", ''))
                remaining = literal.group(2)
                continue

            closing = tail.find(quote)
            if closing < 0:
                # Unmatched quote: leave the rest untouched.  For a double
                # quote this is probably a multiline string.
                pieces.append(remaining)
                break

            # Replace the whole literal with an empty pair of its quotes and
            # keep scanning after the closing quote.
            pieces.append(head + quote * 2)
            remaining = tail[closing + 1:]

        return ''.join(pieces)
|
1687 | ||
1688 | ||
1689 | def FindEndOfExpressionInLine(line, startpos, stack): |