Passed
Push — master ( f1891f...47032c )
by Alexander
03:00 queued 12s
created

SimilarStringChecker.close()   A

Complexity

Conditions 4

Size

Total Lines 7
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 7
rs 10
c 0
b 0
f 0
cc 4
nop 1
1
import os
2
import re
3
import string
4
5
import astroid
6
from pylint.checkers import BaseChecker, utils
7
from pylint.interfaces import IAstroidChecker
8
from textdistance import levenshtein
9
10
11
class SimilarStringChecker(BaseChecker):
    """
    Pylint checker which reports translatable strings that are almost,
    but not exactly, the same as a previously seen translatable string.

    Strings are collected from ``_()`` / ``gettext_lazy()`` calls in Python
    code (see ``visit_call``) and from ``{% trans %}`` / ``{% blocktrans %}``
    tags in template files found under ``project_root`` (see ``close``).
    All messages are queued in ``error_messages`` and emitted in ``close``.
    """

    __implements__ = (IAstroidChecker,)
    name = "similar-string-checker"

    msgs = {
        "R5011": (
            '"%s" is %.2f%% similar to "%s". Try keeping them the same '
            "to reduce work for translators!",
            "similar-string",
            "Similar strings should be avoided for single translation",
        )
    }

    # Class-level defaults. ``open`` rebinds fresh per-instance containers so
    # that state does not leak between checker instances or between runs.
    # Keys are the translation strings seen so far; values are unused.
    _dict_of_strings = {}
    # minimum normalized Levenshtein similarity (0..1) that triggers a report
    threshold = 0.8
    # keyword arguments for ``add_message``, queued until ``close``
    error_messages = []

    project_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "tcms")
    )

    # NOTE: this works against tcms/ directory and will not take into account
    # if we want to examine only a sub-dir or a few files
    # all files found by os.walk
    all_template_files = set()

    # translation table which deletes every character in string.punctuation
    _punctuation_table = str.maketrans("", "", string.punctuation)

    # patterns used by parse_translation_string, compiled once
    _disable_comment_re = re.compile(r"<!--\s*pylint\s*:\s*disable\s*-->")
    _blocktrans_start_re = re.compile(r"{% blocktrans[^%}]*%}(.+)")
    _trans_re = re.compile(r'{% trans "([\w ]+)" %}')
    _endblocktrans_tag = "{% endblocktrans %}"

    def open(self):
        """
        Reset the collected state and gather all ``.html`` and ``.txt``
        files below ``project_root`` into ``all_template_files``.
        """
        # fresh containers per instance/run instead of mutating the shared
        # class-level ones
        self._dict_of_strings = {}
        self.error_messages = []
        self.all_template_files = set()

        for rootpath, _dirs, files in os.walk(self.project_root, topdown=False):
            for file_name in files:
                if file_name.endswith((".html", ".txt")):
                    # rootpath as yielded by os.walk already starts with
                    # project_root, so it must not be prepended again
                    self.all_template_files.add(os.path.join(rootpath, file_name))

    @staticmethod
    def clean_string(text):
        """
        This method removes the operators and other punctuations
        used in the string to avoid unwanted data to add up to
        the similarity between the strings.

        Returns ``text`` without the characters in ``string.punctuation``.
        """
        return text.translate(SimilarStringChecker._punctuation_table)

    def check_similar_string(self, translation_string):
        """
        Compare ``translation_string`` against every string seen so far.

        Returns ``(key, similarity)`` for the first already seen string
        whose normalized Levenshtein similarity (computed on the cleaned
        strings) is at least ``threshold``, otherwise ``(None, None)``.
        """
        cleaned_translation_string = self.clean_string(translation_string)
        for key in self._dict_of_strings:
            similarity = levenshtein.normalized_similarity(
                cleaned_translation_string, self.clean_string(key)
            )
            if similarity >= self.threshold:
                return key, similarity
        return None, None

    @utils.check_messages("similar-string")
    def visit_call(self, node):
        """
        Inspect calls to ``_`` or ``gettext_lazy`` whose first argument is
        a string constant and check that string for similarity.
        """
        func = node.func
        if not isinstance(func, astroid.Name) or func.name not in (
            "_",
            "gettext_lazy",
        ):
            return

        # a call without positional arguments has nothing to inspect
        if not node.args:
            return

        first_arg = node.args[0]
        if not isinstance(first_arg, astroid.nodes.Const):
            return

        # constants which are not text (bytes, numbers, None) cannot be
        # cleaned or compared as translation strings
        translation_string = first_arg.value
        if not isinstance(translation_string, str):
            return

        self.check_similar_and_add_error_message(node, translation_string)

    # check if similar string found and add it to error_messages
    def check_similar_and_add_error_message(
        self, node, translation_string, **error_message
    ):
        """
        Queue a "similar-string" message if ``translation_string`` is
        similar to an already seen string, otherwise remember it.

        ``node`` is either an astroid node or a file path given as ``str``;
        a path is wrapped into an ``astroid.Module``. Extra keyword
        arguments (e.g. ``line``, ``col_offset``) are stored together with
        ``node`` and ``args`` and later passed to ``add_message``.
        Exact duplicates of an already seen string are ignored.
        """
        if translation_string in self._dict_of_strings:
            return

        similar_string, similarity = self.check_similar_string(translation_string)

        # compare against the (None, None) sentinel explicitly, an empty
        # string is a valid (falsy) key
        if similar_string is not None:
            if isinstance(node, str):
                error_message["node"] = astroid.Module(node, file=node, doc=None)
            else:
                error_message["node"] = node
            error_message["args"] = (
                translation_string,
                similarity * 100,
                similar_string,
            )
            self.error_messages.append(error_message)
            return

        self._dict_of_strings[translation_string] = True

    # checks each line and find trans or blocktrans tags
    def parse_translation_string(self, filename, lines):
        """
        Scan the ``lines`` of template ``filename`` for ``{% trans %}`` and
        ``{% blocktrans %}`` tags and check every string found.

        Lines containing a ``<!-- pylint: disable -->`` comment are skipped.
        Reported line numbers are 1-based; the column is the offset of the
        translatable text inside its line.
        """
        startline = 0
        startcol = 0
        blocktrans_string = ""

        for lineno, line in enumerate(lines):
            # if pylint disable comment is found ignore and continue with next line
            if self._disable_comment_re.search(line):
                continue

            # if blocktrans starting tag is found
            # NOTE(review): the pattern requires at least one character after
            # the opening tag on the same line, so a block whose text starts
            # on the following line is not picked up - confirm whether this
            # is intended
            match_blocktrans_in_line = self._blocktrans_start_re.search(line)
            if match_blocktrans_in_line:
                startline = lineno
                startcol = match_blocktrans_in_line.start(1)
                blocktrans_string = match_blocktrans_in_line.group(1)

            # if line after blocktrans is found
            elif blocktrans_string != "":
                blocktrans_string += line

            # if blocktrans ending tag is found, everything in front of its
            # last occurrence is the text to check
            end_index = blocktrans_string.rfind(self._endblocktrans_tag)
            if end_index != -1:
                self.check_similar_and_add_error_message(
                    filename,
                    blocktrans_string[:end_index],
                    line=startline + 1,
                    col_offset=startcol,
                )
                blocktrans_string = ""

            # if trans tag is found
            # Note: trans tag could be more than one in same
            # line, hence finditer rather than search
            for match in self._trans_re.finditer(line):
                self.check_similar_and_add_error_message(
                    filename,
                    match.group(1),
                    line=lineno + 1,
                    col_offset=match.start(1),
                )

    def close(self):
        """
        Parse all collected template files and then emit every queued
        "similar-string" message.
        """
        # sorted, because set iteration order is not stable between runs and
        # it decides which one of two similar strings gets reported
        for filepath in sorted(self.all_template_files):
            with open(filepath, "r", encoding="utf-8") as file:
                self.parse_translation_string(filepath, file.readlines())

        for error_message in self.error_messages:
            self.add_message("similar-string", **error_message)
160