Conditions | 8 |
Total Lines | 68 |
Code Lines | 33 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments inside a method's body, that is usually a good sign that the commented part should be extracted into a new method; the comment is then a good starting point for choosing that method's name.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
1 | #!/usr/bin/env python2 |
||
58 | def harvest_ids_descriptions(page, id_map): |
||
59 | logging.debug("Harvesting page %s", page.get("number", "unknown")) |
||
60 | |||
61 | for text in page.findall("./text"): |
||
62 | # every text element describing a PCI DSS requirement will have |
||
63 | # several properties we will exploit here |
||
64 | |||
65 | # 1) some elements present |
||
66 | if len(text) == 0: |
||
67 | continue |
||
68 | |||
69 | # 2) first element is b |
||
70 | if text[0].tag != "b": |
||
71 | continue |
||
72 | |||
73 | # 3) the first element is b and contains a PCI-DSS requirement ID |
||
74 | id_candidate = text[0].text.strip() |
||
75 | |||
76 | # PCI-DSS PDF contains ID mistakes, let's fix the known ones |
||
77 | id_candidate = autocorrect_pci_id(id_candidate) |
||
78 | |||
79 | # It is my understanding that this will match all valid PCI-DSS IDs |
||
80 | id_pattern = "" |
||
81 | |||
82 | # number followed by a dot |
||
83 | id_pattern += "^[1-9][0-9]*\\." |
||
84 | # second section, number plus optional letter |
||
85 | id_pattern += "([1-9][0-9]*[a-z]?" |
||
86 | # third section only if second section is present, number plus |
||
87 | # optional letter |
||
88 | id_pattern += "(\\.[1-9][0-9]*[a-z]?)?)" |
||
89 | # sometimes there is a suffix with just a letter, preceded by a dot |
||
90 | id_pattern += "?(\\.[a-z])?$" |
||
91 | |||
92 | if re.match(id_pattern, id_candidate) is None: |
||
93 | continue |
||
94 | |||
95 | # now we are reasonably sure the text element describes a req ID |
||
96 | logging.debug("This text describes req of ID '%s'.", id_candidate) |
||
97 | |||
98 | if not is_applicable_to_os(id_candidate): |
||
99 | logging.debug( |
||
100 | "Req ID '%s' is not applicable on OS level.", id_candidate |
||
101 | ) |
||
102 | continue |
||
103 | |||
104 | # TODO: Would be great to get the entire description but that's very |
||
105 | # complex to achieve |
||
106 | description_excerpt = text[0].tail |
||
107 | |||
108 | if description_excerpt is None: |
||
109 | continue |
||
110 | |||
111 | description_excerpt = description_excerpt.strip() |
||
112 | |||
113 | if id_candidate not in id_map: |
||
114 | logging.debug( |
||
115 | "Assigning '%s' as description excerpt for ID '%s'.", |
||
116 | description_excerpt, id_candidate |
||
117 | ) |
||
118 | id_map[id_candidate] = description_excerpt |
||
119 | |||
120 | else: |
||
121 | # It is normal to encounter this. The second encounters are |
||
122 | # rationale guidances, the first encounter are descriptions |
||
123 | logging.debug( |
||
124 | "Not assigning '%s' as description excerpt for ID '%s'. This " |
||
125 | "ID is already in the map!", description_excerpt, id_candidate |
||
126 | ) |
||
204 |