| Conditions | 8 |
| Total Lines | 68 |
| Code Lines | 33 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, particularly when they are combined with a good name. Moreover, when a method is small, finding a good name for it is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
| 1 | #!/usr/bin/env python2 |
||
def harvest_ids_descriptions(page, id_map):
    """Harvest PCI-DSS requirement IDs and description excerpts from a page.

    Every direct ``text`` child of ``page`` is inspected. A text element is
    taken to describe a requirement when its first child is a ``b`` element
    whose (autocorrected) text matches the PCI-DSS ID pattern and the ID is
    applicable on OS level. The text following that ``b`` element (its tail)
    is then recorded as the description excerpt.

    Args:
        page: ElementTree-style element; must support ``get`` and
            ``findall``, and its ``text`` children must expose ``tag``,
            ``text`` and ``tail``.
        id_map: dict mapping requirement ID to description excerpt. It is
            modified in place; only the first encounter of each ID is stored.

    Returns:
        None. Results are delivered through ``id_map``.
    """
    logging.debug("Harvesting page %s", page.get("number", "unknown"))

    # It is my understanding that this will match all valid PCI-DSS IDs.
    # The pattern is loop-invariant, so it is compiled once per call.
    id_pattern = re.compile(
        # number followed by a dot
        r"^[1-9][0-9]*\."
        # second section, number plus optional letter
        r"([1-9][0-9]*[a-z]?"
        # third section only if second section is present, number plus
        # optional letter
        r"(\.[1-9][0-9]*[a-z]?)?)"
        # sometimes there is a suffix with just a letter, preceded by a dot
        r"?(\.[a-z])?$"
    )

    for text in page.findall("./text"):
        # every text element describing a PCI DSS requirement will have
        # several properties we will exploit here

        # 1) some elements present
        if len(text) == 0:
            continue

        # 2) first element is b
        first_child = text[0]
        if first_child.tag != "b":
            continue

        # 3) the first element is b and contains a PCI-DSS requirement ID.
        # An empty <b/> has no text at all (None), so it cannot carry an ID;
        # skip it instead of failing on None.strip().
        raw_id = first_child.text
        if raw_id is None:
            continue

        id_candidate = raw_id.strip()

        # PCI-DSS PDF contains ID mistakes, let's fix the known ones
        id_candidate = autocorrect_pci_id(id_candidate)

        if id_pattern.match(id_candidate) is None:
            continue

        # now we are reasonably sure the text element describes a req ID
        logging.debug("This text describes req of ID '%s'.", id_candidate)

        if not is_applicable_to_os(id_candidate):
            logging.debug(
                "Req ID '%s' is not applicable on OS level.", id_candidate
            )
            continue

        # TODO: Would be great to get the entire description but that's very
        # complex to achieve
        description_excerpt = first_child.tail

        if description_excerpt is None:
            continue

        description_excerpt = description_excerpt.strip()

        if id_candidate not in id_map:
            logging.debug(
                "Assigning '%s' as description excerpt for ID '%s'.",
                description_excerpt, id_candidate
            )
            id_map[id_candidate] = description_excerpt

        else:
            # It is normal to encounter this. The second encounters are
            # rationale guidances, the first encounter are descriptions
            logging.debug(
                "Not assigning '%s' as description excerpt for ID '%s'. This "
                "ID is already in the map!", description_excerpt, id_candidate
            )
||
| 204 |