| Conditions | 9 |
| Total Lines | 54 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, particularly when each one has a good name. Conversely, the smaller a method is, the easier it usually is to find a good name for it.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
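Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, Preserve Whole Object, or Replace Method with Method Object.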
| 1 | def youtube_video_whitelist(iframe_tag): |
||
def strip_illegal_objects(html):
    """
    Strip every <object> tag that does not embed a locally stored PDF.

    An <object> tag is kept only when all of the following hold:

    * its ``data`` attribute starts with ``settings.MEDIA_URL``;
    * it carries no attributes other than ``data``, ``type``, ``width``
      and ``height``;
    * its ``type`` attribute is exactly ``application/pdf``.

    Every other <object> tag is extracted from the parsed document.

    :param html: HTML string to sanitise.
    :returns: The remaining HTML, serialised back to a string.
    """
    from bs4 import BeautifulSoup
    import re
    from mezzanine.conf import settings
    import logging
    logger = logging.getLogger(__name__)

    # Regexes that define allowed URL patterns.  MEDIA_URL is a literal URL
    # prefix, not a regex, so it is escaped: otherwise metacharacters in it
    # (e.g. the dots of a CDN host name) would match arbitrary characters
    # and could let foreign URLs through.  Compiled once, outside the loop.
    matchers = tuple(
        re.compile(pattern)
        for pattern in ("^{0}".format(re.escape(settings.MEDIA_URL)),)
    )
    # Attributes an <object> tag is allowed to carry.
    allowed_attributes = ('data', 'type', 'width', 'height')

    # Parse the input HTML into a DOM
    dom = BeautifulSoup(html, "html.parser")

    for object_tag in dom.find_all("object"):
        data = object_tag.get("data", "")
        filetype = object_tag.get("type", "")

        # The data URL must match at least one allowed pattern.
        if not any(matcher.match(data) for matcher in matchers):
            object_tag.extract()
            logger.debug("Stripped object - Could not match URL pattern.")
            continue

        # A single attribute outside the whitelist disqualifies the tag.
        if any(attr not in allowed_attributes for attr in object_tag.attrs):
            object_tag.extract()
            logger.debug("Stripped object - Found illegal attribute.")
            continue

        # The value of the type attribute should be 'application/pdf'
        if filetype != "application/pdf":
            object_tag.extract()
            logger.debug("Stripped object - Found illegal filetype.")
            continue

    return str(dom)
||
| 169 |