src.emlx2eml.include_attachment()   C
last analyzed

Complexity

Conditions 11

Size

Total Lines 45
Code Lines 40

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
eloc 40
nop 3
dl 0
loc 45
rs 5.4
c 0
b 0
f 0

How to fix   Complexity   

Complexity

Complex classes like src.emlx2eml.include_attachment() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
# Compatible with python3 and python2 (tested with at least 2.4)
4
5
# flake8: noqa
6
# pylint: skip-file
7
# Originally from https://github.com/LRGH/emlx2eml
8
9
import sys
10
import os
11
import logging
12
import struct
13
import email
14
import base64
15
import mimetypes
16
17
# Console handler that prints records as "LEVEL: message".
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))

# Logger shared by the whole module.  The DEBUG level is overridden right
# away by the ERROR level; disable the second call to get a verbose trace.
log = logging.getLogger("emlx2eml")
log.setLevel(logging.DEBUG)
log.setLevel(logging.ERROR)
log.addHandler(console_handler)
23
24
25
def find_emlx(input):
    """Return the list of ``.emlx`` paths found at or below *input*.

    Symbolic links are skipped, directories are searched recursively, and
    any other path is reported only when its name ends with ``.emlx``.
    """
    # Symbolic links are ignored, whatever they point to.
    if os.path.islink(input):
        return []
    if not os.path.isdir(input):
        if input.endswith(".emlx"):
            return [input]
        return []
    found = []
    for entry in os.listdir(input):
        found.extend(find_emlx(os.path.join(input, entry)))
    return found
37
38
39
# Some definitions, to enforce compatibility with python2 and python3
40
# A single line feed as a byte string, built the same way on python2 and
# python3.
newline = struct.pack("B", 10)

if sys.version_info[0] != 2:
    message_from_bytes = email.message_from_bytes

    def message_as_bytes(msg):
        """Serialize *msg*, unixfrom line included, as bytes."""
        return msg.as_bytes(unixfrom=True)
else:
    # On python2 the native string type already holds bytes.
    message_from_bytes = email.message_from_string

    def message_as_bytes(msg):
        """Serialize *msg*, unixfrom line included, as a byte string."""
        return msg.as_string(unixfrom=True)
47
48
49
def copy_emlx(emlx, out_dir):
    """Convert the EMLX file *emlx* into ``<out_dir>/<numeric id>.eml``.

    *out_dir* is created when it does not exist yet.  An existing output
    file is never overwritten: an error is logged and False is returned.
    Returns True once the EML file has been written.
    """
    # Get the numeric id
    msg_id = get_numeric_id(emlx)

    # Create output file
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    eml = os.path.join(out_dir, msg_id + ".eml")
    log.debug("Extract %s to %s", emlx, eml)
    if os.path.exists(eml):
        log.error("%s already exists", eml)
        return False

    # Parse the EMLX file
    msg = parse_emlx(emlx)
    msg.set_unixfrom("From emlx2eml Thu Apr 19 00:00:00 2012")
    # TODO: generate relevant values for unixfrom

    # Serialize before opening the output, so that a serialization failure
    # does not leave an empty file behind, and always close the handle.
    payload = message_as_bytes(msg)
    out = open(eml, "wb")
    try:
        out.write(payload)
    finally:
        out.close()
    return True
66
67
68
def get_numeric_id(filename):
    """Return the message id encoded in the name of an EMLX file.

    The id is the base name of *filename* with its ``.emlx`` suffix, and
    then an optional ``.partial`` suffix, removed.  A name that does not
    end with ``.emlx`` trips an assertion.
    """
    name = os.path.basename(filename)
    assert name.endswith(".emlx")
    stem = name[:-len(".emlx")]
    if stem.endswith(".partial"):
        return stem[:-len(".partial")]
    return stem
75
76
77
def parse_emlx(filename):
    """Parse the EMLX file *filename* and return it as an email message.

    The first line of the file holds the length, in bytes, of the message
    that follows it; whatever comes after the message (a plist) is ignored.
    Attachments are looked up in ``../Attachments/<numeric id>`` relative
    to the directory of *filename* and merged into the returned message.

    Raises ValueError when the length line is missing or is not a number.
    """
    # Read file, making sure the handle is closed
    emlx_file = open(filename, "rb")
    try:
        content = emlx_file.read()
    finally:
        emlx_file.close()

    # Extract parts
    eol = content.find(newline)
    if eol < 0:
        # Without this check, find() returning -1 would make the slice
        # below silently drop the last byte and parse the rest as a length.
        raise ValueError("%s: missing EMLX length line" % filename)
    length = int(content[:eol])
    body = content[eol+1:eol+1+length]
    # TODO: parse the content of 'plist', e.g. using plistlib
    # plist = content[eol+1+length:]
    msg = message_from_bytes(body)

    # Find where attachments may be
    msg_id = get_numeric_id(filename)
    attach_dir = os.path.dirname(filename)
    if attach_dir == "":
        attach_dir = "."
    attach_dir += "/../Attachments/" + msg_id

    # Make complete eml
    parse_msg(attach_dir, msg, [])
    return msg
99
100
101
def parse_msg(attach_dir, msg, depth):
    """Walk *msg* recursively and include the attachments of its sub-parts.

    *depth* is the list of zero-based part indexes leading from the top
    message to *msg*; it is empty for the top message itself.
    """
    indent = " " * len(depth)
    part_number = ".".join([str(level + 1) for level in depth])
    log.debug("%sPART %s %r of type %s", indent, part_number, msg,
              msg.get_content_type())
    if not msg.is_multipart():
        return
    for idx, part in enumerate(msg.get_payload()):
        child_depth = depth + [idx]
        # Descend first, then fill in the payload of the part itself.
        parse_msg(attach_dir, part, child_depth)
        include_attachment(attach_dir, part, child_depth)
108
109
110
# Mail.app makes up a file name for attachments that have no explicit one,
# and we have to guess that name. Its base part depends on the language of
# the OS at the time the mail was downloaded. The candidates below were
# extracted by parsing
# /System/Library/PrivateFrameworks/Notes.framework/Versions/A/Resources/*.lproj/MailCore.strings
base_filenames = (
    # ar
    u"مرفق البريد",
    # ca
    u"Adjunt de Mail",
    # cs
    u"Příloha pošty",
    # da
    u"Postbilag",
    # de
    u"Mail-Anhang",
    # el
    u"Συνημμένο Mail",
    # en, en_AU, en_GB
    u"Mail Attachment",
    # es
    u"Archivo adjunto al mensaje",
    # es_419
    u"Archivo adjunto a un correo",
    # fi
    u"Sähköpostiliite",
    # fr, fr_CA
    u"Pièce jointe",
    # he
    u"קובץ מצורף לדואר",
    # hi
    u"मेल अटैचमेंट",
    # hr
    u"E-mail privitak",
    # hu
    u"Mail melléklet",
    # id
    u"Lampiran Mail",
    # it
    u"Allegato di posta elettronica",
    # ja
    u"メールの添付ファイル",
    # ko
    u"Mail 첨부 파일",
    # ms
    u"Lampiran Mail",
    # nl
    u"Mail-bijlage",
    # no
    u"E-postvedlegg",
    # pl
    u"Załącznik poczty",
    # pt
    u"Anexo de E-mail",
    # pt_PT
    u"Anexo de e‑mail",
    # ro
    u"Fișier atașat Mail",
    # ru
    u"Вложенный файл Почты",
    # sk
    u"Mailová príloha",
    # sv
    u"Brevbilaga",
    # th
    u"ไฟล์แนบเมล",
    # tr
    u"Posta İlişiği",
    # uk
    u"Поштове прикріплення",
    # vi
    u"Tệp đính kèm của Mail",
    # yue_CN, zh_CN
    u"邮件附件",
    # zh_HK, zh_TW
    u"郵件附件",
)
151
152
153
def mimetypes_guess_extension(mime_type):
    """Return the file extension to use for an attachment of *mime_type*.

    We don't want to always use mimetypes.guess_extension, because it does
    not always return what is generated by Mail.app, mainly because
    multiple extensions can be associated to a single MIME type.  A
    hardcoded table is consulted first; for a type missing from the table
    an error is logged and mimetypes.guess_extension is used.

    Always returns a string, leading dot included; it is empty when no
    extension applies or none can be guessed.
    """
    known_extensions = {
        "text/calendar":  u".ics",
        "image/png":      u".png",
        "image/x-png":    u"",
        "image/gif":      u".gif",
        "image/jpeg":     u".jpeg",
        "image/pjpeg":    u".jpg",
        "image/jpg":      u".jpg",
        "message/rfc822": u".eml",
    }
    if mime_type in known_extensions:
        return known_extensions[mime_type]
    log.error("Unknown file extension for %r, making a guess...",
              mime_type)
    guess = mimetypes.guess_extension(mime_type)
    if guess is None:
        # guess_extension() returns None for a type it does not know;
        # the result is concatenated to a base name, so fall back to
        # "no extension" instead.
        return u""
    return guess
174
175
176
def include_attachment(attach_dir, part, depth):
    """Replace the payload of *part* with its attachment stored on disk.

    Only parts carrying an ``X-Apple-Content-Length`` header are handled.
    The file is looked up in ``<attach_dir>/<part number>``, where the part
    number is made of the one-based indexes of *depth* joined with dots.
    Its content is base64-encoded when the part declares that transfer
    encoding, and stored unchanged otherwise.  When no file can be read,
    an error is logged and the part is left untouched.
    """
    if "X-Apple-Content-Length" not in part:
        return
    indent = " " * len(depth)
    dirpath = attach_dir + "/" + ".".join([str(level + 1) for level in depth])
    found = _find_attachment(dirpath, part, indent)
    if found is None:
        return
    name, data = found
    log.debug("%s  Attachment '%s' found", indent, name)

    cte = part["Content-Transfer-Encoding"]
    if cte is not None:
        # Transfer encoding names are case-insensitive (RFC 2045).
        encoding = str(cte).strip().lower()
        if encoding == "base64":
            data = _base64_lines(data)
        elif encoding in ("quoted-printable", "8bit"):
            # Stored as is; the only quoted-printable example found was
            # not QP-encoded.
            pass
        else:
            log.error("Attachment dir is %s", attach_dir)
            log.error("  File name is %s", name)
            log.error("  CTE %r", cte)
            log.error("  CD  %r", part["Content-Disposition"])
    part.set_payload(data)


def _read_file(path):
    """Return the content of *path* as bytes, or None if it is unreadable."""
    # python2 raises IOError; on python3 IOError is an alias of OSError,
    # which covers FileNotFoundError.  Naming FileNotFoundError itself
    # would be a NameError on python2.
    try:
        handle = open(path, "rb")
        try:
            return handle.read()
        finally:
            handle.close()
    except (IOError, OSError):
        return None


def _find_attachment(dirpath, part, indent):
    """Read the attachment of *part* from *dirpath*.

    Returns a ``(file name, content)`` pair, or None, after logging an
    error prefixed with *indent*, when no matching file can be read.
    """
    name = part.get_filename()
    if name is not None:
        # NOTE(review): the name comes from the message itself and is used
        # unsanitized in the path; confirm it cannot contain "..".
        data = _read_file(dirpath + "/" + name)
        if data is None:
            log.error("%s  Attachment '%s' not found in %s",
                      indent, name, dirpath)
            return None
        return name, data

    # No explicit name: try every base name Mail.app may have generated.
    extension = mimetypes_guess_extension(part.get_content_type())
    if extension is None:
        extension = u""
    for base in base_filenames:
        name = base + extension
        data = _read_file(dirpath + "/" + name)
        if data is not None:
            return name, data
    log.error("%s  Unnamed attachment of extension %s not found in %s",
              indent, extension, dirpath)
    return None


def _base64_lines(data):
    """Return *data* base64-encoded and split into lines of 76 characters."""
    encoded = base64.b64encode(data)
    return newline.join([encoded[i*76:(i+1)*76]
                         for i in range(len(encoded)//76+1)])
221
222
223
if __name__ == "__main__":
    # Exactly two arguments are expected: the source and the output dir.
    if len(sys.argv) != 3:
        print("Syntax: emlx2eml.py <source> <output_dir>")
        print("    <source> can be an EMLX file, or a directory that will")
        print("    be recursively searched for EMLX files.")
        sys.exit(1)
    input = sys.argv[1]
    out_dir = sys.argv[2]
    log.debug("Input %s; Output %s", input, out_dir)
    for emlx in find_emlx(input):
        copy_emlx(emlx, out_dir)
234