Passed
Push — main ( bd5175...48c369 )
by Alexander
01:25
created

src.emlx2eml   A

Complexity

Total Complexity 23

Size/Duplication

Total Lines 157
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 110
dl 0
loc 157
rs 10
c 0
b 0
f 0
wmc 23

7 Functions

Rating   Name   Duplication   Size   Complexity  
B include_attachment() 0 27 6
A parse_msg() 0 7 3
A message_as_bytes() 0 1 1
A parse_emlx() 0 22 2
A find_emlx() 0 12 5
A get_numeric_id() 0 7 2
A copy_emlx() 0 17 3
1
#! /usr/bin/env python
2
# -*- coding: utf-8 -*-
3
# Compatible with python3 and python2 (tested with at least 2.4)
4
5
# flake8: noqa
6
# pylint: skip-file
7
# Originally from https://github.com/LRGH/emlx2eml
8
9
import sys
10
import os
11
import logging
12
import struct
13
import email
14
import base64
15
import mimetypes
16
17
# Module-wide logger.  Records are emitted through a stream handler and
# rendered as "LEVEL: message".
log = logging.getLogger("emlx2eml")
# DEBUG traces every extracted message and MIME part; use logging.ERROR
# here instead to keep only the problems.
log.setLevel(logging.DEBUG)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
log.addHandler(console_handler)
23
24
25
def find_emlx(input):
    """Collect the paths of all ``.emlx`` files reachable from *input*.

    *input* is a path to a file or to a directory.  Directories are
    searched recursively.  Symbolic links are never followed nor reported.
    Returns a list of paths, empty when nothing matches.
    """
    # Links are rejected first, whatever they point to.
    if os.path.islink(input):
        return []
    if not os.path.isdir(input):
        # Not a directory: keep the path only if it has the EMLX extension.
        if input.endswith(".emlx"):
            return [input]
        return []
    found = []
    for entry in os.listdir(input):
        found.extend(find_emlx(os.path.join(input, entry)))
    return found
37
38
39
# Python 2 / Python 3 compatibility shims.
# ``newline`` is the single byte 0x0A, built through struct so that it has
# the interpreter's native byte-string type.
newline = struct.pack("B", 10)
if sys.version_info[0] != 2:
    message_from_bytes = email.message_from_bytes

    def message_as_bytes(msg):
        """Serialize *msg*, Unix "From " line included, as bytes."""
        return msg.as_bytes(unixfrom=True)
else:
    # Python 2: the string-based API of the email package is used instead.
    message_from_bytes = email.message_from_string

    def message_as_bytes(msg):
        """Serialize *msg*, Unix "From " line included, as a string."""
        return msg.as_string(unixfrom=True)
47
48
49
def copy_emlx(emlx, out_dir):
    """Convert the EMLX file *emlx* into an EML file stored in *out_dir*.

    The output file is named ``<numeric id>.eml``, where the numeric id is
    derived from the name of *emlx*.  *out_dir* is created when it does not
    exist, including missing parent directories.  An existing output file
    is never overwritten.

    Returns True when the EML file has been written, False when it already
    existed (an error is logged in that case).
    """
    # Get the numeric id
    msg_id = get_numeric_id(emlx)

    # Create output file
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    eml = os.path.join(out_dir, msg_id + ".eml")
    log.debug("Extract %s to %s", emlx, eml)
    if os.path.exists(eml):
        log.error("%s already exists", eml)
        return False
    # Parse the EMLX file
    msg = parse_emlx(emlx)
    msg.set_unixfrom("From emlx2eml Thu Apr 19 00:00:00 2012")
    # TODO: generate relevant values for unixfrom
    # try/finally (rather than 'with') closes the file on every python
    # version this script claims to support.
    eml_file = open(eml, "wb")
    try:
        eml_file.write(message_as_bytes(msg))
    finally:
        eml_file.close()
    return True
66
67
68
def get_numeric_id(filename):
    """Return the message id encoded in the name of an EMLX file.

    The id is the base name of *filename* without its ``.emlx`` extension
    and without a ``.partial`` suffix, if there is one
    (``dir/123.partial.emlx`` gives ``"123"``).

    Raises AssertionError when *filename* does not end with ``.emlx``.
    """
    name = os.path.basename(filename)
    if not name.endswith(".emlx"):
        # Raised explicitly: an 'assert' statement is removed by python -O,
        # which would silently return a truncated, meaningless id.
        raise AssertionError("%r is not an .emlx file name" % (filename,))
    name = name[:-len(".emlx")]
    if name.endswith(".partial"):
        name = name[:-len(".partial")]
    return name
75
76
77
def parse_emlx(filename):
    """Parse the EMLX file *filename* and return it as an email message.

    The first line of the file is read as the decimal length, in bytes, of
    the message that follows it.  Whatever comes after the message is
    ignored.  Attachments are then looked up in
    ``<directory of filename>/../Attachments/<numeric id>`` and included in
    the returned message.

    Raises ValueError when the file does not start with a length line.
    """
    # Read file; try/finally (rather than 'with') closes it on every python
    # version this script claims to support.
    emlx_file = open(filename, "rb")
    try:
        content = emlx_file.read()
    finally:
        emlx_file.close()

    # Extract parts
    eol = content.find(newline)
    if eol < 0:
        # Without this check, content[:-1] would be parsed as the length.
        raise ValueError("%s: no length line, not an EMLX file" % filename)
    length = int(content[:eol])
    body = content[eol+1:eol+1+length]
    # TODO: parse the content of 'plist', e.g. using plistlib
    # plist = content[eol+1+length:]
    msg = message_from_bytes(body)

    # Find where attachments may be
    msg_id = get_numeric_id(filename)
    attach_dir = os.path.dirname(filename)
    if attach_dir == "":
        attach_dir = "."
    attach_dir += "/../Attachments/" + msg_id

    # Make complete eml
    parse_msg(attach_dir, msg, [])
    return msg
99
100
101
def parse_msg(attach_dir, msg, depth):
    """Walk *msg* recursively and include the attachments of its parts.

    *depth* is the list of 0-based indexes leading from the top-level
    message to *msg*; it is logged as a 1-based dotted path.  Every part of
    a multipart message is walked first, then handed to
    include_attachment() together with *attach_dir*.
    """
    indent = " " * len(depth)
    dotted_path = ".".join([str(position + 1) for position in depth])
    log.debug("%sPART %s %r of type %s",
              indent, dotted_path, msg, msg.get_content_type())
    if not msg.is_multipart():
        return
    for idx, part in enumerate(msg.get_payload()):
        part_depth = depth + [idx]
        parse_msg(attach_dir, part, part_depth)
        include_attachment(attach_dir, part, part_depth)
108
109
110
# Mail.app generates a file name for attachments that have no explicit one;
# this is the stem used to guess that name.
base_filename = u"Mail Attachment"
# Register two additional JPEG MIME types so that an extension can be
# guessed for them.  Registration order is kept: the last one registered
# becomes the type associated with '.jpg'.
for _jpeg_alias in ('image/pjpeg', 'image/jpg'):
    mimetypes.add_type(_jpeg_alias, '.jpg', strict=True)
del _jpeg_alias
115
116
117
def include_attachment(attach_dir, part, depth):
    """Replace the payload of *part* with its attachment stored on disk.

    Parts without an ``X-Apple-Content-Length`` header are left untouched.
    For the others, the attachment is read from
    ``<attach_dir>/<1-based dotted depth>/<file name>``, the file name being
    the one declared by the part or, failing that, a guessed
    ``Mail Attachment<extension>``.  The content is base64-encoded in lines
    of 76 characters when the part declares a base64 transfer encoding.

    A missing attachment file is logged and the part is left unchanged;
    any other I/O error is propagated.
    """
    import errno  # local import: only used to classify open() failures

    if "X-Apple-Content-Length" not in part:
        return
    # NOTE(review): the name comes from the message headers and is used
    # as-is to build a path; confirm that path separators in it are wanted.
    name = part.get_filename()
    if name is None:
        # guess_extension() returns None for MIME types it does not know.
        extension = mimetypes.guess_extension(part.get_content_type()) or ""
        name = base_filename + extension
    indent = " " * len(depth)
    dirpath = attach_dir + "/" + ".".join([str(i + 1) for i in depth])
    try:
        attachment = open(dirpath + "/" + name, "rb")
    except IOError:
        # IOError is raised by python2 and is an alias of OSError on
        # python3, where FileNotFoundError is the ENOENT case.
        if sys.exc_info()[1].errno != errno.ENOENT:
            raise
        log.error("%s  Attachment '%s' not found in %s",
                  indent, name, dirpath)
        return
    try:
        data = attachment.read()
    finally:
        attachment.close()
    log.debug("%s  Attachment '%s' found", indent, name)
    cte = part["Content-Transfer-Encoding"]
    if cte is None:
        pass
    elif str(cte).strip().lower() == "base64":
        # The encoding token is case-insensitive ("Base64", "BASE64", ...).
        encoded = base64.b64encode(data)
        data = newline.join([encoded[i*76:(i+1)*76]
                             for i in range(len(encoded)//76+1)])
    else:
        # Unsupported encoding: reported, the raw content is still included.
        log.error("Attachment dir is %s", attach_dir)
        log.error("  CTE %r", cte)
        log.error("  CD  %r", part["Content-Disposition"])
    part.set_payload(data)
144
145
146
if __name__ == "__main__":
    # Exactly two command line arguments are expected.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Syntax: emlx2eml.py <source> <output_dir>")
        print("    <source> can be an EMLX file, or a directory that will")
        print("    be recursively searched for EMLX files.")
        sys.exit(1)
    source, out_dir = cli_args
    log.debug("Input %s; Output %s", source, out_dir)
    # Convert every EMLX file found under the source.
    for emlx in find_emlx(source):
        copy_emlx(emlx, out_dir)
157