src.emlx2eml.include_attachment() - Code Metrics - Inspection of "cleanup" - AlexanderWillner/MailboxCleanup - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( c711b1...902473 )

by Alexander

created 2020-10-30 10:29 UTC

src.emlx2eml.include_attachment() C

↳ Parent: src.emlx2eml

Complexity

Conditions

Size

Total Lines	45
Code Lines	40

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	11
eloc	40
nop	3
dl	0
loc	45
rs	5.4
c	0
b	0
f	0

How to fix Complexity

#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Compatible with python3 and python2 (tested with at least 2.4)

# flake8: noqa
# pylint: skip-file
# Originally from https://github.com/LRGH/emlx2eml

import sys
import os
import logging
import struct
import email
import base64
import mimetypes

# Module-wide logger, writing "LEVEL: message" lines through a StreamHandler.
log = logging.getLogger("emlx2eml")
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
log.addHandler(console_handler)
# The two calls below run in sequence, so the effective level is ERROR;
# the DEBUG setting is overridden immediately. Remove or comment out the
# second call to see the log.debug() traces emitted by this module.
log.setLevel(logging.DEBUG)
log.setLevel(logging.ERROR)


def find_emlx(input):
    """Return the list of ``.emlx`` paths found at or below *input*.

    *input* may name a single file or a directory. Directories are walked
    recursively in ``os.listdir`` order. Symbolic links are never followed
    and never reported, even when they point at an ``.emlx`` file. Any
    other path that does not end in ``.emlx`` yields an empty list.
    """
    # Links are rejected first so that a link to a directory is not walked.
    if os.path.islink(input):
        return []

    if os.path.isdir(input):
        found = []
        for entry in os.listdir(input):
            found.extend(find_emlx(os.path.join(input, entry)))
        return found

    # Only the name is checked here: the path is not required to exist.
    if input.endswith(".emlx"):
        return [input]
    return []


# Python 2 / Python 3 compatibility shims.
#
# ``newline`` is the single byte 0x0A, built with struct so that the same
# expression yields a byte string on both interpreters.
# ``message_from_bytes`` parses a raw message into an email Message object and
# ``message_as_bytes`` serializes one back, including the "From " envelope line.
newline = struct.pack("B", 10)
if sys.version_info[0] == 2:
    # On Python 2, ``str`` already is a byte string.
    message_from_bytes = email.message_from_string

    def message_as_bytes(msg):
        """Serialize *msg*, with its unixfrom line, to a byte string."""
        return msg.as_string(unixfrom=True)
else:
    message_from_bytes = email.message_from_bytes

    def message_as_bytes(msg):
        """Serialize *msg*, with its unixfrom line, to bytes."""
        return msg.as_bytes(unixfrom=True)


def copy_emlx(emlx, out_dir):
    """Convert one EMLX file into ``<out_dir>/<id>.eml``.

    The output name is the numeric id of *emlx* (see ``get_numeric_id``)
    followed by ``.eml``. *out_dir* is created when missing, including any
    missing parent directories.

    Returns ``False`` without writing anything when the target file already
    exists; returns ``None`` after a successful conversion. Errors raised
    while parsing or writing propagate to the caller.
    """
    # Get the numeric id
    message_id = get_numeric_id(emlx)

    # Create output file
    if not os.path.exists(out_dir):
        # makedirs (rather than mkdir) so a nested output path also works.
        os.makedirs(out_dir)
    eml = os.path.join(out_dir, message_id + ".eml")
    log.debug("Extract %s to %s", emlx, eml)
    if os.path.exists(eml):
        # Never overwrite a previous conversion.
        log.error("%s already exists", eml)
        return False

    # Parse the EMLX file before opening the output, so that a parse failure
    # does not leave an empty .eml file behind.
    msg = parse_emlx(emlx)
    msg.set_unixfrom("From emlx2eml Thu Apr 19 00:00:00 2012")
    # TODO: generate relevant values for unixfrom

    # try/finally instead of "with": closes the handle deterministically
    # while staying within the syntax of the old interpreters this file
    # claims to support.
    out = open(eml, "wb")
    try:
        out.write(message_as_bytes(msg))
    finally:
        out.close()


def get_numeric_id(filename):
    """Return the message id encoded in an EMLX file name.

    The id is the base name of *filename* with the ``.emlx`` suffix removed
    and, when present, the ``.partial`` suffix that precedes it, e.g.
    ``"dir/123.partial.emlx"`` gives ``"123"``.

    Raises ``AssertionError`` when the name does not end in ``.emlx``.
    """
    name = os.path.basename(filename)
    # Raised explicitly instead of using an ``assert`` statement: asserts are
    # stripped under ``python -O``, which would let a wrong name through and
    # silently chop five arbitrary characters off it.
    if not name.endswith(".emlx"):
        raise AssertionError("%r does not end with .emlx" % (filename,))
    name = name[:-len(".emlx")]
    if name.endswith(".partial"):
        name = name[:-len(".partial")]
    return name


def parse_emlx(filename):
    """Parse an EMLX file and return it as a complete email Message.

    The file is read as: a first line holding a decimal byte count, then
    that many bytes of message data, then trailing data that is currently
    ignored. Attachments stored outside the message are looked up under
    ``<dir of filename>/../Attachments/<id>`` and merged back in by
    ``parse_msg``.

    Raises ``ValueError`` when the length line is missing or not a number,
    and ``AssertionError`` (from ``get_numeric_id``) when *filename* does
    not end in ``.emlx``.
    """
    # Read file; try/finally guarantees the handle is closed.
    handle = open(filename, "rb")
    try:
        content = handle.read()
    finally:
        handle.close()

    # Extract parts
    eol = content.find(newline)
    if eol < 0:
        # Without this check find() == -1 would be used as a slice bound and
        # the wrong bytes would be parsed as the length and the body.
        raise ValueError("%s: no length line found" % (filename,))
    length = int(content[:eol])
    body = content[eol+1:eol+1+length]
    # TODO: parse the content of 'plist', e.g. using plistlib
    # plist = content[eol+1+length:]
    msg = message_from_bytes(body)

    # Find where attachments may be
    message_id = get_numeric_id(filename)
    attach_dir = os.path.dirname(filename)
    if attach_dir == "":
        # A bare file name lives in the current directory.
        attach_dir = "."
    attach_dir += "/../Attachments/" + message_id

    # Make complete eml
    parse_msg(attach_dir, msg, [])
    return msg


def parse_msg(attach_dir, msg, depth):
    """Walk *msg* recursively and pull external attachments into its parts.

    *depth* is the list of zero-based child indexes leading from the root
    message to *msg* (empty for the root). Each child of a multipart
    message is first walked itself and then handed to
    ``include_attachment`` together with its own index path.
    """
    indent = " " * len(depth)
    # Parts are shown one-based and dot-separated, e.g. "1.2".
    label = ".".join([str(position + 1) for position in depth])
    log.debug("%sPART %s %r of type %s", indent, label, msg,
              msg.get_content_type())

    if not msg.is_multipart():
        return
    for idx, part in enumerate(msg.get_payload()):
        child_depth = depth + [idx]
        parse_msg(attach_dir, part, child_depth)
        include_attachment(attach_dir, part, child_depth)


# Default attachment names, one per Mail.app localization.
#
# When the attachment has no explicit filename, Mail.app generates a name
# which we want to guess. The base_filename depends on the OS language at
# the time the mail was downloaded. The list below is extracted by parsing
# /System/Library/PrivateFrameworks/Notes.framework/Versions/A/Resources/*.lproj/MailCore.strings
#
# include_attachment() tries these in order, appending the guessed file
# extension, and stops at the first name that exists on disk.
# Note: "Lampiran Mail" is listed twice (id and ms share the same string).
base_filenames = (
    u"مرفق البريد",  # ar
    u"Adjunt de Mail",  # ca
    u"Příloha pošty",  # cs
    u"Postbilag",  # da
    u"Mail-Anhang",  # de
    u"Συνημμένο Mail",  # el
    u"Mail Attachment",  # en, en_AU, en_GB
    u"Archivo adjunto al mensaje",  # es
    u"Archivo adjunto a un correo",  # es_419
    u"Sähköpostiliite",  # fi
    u"Pièce jointe",  # fr, fr_CA
    u"קובץ מצורף לדואר",  # he
    u"मेल अटैचमेंट",  # hi
    u"E-mail privitak",  # hr
    u"Mail melléklet",  # hu
    u"Lampiran Mail",  # id
    u"Allegato di posta elettronica",  # it
    u"メールの添付ファイル",  # ja
    u"Mail 첨부 파일",  # ko
    u"Lampiran Mail",  # ms
    u"Mail-bijlage",  # nl
    u"E-postvedlegg",  # no
    u"Załącznik poczty",  # pl
    u"Anexo de E-mail",  # pt
    u"Anexo de e‑mail",  # pt_PT
    u"Fișier atașat Mail",  # ro
    u"Вложенный файл Почты",  # ru
    u"Mailová príloha",  # sk
    u"Brevbilaga",  # sv
    u"ไฟล์แนบเมล",  # th
    u"Posta İlişiği",  # tr
    u"Поштове прикріплення",  # uk
    u"Tệp đính kèm của Mail",  # vi
    u"邮件附件",  # yue_CN, zh_CN
    u"郵件附件",  # zh_HK, zh_TW
)


def mimetypes_guess_extension(mime_type):
    """Return the file extension Mail.app is expected to use for *mime_type*.

    A hardcoded table is consulted first, because several extensions can
    map to one MIME type and ``mimetypes.guess_extension`` does not always
    pick the one Mail.app generates. For types missing from the table an
    error is logged and the ``mimetypes`` guess is returned instead; that
    fallback returns ``None`` when the standard library has no extension
    for the type.
    """
    known_extensions = {
        "text/calendar":  u".ics",
        "image/png":      u".png",
        "image/x-png":    u"",
        "image/gif":      u".gif",
        "image/jpeg":     u".jpeg",
        "image/pjpeg":    u".jpg",
        "image/jpg":      u".jpg",
        "message/rfc822": u".eml",
    }
    # Membership test rather than .get(): the table holds an empty-string
    # value, which must be returned as is.
    if mime_type in known_extensions:
        return known_extensions[mime_type]

    log.error("Unknown file extension for %r, making a guess...",
              mime_type)
    return mimetypes.guess_extension(mime_type)


def _read_binary(path):
    """Return the full content of *path* as bytes, always closing the file."""
    handle = open(path, "rb")
    try:
        return handle.read()
    finally:
        handle.close()


def _find_unnamed_attachment(dirpath, extension):
    """Try each localized default name in *dirpath*.

    Returns ``(file_name, data)`` for the first candidate that can be read,
    or ``None`` when none of them can.
    """
    for base in base_filenames:
        candidate = base + extension
        try:
            return candidate, _read_binary(dirpath + "/" + candidate)
        # IOError is what Python 2 raises and is an alias of OSError on
        # Python 3, so it also covers FileNotFoundError.
        except IOError:
            continue
    return None


def _base64_lines(data):
    """Base64-encode *data* and break the result into 76-character lines."""
    encoded = base64.b64encode(data)
    return newline.join([encoded[i*76:(i+1)*76]
                         for i in range(len(encoded)//76+1)])


def include_attachment(attach_dir, part, depth):
    """Load an externally stored attachment into *part*.

    Only parts carrying an ``X-Apple-Content-Length`` header are processed;
    any other part is left untouched. The attachment is read from
    ``<attach_dir>/<one-based dotted depth>/<file name>``. The file name is
    the one declared by the part or, when there is none, the first
    localized default name (``base_filenames``) plus a guessed extension
    that exists on disk.

    The data is base64-encoded when the part declares that transfer
    encoding and stored as is for a missing, ``quoted-printable`` or
    ``8bit`` encoding. Any other encoding is reported through the logger
    and the raw data is stored anyway. A file that cannot be read is
    logged and the part is left unchanged. Returns ``None`` in all cases.
    """
    if "X-Apple-Content-Length" not in part:
        return
    indent = " " * len(depth)
    dirpath = attach_dir + "/" + ".".join([str(idx + 1) for idx in depth])

    file = part.get_filename()
    if file is None:
        extension = mimetypes_guess_extension(part.get_content_type())
        if extension is None:
            # The mimetypes fallback knows no extension for this type; use
            # an empty suffix instead of failing on "base + None".
            extension = u""
        found = _find_unnamed_attachment(dirpath, extension)
        if found is None:
            log.error("%s  Unnamed attachment of extension %s not found in %s",
                      indent, extension, dirpath)
            return
        file, data = found
    else:
        try:
            data = _read_binary(dirpath + "/" + file)
        # Same exception class as for unnamed attachments, so both paths
        # behave alike and the handler also works on Python 2.
        except IOError:
            log.error("%s  Attachment '%s' not found in %s",
                      indent, file, dirpath)
            return
    log.debug("%s  Attachment '%s' found", indent, file)

    cte = part["Content-Transfer-Encoding"]
    if cte is None:
        encoding = None
    else:
        # Transfer-encoding tokens are case-insensitive (RFC 2045), so
        # "BASE64" must be handled like "base64".
        encoding = str(cte).strip().lower()

    if encoding == "base64":
        data = _base64_lines(data)
    elif encoding not in (None, "quoted-printable", "8bit"):
        # Unknown encoding: report it, but still store the raw data.
        log.error("Attachment dir is %s", attach_dir)
        log.error("  File name is %s", file)
        log.error("  CTE %r", cte)
        log.error("  CD  %r", part["Content-Disposition"])
    # quoted-printable data is stored unchanged: per the original author's
    # note, the only example found on disk was not actually QP-encoded.
    part.set_payload(data)


# Command-line entry point: emlx2eml.py <source> <output_dir>
if __name__ == "__main__":
    # Exactly two arguments are required after the program name.
    if len(sys.argv) != 3:
        print("Syntax: emlx2eml.py <source> <output_dir>")
        print("    <source> can be an EMLX file, or a directory that will")
        print("    be recursively searched for EMLX files.")
        sys.exit(1)
    input, out_dir = sys.argv[1:]
    log.debug("Input %s; Output %s", input, out_dir)
    # Convert every EMLX file found at or below <source>.
    for emlx in find_emlx(input):
        copy_emlx(emlx, out_dir)


1			#! /usr/bin/env python
2			# -*- coding: utf-8 -*-
3			# Compatible with python3 and python2 (tested with at least 2.4)
4
5			# flake8: noqa
6			# pylint: skip-file
7			# Originally from https://github.com/LRGH/emlx2eml
8
9			import sys
10			import os
11			import logging
12			import struct
13			import email
14			import base64
15			import mimetypes
16
17			log = logging.getLogger("emlx2eml")
18			console_handler = logging.StreamHandler()
19			console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
20			log.addHandler(console_handler)
21			log.setLevel(logging.DEBUG)
22			log.setLevel(logging.ERROR)
23
24
25			def find_emlx(input):
26			if os.path.islink(input):
27			return []
28			elif os.path.isdir(input):
29			files = []
30			for x in os.listdir(input):
31			files += find_emlx(os.path.join(input, x))
32			return files
33			elif input.endswith(".emlx"):
34			return [input]
35			else:
36			return []
37
38
39			# Some definitions, to enforce compatibility with python2 and python3
40			newline = struct.pack("B", 10)
41			if sys.version_info[0] == 2:
42			message_from_bytes = email.message_from_string
43			def message_as_bytes(msg): return msg.as_string(unixfrom=True)
44			else:
45			message_from_bytes = email.message_from_bytes
46			def message_as_bytes(msg): return msg.as_bytes(unixfrom=True)
47
48
49			def copy_emlx(emlx, out_dir):
50			# Get the numeric id
51			id = get_numeric_id(emlx)
52
53			# Create output file
54			if not os.path.exists(out_dir):
55			os.mkdir(out_dir)
56			eml = os.path.join(out_dir, id+".eml")
57			log.debug("Extract %s to %s", emlx, eml)
58			if os.path.exists(eml):
59			log.error("%s already exists", eml)
60			return False
61			# Parse the EMLX file
62			msg = parse_emlx(emlx)
63			msg.set_unixfrom("From emlx2eml Thu Apr 19 00:00:00 2012")
64			# TODO: generate relevant values for unixfrom
65			open(eml, "wb").write(message_as_bytes(msg))
66
67
68			def get_numeric_id(filename):
69			id = os.path.basename(filename)
70			assert(id.endswith(".emlx"))
71			id = id[:-5]
72			if id.endswith(".partial"):
73			id = id[:-8]
74			return id
75
76
77			def parse_emlx(filename):
78			# Read file
79			content = open(filename, "rb").read()
80
81			# Extract parts
82			eol = content.find(newline)
83			length = int(content[:eol])
84			body = content[eol+1:eol+1+length]
85			# TODO: parse the content of 'plist', e.g. using plistlib
86			# plist = content[eol+1+length:]
87			msg = message_from_bytes(body)
88
89			# Find where attachments may be
90			id = get_numeric_id(filename)
91			attach_dir = os.path.dirname(filename)
92			if attach_dir == "":
93			attach_dir = "."
94			attach_dir += "/../Attachments/" + id
95
96			# Make complete eml
97			parse_msg(attach_dir, msg, [])
98			return msg
99
100
101			def parse_msg(attach_dir, msg, depth):
102			log.debug("%sPART %s %r of type %s", " "*len(depth),
103			".".join([str(_+1) for _ in depth]), msg, msg.get_content_type())
104			if msg.is_multipart():
105			for idx, part in enumerate(msg.get_payload()):
106			parse_msg(attach_dir, part, depth+[idx])
107			include_attachment(attach_dir, part, depth+[idx])
108
109
110			# When the attachment has no explicit filename, Mail.app generates a name
111			# which we want to guess. The base_filename depends on the OS language at
112			# the time the mail was downloaded. The list below is extracted by parsing
113			# /System/Library/PrivateFrameworks/Notes.framework/Versions/A/Resources/*.lproj/MailCore.strings
114			base_filenames = (
115			u"مرفق البريد", # ar
116			u"Adjunt de Mail", # ca
117			u"Příloha pošty", # cs
118			u"Postbilag", # da
119			u"Mail-Anhang", # de
120			u"Συνημμένο Mail", # el
121			u"Mail Attachment", # en, en_AU, en_GB
122			u"Archivo adjunto al mensaje", # es
123			u"Archivo adjunto a un correo", # es_419
124			u"Sähköpostiliite", # fi
125			u"Pièce jointe", # fr, fr_CA
126			u"קובץ מצורף לדואר", # he
127			u"मेल अटैचमेंट", # hi
128			u"E-mail privitak", # hr
129			u"Mail melléklet", # hu
130			u"Lampiran Mail", # id
131			u"Allegato di posta elettronica", # it
132			u"メールの添付ファイル", # ja
133			u"Mail 첨부 파일", # ko
134			u"Lampiran Mail", # ms
135			u"Mail-bijlage", # nl
136			u"E-postvedlegg", # no
137			u"Załącznik poczty", # pl
138			u"Anexo de E-mail", # pt
139			u"Anexo de e‑mail", # pt_PT
140			u"Fișier atașat Mail", # ro
141			u"Вложенный файл Почты", # ru
142			u"Mailová príloha", # sk
143			u"Brevbilaga", # sv
144			u"ไฟล์แนบเมล", # th
145			u"Posta İlişiği", # tr
146			u"Поштове прикріплення", # uk
147			u"Tệp đính kèm của Mail", # vi
148			u"邮件附件", # yue_CN, zh_CN,
149			u"郵件附件", # zh_HK, zh_TW
150			)
151
152
153			def mimetypes_guess_extension(mime_type):
154			# We don't want to always use mimetypes.guess_extension,
155			# because it does not always return what is generated by Mail.app,
156			# mainly because multiple extensions can be associated to a single
157			# MIME type.
158			# We prefer to use a hardcoded table.
159			try:
160			return {
161			"text/calendar": u".ics",
162			"image/png": u".png",
163			"image/x-png": u"",
164			"image/gif": u".gif",
165			"image/jpeg": u".jpeg",
166			"image/pjpeg": u".jpg",
167			"image/jpg": u".jpg",
168			"message/rfc822": u".eml",
169			}[mime_type]
170			except KeyError:
171			log.error("Unknown file extension for %r, making a guess...",
172			mime_type)
173			return mimetypes.guess_extension(mime_type)
174
175
176			def include_attachment(attach_dir, part, depth):
177			if "X-Apple-Content-Length" not in part:
178			return
179			dirpath = attach_dir + "/" + ".".join([str(_+1) for _ in depth])
180			file = part.get_filename()
181			if file is None:
182			extension = mimetypes_guess_extension(part.get_content_type())
183			for base in base_filenames:
184			file = base + extension
185			try:
186			data = open(dirpath+"/"+file, "rb").read()
187			break
188			# python2 raises IOError, python3 raises FileNotFoundError
189			except (IOError, FileNotFoundError):
190			continue
191			else:
192			log.error("%s Unnamed attachment of extension %s not found in %s",
193			" "*len(depth), extension, dirpath)
194			return
195			else:
196			try:
197			data = open(dirpath+"/"+file, "rb").read()
198			except FileNotFoundError:
199			log.error("%s Attachment '%s' not found in %s",
200			" "*len(depth), file, dirpath)
201			return
202			log.debug("%s Attachment '%s' found", " "*len(depth), file)
203			cte = part["Content-Transfer-Encoding"]
204			if cte is None:
205			pass
206			elif cte == "base64":
207			data = base64.b64encode(data)
208			data = newline.join([data[i*76:(i+1)*76]
209			for i in range(len(data)//76+1)])
210			elif cte == "quoted-printable":
211			# The only example I found was not QP-encoded
212			pass
213			elif cte == "8bit":
214			pass
215			else:
216			log.error("Attachment dir is %s", attach_dir)
217			log.error(" File name is %s", file)
218			log.error(" CTE %r", cte)
219			log.error(" CD %r", part["Content-Disposition"])
220			part.set_payload(data)
221
222
223			if __name__ == "__main__":
224			try:
225			input, out_dir = sys.argv[1:]
226			except ValueError:
227			print("Syntax: emlx2eml.py <source> <output_dir>")
228			print(" <source> can be an EMLX file, or a directory that will")
229			print(" be recursively searched for EMLX files.")
230			sys.exit(1)
231			log.debug("Input %s; Output %s", input, out_dir)
232			for emlx in find_emlx(input):
233			copy_emlx(emlx, out_dir)
234

AlexanderWillner / MailboxCleanup

Push — main ( c711b1...902473 )

src.emlx2eml.include_attachment() C

Complexity

Size

Duplication

Importance

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like