Passed
Push — main ( 4b8065...a02c75 )
by Alexander
01:39
created

src.mailbox_imap   D

Complexity

Total Complexity 59

Size/Duplication

Total Lines 368
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 237
dl 0
loc 368
rs 4.08
c 0
b 0
f 0
wmc 59

17 Methods

Rating   Name   Duplication   Size   Complexity  
A MailboxCleanerIMAP.cleanup() 0 4 1
B MailboxCleanerIMAP.get_msg() 0 30 6
A MailboxCleanerIMAP.get_msgs_from_folder() 0 13 1
A MailboxCleanerIMAP._load_cache() 0 10 4
A MailboxCleanerIMAP.get_flags_from_struct() 0 9 2
A MailboxCleanerIMAP.get_folders() 0 22 3
B MailboxCleanerIMAP.upload() 0 33 5
C MailboxCleanerIMAP.process_folders() 0 60 11
A MailboxCleanerIMAP.login() 0 14 4
A MailboxCleanerIMAP.logout() 0 14 3
A MailboxCleanerIMAP.process_directory() 0 3 1
A MailboxCleanerIMAP.__init__() 0 10 1
A MailboxCleanerIMAP.does_msg_exist() 0 17 4
A MailboxCleanerIMAP.get_msg_from_struct() 0 13 3
A MailboxCleanerIMAP._save_cache() 0 7 3
A MailboxCleanerIMAP.convert_date() 0 8 1
B MailboxCleanerIMAP.replace_msg() 0 30 6

How to fix   Complexity   

Complexity

Complex classes like src.mailbox_imap often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common way to find such a component is to look for fields or methods that share the same prefix or suffix.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
4
"""
5
Module to download and to detach/strip/remove attachments
6
from e-mails on IMAP servers.
7
"""
8
9
from __future__ import print_function
10
11
import re
12
import email
13
import email.mime.text
14
import email.utils
15
import email.parser
16
import imaplib
17
import logging
18
import socket
19
import time
20
import typing
21
import collections
22
import os.path
23
import pickle
24
25
from src.mailbox_message import MailboxCleanerMessage
26
27
# Raise imaplib's limit on the length of a single server response line so
# that very large responses are not rejected by the client library.
imaplib._MAXLINE = 10000000  # pylint: disable=protected-access


# Module metadata
__author__ = "Alexander Willner"
__copyright__ = "Copyright 2020, Alexander Willner"
__credits__ = ["github.com/guido4000",
               "github.com/halteproblem", "github.com/jamesridgway"]
__license__ = "MIT"
__version__ = "1.0.0"
__maintainer__ = "Alexander Willner"
__email__ = "[email protected]"
__status__ = "Development"
39
40
41
class MailboxCleanerIMAP():
    """
    Download and detach/strip/remove attachments from e-mails
    on IMAP servers.

    The instance keeps a cache (``self.cache``, pickled to
    ``self.cache_file``) that maps identifiers of already handled
    messages to their subjects so that they can be skipped later.

    NOTE(review): process_folders() keys the cache by the UID returned
    from the folder search (bytes) while upload() keys it by the value of
    ``message.get_uid()``; confirm that sharing one mapping is intended.
    """

    # Number of retries to get messages
    __RETRIES = 2

    # IMAP folders to ignore
    __IGNORE_PREFIX = ('Contacts', 'Calendar', '"Calendar',
                       'Trash', '"Deleted', 'Tasks',
                       '"[Gmail]"')

    def __init__(self, args, imap=None):
        """
        Initialize class.

        :param args: settings object; this class reads ``server``,
            ``user``, ``password``, ``target``, ``folder``, ``all``,
            ``read_only``, ``upload``, ``reset_cache`` and, when present,
            ``logger`` from it.
        :param imap: optional, already created IMAP connection; when
            omitted, login() creates an ``imaplib.IMAP4_SSL`` connection.
        """

        self.args = args
        self.message = MailboxCleanerMessage(args)
        self.cache = collections.OrderedDict()
        self.cache_file = os.path.join(
            self.args.target, '_cache-' + args.server + '.pkl')
        self.imap: typing.Optional[imaplib.IMAP4_SSL] = imap
        # Set to True (e.g. from another thread) to stop process_folders()
        self.stopped: bool = False

    def cleanup(self):
        """Cleanup after error: persist the cache."""

        self._save_cache()

    def login(self):
        """
        Log into the IMAP server and load the cache.

        :raises SystemExit: if the server name cannot be resolved or the
            IMAP server rejects the login.
        """

        try:
            if self.imap is None:
                self.imap = imaplib.IMAP4_SSL(self.args.server)
            self.imap.login(self.args.user, self.args.password)
            self._load_cache()
        except socket.gaierror as error:
            raise SystemExit('Login failed (wrong server?): %s' %
                             error) from error
        except imaplib.IMAP4.error as error:
            raise SystemExit('Login failed (wrong password?): %s' %
                             error) from error

    def logout(self):
        """
        Log out of the IMAP server.

        Both steps are best effort: a missing connection or an IMAP error
        while closing/logging out is ignored on purpose.
        """

        try:
            self.imap.close()
            logging.warning('Connection\t: Closed')
        except (AttributeError, imaplib.IMAP4.error):
            pass

        try:
            self.imap.logout()
            logging.warning('Connection\t: Logged Out')
        except (AttributeError, imaplib.IMAP4.error):
            pass

    def does_msg_exist(self, msg) -> bool:
        """
        Check if message is already on the server.

        Searches ``args.folder`` for an undeleted message whose Message-ID
        header equals ``message.get_uid(msg)``. A hit is only reported
        (and cached) when ``args.upload`` is set.

        :param msg: the message to look for.
        :returns: True if a duplicate was found, False otherwise.
        """

        msg_uid = self.message.get_uid(msg)
        self.imap.select(self.args.folder, readonly=self.args.read_only)
        status, data = self.imap.uid('SEARCH', None,
                                     '(HEADER Message-ID "%s") UNDELETED'
                                     % msg_uid)

        # An empty result list or an empty first item both mean "no match"
        found = bool(data) and bool(data[0])
        if found and self.args.upload is not None:
            logging.warning('    Duplicate\t: %s', status)
            # The subject is derived from the message, not from its uid
            self.cache[msg_uid] = self.message.get_subject(msg)
            return True

        return False

    def process_directory(self):
        """Iterate over mails from a local directory for upload."""
        self.message.process_directory(self.upload)

    def process_folders(self):
        """
        Iterate over mails in configured folders.

        For every message that is not in the cache: fetch it, let
        ``self.message`` download and detach its attachments and, if that
        modified the message, replace it on the server. Setting
        ``self.stopped`` to True aborts the iteration.
        """

        folders = self.get_folders()
        self.stopped = False

        # Iterate over each folder
        for i, folder in enumerate(folders, start=1):

            # For threaded environments
            if self.stopped:
                break

            # Get all mails in this folder
            if hasattr(self.args, 'logger'):
                self.args.logger.log_progress_folders(i, len(folders), folder)
            logging.info('Progress\t: %s / %s (folders)', i, len(folders))
            logging.warning('Folder\t\t: %s (started)', folder)
            msg_uids = self.get_msgs_from_folder(folder)
            total = len(msg_uids)

            # Iterate over each email
            for j, msg_uid in enumerate(msg_uids, start=1):

                # For threaded environments
                if self.stopped:
                    break

                # Skip if already in cache
                logging.info('Progress\t: %s / %s (mail uid: %s)',
                             j, total, msg_uid.decode())
                if msg_uid in self.cache:
                    logging.info('  Subject\t: %s (cached)',
                                 self.cache[msg_uid])
                    self._report_mail_progress(j, total, self.cache[msg_uid])
                    continue

                # Get the actual email
                try:
                    msg, msg_flags = self.get_msg(msg_uid)
                except imaplib.IMAP4.error:
                    logging.info('  Error\t: Message %s skipped', msg_uid)
                    continue
                subject = self.message.get_subject(msg)
                logging.info('  Subject\t: %s', subject)
                self._report_mail_progress(j, total, subject)

                # Download and detach attachments from email
                modified = self.message.download_and_detach_attachments(msg)

                # Upload new email
                if modified:
                    self.replace_msg(msg, msg_flags, msg_uid, folder)

                self.cache[msg_uid] = subject

            logging.warning('Folder\t\t: %s (completed)', folder)

    def _report_mail_progress(self, index, total, subject):
        """Forward per-mail progress to the optional ``args.logger``."""

        if hasattr(self.args, 'logger'):
            self.args.logger.log_progress_mails(index, total, subject)

    def replace_msg(self, msg, msg_flags, msg_uid, folder):
        """
        Upload new message and remove the old one.

        Nothing happens in read-only mode. The old message is only flagged
        as deleted and expunged after the upload returned 'OK'.

        :param msg: the (modified) message to upload.
        :param msg_flags: flags to set on the uploaded message.
        :param msg_uid: UID of the old message within ``folder``.
        :param folder: folder that contains the old message.
        :raises imaplib.IMAP4.error: if ``folder`` cannot be selected.
        """

        # Only upload in non-readonly mode
        if self.args.read_only:
            logging.debug('    Replacing\t: skipped (read-only)')
            return

        # Upload new message
        status, data = self.upload(msg, msg_flags)

        if status != 'OK' or self.args.read_only is not False:
            logging.warning('    Result\t: %s (%s)', status, data)
            return

        # Delete old message. The select must succeed: otherwise the STORE
        # below would flag a message in whatever folder is currently
        # selected. An explicit check (instead of assert) also holds
        # when Python runs with -O.
        result = self.imap.select(folder, readonly=self.args.read_only)
        if result[0] != 'OK':
            raise imaplib.IMAP4.error(
                'Could not select folder %s: %s' % (folder, result))
        result = self.imap.uid('STORE', msg_uid, '+FLAGS', '\\Deleted')
        logging.debug('    Deleting\t: %s', result)
        # GMail needs special treatment
        try:
            self.imap.uid('STORE', msg_uid, '+X-GM-LABELS', '\\Trash')
        except imaplib.IMAP4.error:
            pass
        # Sometimes expunge just fails with an EOF socket error
        try:
            self.imap.expunge()
            logging.debug('    Comment\t: Expunged')
        except imaplib.IMAP4.abort:
            pass

    def upload(self, msg, msg_flags='\\Seen'):
        """
        Upload message to server.

        NOTE(review): the message is always appended to ``args.folder``,
        also when it was fetched from another folder - confirm intended.

        :param msg: the message to upload.
        :param msg_flags: flags to set on the uploaded message.
        :returns: tuple ``(status, data)``; status is the result of the
            IMAP APPEND or one of 'Read Only', 'Cached', 'Duplicate'
            (then data is an empty string).
        """

        # Knowing what's going on
        msg_date = self.convert_date(msg.get('date'))
        msg_subject = self.message.get_subject(msg)
        msg_uid = self.message.get_uid(msg)
        if self.args.read_only:
            logging.warning('    Uploading\t: skipped (read-only)')
            return ('Read Only', '')

        logging.debug('    Uploading\t: %s / %s', msg_date, msg_flags)

        # Check cache
        if msg_uid in self.cache:
            logging.warning('    Cache\t: OK')
            return ('Cached', '')

        # Check for duplicates
        if self.does_msg_exist(msg) is True:
            self.cache[msg_uid] = msg_subject
            return ('Duplicate', '')

        status, data = self.imap.append(
            self.args.folder, msg_flags, msg_date, msg.as_string().encode())
        if status == "OK":
            logging.warning('    Success\t: %s', status)
            self.cache[msg_uid] = msg_subject
        else:
            logging.warning('    Return\t\t: %s, %s', status, data)

        return status, data

    def get_msg(self, uid):
        """
        Fetch an email from the IMAP server.

        The fetch is attempted up to ``__RETRIES`` times.

        :param uid: UID of the message in the currently selected folder.
        :returns: tuple ``(message, flags)``.
        :raises imaplib.IMAP4.error: if no attempt returned a body.
        """

        # Sometimes IMAP servers might return empty bodies, so try again
        for attempt in range(1, self.__RETRIES + 1):
            try:
                result, data = self.imap.uid('fetch', uid,
                                             '(UID BODY.PEEK[] FLAGS)')
            except imaplib.IMAP4.error:
                continue

            # A usable answer starts with an (envelope, body) tuple
            if not data or not isinstance(data[0], tuple):
                logging.warning('  Error\t: '
                                'Could not get a message body. '
                                'Retrying in a few seconds...')
                # No point in waiting when no further attempt follows
                if attempt < self.__RETRIES:
                    time.sleep(2)
                continue

            body = data[0][1]
            logging.debug('  Result (Size)\t: %s (%d KB)',
                          result, len(body) / 1024)

            msg = self.get_msg_from_struct(data)
            msg_flags = self.get_flags_from_struct(data)

            logging.debug('  Flags\t\t: %s', msg_flags)

            return (msg, msg_flags)

        raise imaplib.IMAP4.error('Could not get a message body')

    def get_msgs_from_folder(self, folder):
        """
        Get all emails from a folder on the IMAP server.

        :param folder: name of the folder to select and search.
        :returns: list of message UIDs (bytes); empty if the search
            returned no payload.
        """

        # Safety net: enable read-only if requested
        self.imap.select(folder, readonly=self.args.read_only)

        # Extract email UIDs
        result_mails, data_mails = self.imap.uid('search', None, "ALL")
        if data_mails and data_mails[0] is not None:
            msg_uids = data_mails[0].split()
        else:
            msg_uids = []
        logging.warning('Mails (#)\t: %s (%s)',
                        result_mails, len(msg_uids))

        return msg_uids

    def get_folders(self) -> typing.List[str]:
        """
        Get the folders from the IMAP server to iterate through.

        :returns: ``[args.folder]`` unless ``args.all`` is set; otherwise
            all server folders not starting with an ignored prefix.
        :raises imaplib.IMAP4.error: if ``args.all`` is not set and
            ``args.folder`` (compared case-insensitively) does not exist.
        """

        res, folder_list = self.imap.list()
        logging.warning('Folders (#)\t: %s (%s)', res, len(folder_list))

        # The folder name is whatever follows the quoted delimiter
        folders = [re.split('"."|"/"', item.decode())[-1].strip()
                   for item in folder_list]

        if self.args.all:
            return [item for item in folders
                    if not item.startswith(self.__IGNORE_PREFIX)]

        if self.args.folder.lower() not in map(str.lower, folders):
            raise imaplib.IMAP4.error(
                'IMAP folder %s does not exist. Existing folders: %s'
                % (self.args.folder, folders))
        return [self.args.folder]

    @staticmethod
    def convert_date(date):
        """
        Convert dates to copy old date to new message.

        :param date: value of a Date header, or None.
        :returns: the date as IMAP internal date string; the current time
            is used if ``date`` is missing or cannot be parsed.
        """

        pz_time = email.utils.parsedate_tz(date) if date else None
        if pz_time is None:
            # No usable Date header: fall back to "now" instead of failing
            stamp = time.time()
        else:
            stamp = email.utils.mktime_tz(pz_time)
        return imaplib.Time2Internaldate(stamp)

    @staticmethod
    def get_msg_from_struct(data) -> "email.message.Message":
        """
        Parse the fetched body into a message object.

        :param data: fetch response whose first item is a tuple with the
            raw message bytes at index 1.
        :returns: the message parsed from the decoded body.
        """

        raw_bytes = data[0][1]
        try:
            raw_email = raw_bytes.decode('utf-8')
        except ValueError:
            # iso-8859-1 maps every byte value, so this cannot fail
            raw_email = raw_bytes.decode('iso-8859-1')

        return email.message_from_string(raw_email)

    @staticmethod
    def get_flags_from_struct(data):
        """
        Get flags to copy old flags to new message.

        :param data: fetch response; the flags are parsed from the item
            at index 1.
        :returns: the flags joined by single spaces, without ``\\Recent``.
        """

        flags = imaplib.ParseFlags(data[1])
        # \Recent is a read-only attribute and must not be copied
        writable = [flag for flag in flags if flag.lower() != b'\\recent']
        return b" ".join(writable).decode("utf-8").strip()

    def _load_cache(self):
        """
        Load cache of processed mail UIDs with their subjects.

        A fresh, empty cache is written when ``args.reset_cache`` is set;
        a missing cache file is created from the current cache. An
        unreadable cache file is replaced by an empty in-memory cache.
        """

        if self.args.reset_cache:
            self.cache = collections.OrderedDict()

        # Create new cache if needed
        if self.args.reset_cache or not os.path.exists(self.cache_file):
            self._save_cache()
            return

        # NOTE(review): pickle must only be used on trusted files; the
        # cache file is expected to be written by _save_cache() only.
        try:
            with open(self.cache_file, 'rb') as filepointer:
                self.cache = pickle.load(filepointer)
        except (EOFError, pickle.UnpicklingError) as error:
            logging.warning('Cache\t\t: unreadable, starting empty (%s)',
                            error)
            self.cache = collections.OrderedDict()

    def _save_cache(self):
        """Save cache of processed mail UIDs with their subjects."""

        # Create the target directory (and parents) only if it is missing
        directory = os.path.dirname(self.cache_file)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(self.cache_file, 'wb+') as filepointer:
            pickle.dump(self.cache, filepointer, pickle.HIGHEST_PROTOCOL)