src.mailbox_imap.MailboxCleanerIMAP.logout()   A
last analyzed

Complexity

Conditions 3

Size

Total Lines 18
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 13
nop 1
dl 0
loc 18
rs 9.75
c 0
b 0
f 0
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
4
"""
5
Module to download and to detach/strip/remove attachments
6
from e-mails on IMAP servers.
7
"""
8
9
from __future__ import print_function
10
11
import re
12
import email
13
import email.mime.text
14
import email.utils
15
import email.parser
16
import imaplib
17
import logging
18
import socket
19
import time
20
import typing
21
import collections
22
import os.path
23
import pickle
24
25
from src.mailbox_message import MailboxCleanerMessage
26
27
imaplib._MAXLINE = 10000000  # pylint: disable=protected-access
28
# Socket option number passed to setsockopt() at IPPROTO_TCP level in login().
# NOTE(review): 0x10 looks like the Darwin/macOS value of TCP_KEEPALIVE; on
# other platforms the same number may select a different option - confirm.
TCP_KEEPALIVE = 0x10
29
30
__author__ = "Alexander Willner"
31
__copyright__ = "Copyright 2020, Alexander Willner"
32
__credits__ = ["github.com/guido4000",
33
               "github.com/halteproblem", "github.com/jamesridgway"]
34
__license__ = "MIT"
35
__version__ = "1.0.4"
36
__maintainer__ = "Alexander Willner"
37
__email__ = "[email protected]"
38
__status__ = "Development"
39
40
41
class MailboxCleanerIMAP():
42
    """
43
    Download and detach/strip/remove attachments from e-mails
44
    on IMAP servers.
45
    """
46
47
    # Number of retries to get messages
48
    __RETRIES = 2
49
50
    # IMAP folders to ignore
51
    __IGNORE_PREFIX = ('Contacts', 'Calendar', '"Calendar',
52
                       'Trash', '"Deleted', 'Tasks',
53
                       '"[Gmail]"')
54
55
    def __init__(self, args, imap=None):
        """Set up the cleaner state from the parsed arguments.

        ``imap`` may carry an already created IMAP connection; when it is
        left as None, ``login()`` opens one.
        """

        self.args = args
        self.message = MailboxCleanerMessage(args)

        # Cache of handled messages and the file it is persisted to
        cache_name = '_cache-' + args.server + '.pkl'
        self.cache = collections.OrderedDict()
        self.cache_file = os.path.join(self.args.target, cache_name)

        # Connection and progress bookkeeping
        self.imap: imaplib.IMAP4_SSL = imap
        self.stopped: bool = False
        self.uploaded: int = 0
66
67
    def cleanup(self):
68
        """Cleanup after error."""
69
70
        self._save_cache()
71
72
    def login(self):
73
        """Log into the IMAP server."""
74
75
        try:
76
            if self.imap is None:
77
                self.imap = imaplib.IMAP4_SSL(self.args.server)
78
            self.imap.login(self.args.user, self.args.password)
79
80
            self.imap.sock.setsockopt(
81
                socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
82
            self.imap.sock.setsockopt(
83
                socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
84
            self.imap.sock.setsockopt(socket.IPPROTO_TCP, TCP_KEEPALIVE, 3)
85
            self._load_cache()
86
        except socket.gaierror as error:
87
            raise SystemExit('Login failed (wrong server?): %s' %
88
                             error) from error
89
        except imaplib.IMAP4.error as error:
90
            raise SystemExit('Login failed (wrong password?): %s' %
91
                             error) from error
92
93
    def logout(self):
94
        """Log out of the IMAP server."""
95
96
        self._save_cache()
97
98
        try:
99
            logging.warning('Connection\t: Closing...')
100
            self.imap.close()
101
        except (AttributeError, imaplib.IMAP4.error):
102
            pass
103
104
        try:
105
            logging.warning('Connection\t: Logging out...')
106
            self.imap.logout()
107
        except (AttributeError, imaplib.IMAP4.error):
108
            pass
109
110
        self.imap = None
111
112
    def does_msg_exist(self, msg) -> bool:
113
        """Check if message is already on the server."""
114
115
        msg_uid = self.message.get_uid(msg)
116
        for _attempt in range(2):  # don't select folder in every single check
117
            try:
118
                logging.warning('    Searching\t: %s', msg_uid)
119
                status, data = self.imap.uid(
120
                    'SEARCH', None,
121
                    '(UNDELETED HEADER Message-ID "%s")' % msg_uid)
122
                if data is not None and\
123
                        len(data[0]) > 0 and\
124
                        self.args.upload is not None:
125
                    logging.warning('    Duplicate\t: Yes')
126
                    self.cache[msg_uid] = self.message.get_subject(msg_uid)
127
                    return True
128
                logging.warning('    Duplicate\t: No')
129
            except imaplib.IMAP4.error as error:
130
                self.logout()
131
                self.login()
132
                status, error = self.imap.select(
133
                    self.args.folder, readonly=self.args.read_only)
134
                if status != "OK":
135
                    raise imaplib.IMAP4.error(
136
                        'Could not select folder: %s' % error) from error
137
            break
138
139
        return False
140
141
    def process_directory(self):
142
        """Iterate over mails from a local directory for upload."""
143
        self.message.process_directory(self.upload, cache=self.cache)
144
145
    def process_folders(self):  # noqa: C901
146
        """Iterate over mails in configured folders."""
147
148
        folders = self.get_folders()
149
        self.stopped = False
150
151
        # Iterate over each folder
152
        for i, folder in enumerate(folders, start=1):
153
154
            # For threaded environments
155
            if self.stopped:
156
                break
157
158
            # Get all mails in this folder
159
            if hasattr(self.args, 'logger'):
160
                self.args.logger.log_progress_folders(i, len(folders), folder)
161
            logging.info('Progress\t: %s / %s (folders)', i, len(folders))
162
            logging.warning('Folder\t\t: %s (started)', folder)
163
            msg_uids = self.get_msgs_from_folder(folder)
164
165
            # Iterate over each email
166
            for j, msg_uid in enumerate(msg_uids, start=1):
167
168
                # For threaded environments
169
                if self.stopped:
170
                    break
171
172
                # Skip if already in cache
173
                logging.info('Progress\t: %s / %s (mail uid: %s)',
174
                             j, len(msg_uids), msg_uid.decode())
175
                if msg_uid in self.cache:
176
                    logging.info('  Subject\t: %s (cached)',
177
                                 self.cache[msg_uid])
178
                    if hasattr(self.args, 'logger'):
179
                        self.args.logger.log_progress_mails(
180
                            j, len(msg_uids), self.cache[msg_uid])
181
                    continue
182
183
                # Get the actual email
184
                try:
185
                    msg, msg_flags = self.get_msg(msg_uid)
186
                except imaplib.IMAP4.error as error:
187
                    try:
188
                        logging.info(
189
                            '  Error\t\t: Message %s (%s). Logging in again.',
190
                            msg_uid, error)
191
                        self.logout()
192
                        self.login()
193
                        self.imap.select(folder, readonly=self.args.read_only)
194
                        msg, msg_flags = self.get_msg(msg_uid)
195
                    except imaplib.IMAP4.error:
196
                        logging.info('  Error\t: Message %s skipped', msg_uid)
197
                        continue
198
199
                subject = self.message.get_subject(msg)
200
                logging.info('  Subject\t: %s', subject)
201
                if hasattr(self.args, 'logger'):
202
                    self.args.logger.log_progress_mails(
203
                        j, len(msg_uids), subject)
204
205
                # Download and detach attachments from email
206
                modified = self.message.download_and_detach_attachments(msg)
207
208
                # Upload new email
209
                if modified:
210
                    self.replace_msg(msg, msg_flags, msg_uid, folder)
211
212
                # Save the cache
213
                self.cache[msg_uid] = subject
214
                if j % 10 == 0:
215
                    self._save_cache()
216
217
            logging.warning('Folder\t\t: %s (completed)', folder)
218
219
    def replace_msg(self, msg, msg_flags, msg_uid, folder):
220
        """Upload new message and remove the old one."""
221
222
        # Only upload in non-readonly mode
223
        if self.args.read_only:
224
            logging.debug('    Replacing\t: skipped (read-only)')
225
            return
226
227
        # Upload new message
228
        status, data = self.upload(msg, folder, msg_flags)
229
230
        # Delete old message
231
        if status == 'OK' and self.args.read_only is False:
232
            result = self.imap.select(folder, readonly=self.args.read_only)
233
            assert result[0] == 'OK'
234
            result = self.imap.uid('STORE', msg_uid, '+FLAGS', '\\Deleted')
235
            logging.debug('    Deleting\t: %s', result)
236
            # GMail needs special treatment
237
            try:
238
                self.imap.uid('STORE', msg_uid, '+X-GM-LABELS', '\\Trash')
239
            except imaplib.IMAP4.error:
240
                pass
241
            # Sometimes expunge just fails with an EOF socket error
242
            try:
243
                self.imap.expunge()
244
                logging.debug('    Comment\t: Expunged')
245
            except imaplib.IMAP4.abort:
246
                pass
247
        else:
248
            logging.warning('    Result\t: %s (%s)', status, data)
249
250
    def upload(self, msg, folder, msg_flags='\\Seen'):
251
        """Upload message to server."""
252
253
        # Knowing what's going on
254
        try:
255
            msg_date = self.convert_date(msg.get('date'))
256
            msg_subject = self.message.get_subject(msg)
257
            msg_uid = self.message.get_uid(msg)
258
        except TypeError as error:
259
            status = "Error"
260
            data = error
261
            logging.warning('    Error\t: %s, %s', status, data)
262
            return status, data
263
264
        if self.args.read_only:
265
            logging.warning('    Uploading\t: skipped (read-only)')
266
            return ('Read Only', '')
267
268
        # Check cache
269
        msg_uid = self.message.get_uid(msg)
270
        if msg_uid in self.cache:
271
            logging.warning('    Uploading\t: skipped (cached)')
272
            return ('Cached', '')
273
274
        # Check for duplicates
275
        if self.does_msg_exist(msg) is True:
276
            self.cache[msg_uid] = msg_subject
277
            logging.warning('    Uploading\t: skipped (duplicate)')
278
            return ('Duplicate', '')
279
280
        logging.debug('    Uploading\t: %s / %s', msg_date, msg_flags)
281
282
        try:
283
            status, data = self.imap.append(
284
                folder, msg_flags, msg_date, msg.as_string().encode())
285
            if status == "OK":
286
                logging.warning('    Result\t: %s', status)
287
                self.cache[msg_uid] = msg_subject
288
            else:
289
                logging.warning('    Result\t: %s, %s (in %s)',
290
                                status, data, folder)
291
        except imaplib.IMAP4.abort as error:
292
            status = "Error"
293
            data = error
294
            self.logout()
295
            self.login()
296
297
        # Reconnect after uploading a couple of files
298
        # (avoiding TimeoutError after a while)
299
        self.uploaded += 1
300
        if self.uploaded % 500 == 0:
301
            logging.warning('    Reconnecting...')
302
            self.uploaded = 0
303
            self.logout()
304
            self.login()
305
306
        return status, data
307
308
    def get_msg(self, uid):
309
        """Fetch an email from the IMAP server."""
310
311
        # Sometimes IMAP servers might return empty bodies, so try again
312
        for _ in range(self.__RETRIES):
313
            try:
314
                result, data = self.imap.uid('fetch', uid,
315
                                             '(UID BODY.PEEK[] FLAGS)')
316
                if data is None or data[0] is None:
317
                    logging.warning('  Error\t: '
318
                                    'Could not get a message body. '
319
                                    'Retrying in a few seconds...')
320
                    time.sleep(2)
321
                    raise imaplib.IMAP4.error('Could not get a message body')
322
323
                body = data[0][1]
324
                logging.debug('  Result (Size)\t: %s (%d KB)',
325
                              result, len(body) / 1024)
326
327
                msg = self.get_msg_from_struct(data)
328
                msg_flags = self.get_flags_from_struct(data)
329
330
                logging.debug('  Flags\t\t: %s', msg_flags)
331
332
                return (msg, msg_flags)
333
            except imaplib.IMAP4.error:
334
                continue
335
            break
336
        else:
337
            raise imaplib.IMAP4.error('Could not get a message subject')
338
339
    def get_msgs_from_folder(self, folder):
340
        """Get all emails from a folder on the IMAP server."""
341
342
        # Safety net: enable read-only if requested
343
        self.imap.select(folder, readonly=self.args.read_only)
344
345
        # Extract email UIDs
346
        result_mails, data_mails = self.imap.uid('search', None, "ALL")
347
        msg_uids = data_mails[0].split()
348
        logging.warning('Mails (#)\t: %s (%s)',
349
                        result_mails, len(msg_uids))
350
351
        return msg_uids
352
353
    def get_folders(self) -> typing.List[str]:
354
        """Get the folders from the IMAP server to iterate through."""
355
356
        res, folder_list = self.imap.list()
357
        logging.warning('Folders (#)\t: %s (%s)', res, len(folder_list))
358
359
        folders = [re.split('"."|"/"', item.decode())[-1].strip()
360
                   for item in folder_list]
361
362
        if not self.args.all:
363
            if self.args.folder.lower() not in map(str.lower, folders):
364
                raise imaplib.IMAP4.error(
365
                    'IMAP folder %s does not exist. Existing folders: %s'
366
                    % (self.args.folder, folders))
367
            folders = [self.args.folder]
368
        else:
369
            folders[:] = [item for item in folders
370
                          if not item.startswith(self.__IGNORE_PREFIX)]
371
            folders[:] = [item for item in folders
372
                          if not item.startswith(self.__IGNORE_PREFIX)]
373
374
        return folders
375
376
    @staticmethod
377
    def convert_date(date):
378
        """Convert dates to copy old date to new message."""
379
380
        pz_time = email.utils.parsedate_tz(date)
381
        stamp = email.utils.mktime_tz(pz_time)
382
        date = imaplib.Time2Internaldate(stamp)
383
        return date
384
385
    @staticmethod
386
    def get_msg_from_struct(data) -> str:
387
        """Convert message to a string."""
388
389
        try:
390
            raw_email = (data[0][1]).decode('utf-8')
391
        except ValueError:
392
            try:
393
                raw_email = (data[0][1]).decode('iso-8859-1')
394
            except ValueError:
395
                raw_email = (data[0][1]).decode('utf-8', 'backslashreplace')
396
397
        return email.message_from_string(raw_email)
398
399
    @staticmethod
400
    def get_flags_from_struct(data):
401
        """Get flags to copy old flags to new message."""
402
403
        flags = imaplib.ParseFlags(data[1])
404
        flags = b" ".join(flags) if flags != () else b""
405
        flags = flags.decode("utf-8")
406
        flags = flags.replace("\\Recent", "")  # read-only attribute
407
        return flags.strip()
408
409
    def _load_cache(self):
410
        """Load cache of processed mail UIDs with their subjects."""
411
412
        # Create new cache if needed
413
        if not os.path.exists(self.cache_file) or\
414
           self.args.reset_cache:
415
            self._save_cache()
416
417
        with open(self.cache_file, 'rb') as filepointer:
418
            self.cache = pickle.load(filepointer)
419
420
    def _save_cache(self):
421
        """Save cache of processed mail UIDs with their subjects."""
422
423
        if not os.path.exists(os.path.dirname(self.cache_file)):
424
            os.mkdir(os.path.dirname(self.cache_file))
425
            print("Cache folder created")
426
        with open(self.cache_file, 'wb+') as filepointer:
427
            pickle.dump(self.cache, filepointer, pickle.HIGHEST_PROTOCOL)
428