parse and save email attachments

This commit is contained in:
jedi 2024-01-09 22:42:47 +01:00
parent f9a95317a2
commit 734af10525
8 changed files with 357 additions and 76 deletions

View file

@ -197,21 +197,14 @@ DATA_UPLOAD_MAX_MEMORY_SIZE = 1024 * 1024 * 128 # 128 MB
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
if 'test' in sys.argv:
CHANNEL_LAYERS = {
'default': {
'BACKEND': 'channels.layers.InMemoryChannelLayer'
}
CHANNEL_LAYERS = {
'default': {
'BACKEND': 'channels_redis.core.RedisChannelLayer',
'CONFIG': {
'hosts': [('localhost', 6379)],
},
}
else:
CHANNEL_LAYERS = {
'default': {
'BACKEND': 'channels_redis.core.RedisChannelLayer',
'CONFIG': {
'hosts': [('localhost', 6379)],
},
}
}
}
TEST_RUNNER = 'core.test_runner.FastTestRunner'

View file

@ -19,3 +19,15 @@ class FastTestRunner(DiscoverRunner):
settings.PASSWORD_HASHERS = (
'django.contrib.auth.hashers.MD5PasswordHasher',
)
settings.CHANNEL_LAYERS = {
'default': {
'BACKEND': 'channels.layers.InMemoryChannelLayer'
}
}
settings.DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': ':memory:',
}
}

View file

@ -27,12 +27,16 @@ class FileManager(models.Manager):
kwargs['file'] = ContentFile(content, content_hash)
kwargs['hash'] = content_hash
kwargs['mime_type'] = mime_type
elif 'file' in kwargs and 'hash' in kwargs and type(kwargs['file']) == ContentFile and 'mime_type' in kwargs:
pass
else:
raise ValueError('data must be a base64 encoded string or file and hash must be provided')
try:
return self.get(hash=kwargs['hash']), False
except self.model.DoesNotExist:
return self.create(**kwargs), True
obj = super().create(**kwargs)
obj.file.save(content=kwargs['file'], name=kwargs['hash'])
return obj, True
def create(self, **kwargs):
if 'data' in kwargs and type(kwargs['data']) == str:
@ -51,23 +55,32 @@ class FileManager(models.Manager):
kwargs['file'] = ContentFile(content, content_hash)
kwargs['hash'] = content_hash
kwargs['mime_type'] = mime_type
elif 'file' in kwargs and 'hash' in kwargs and type(kwargs['file']) == ContentFile:
elif 'file' in kwargs and 'hash' in kwargs and type(kwargs['file']) == ContentFile and 'mime_type' in kwargs:
pass
else:
raise ValueError('data must be a base64 encoded string or file and hash must be provided')
if not self.filter(hash=kwargs['hash']).exists():
return super().create(**kwargs)
obj = super().create(**kwargs)
obj.file.save(content=kwargs['file'], name=kwargs['hash'])
return obj
else:
raise IntegrityError('File with this hash already exists')
class File(models.Model):
item = models.ForeignKey(Item, models.CASCADE, db_column='iid', null=True, blank=True, related_name='files')
class AbstractFile(models.Model):
created_at = models.DateTimeField(blank=True, null=True)
updated_at = models.DateTimeField(blank=True, null=True)
deleted_at = models.DateTimeField(blank=True, null=True)
file = models.ImageField(upload_to=hash_upload)
file = models.FileField(upload_to=hash_upload)
mime_type = models.CharField(max_length=255, null=False, blank=False)
hash = models.CharField(max_length=64, null=False, blank=False, unique=True)
objects = FileManager()
class Meta:
abstract = True
class File(AbstractFile):
item = models.ForeignKey(Item, models.CASCADE, db_column='iid', null=True, blank=True, related_name='files')
pass

View file

@ -1,4 +1,5 @@
from django.test import TestCase, Client
from django.core.files.base import ContentFile
from files.models import File
from inventory.models import Event, Container, Item
@ -13,13 +14,27 @@ class FileTestCase(TestCase):
self.event = Event.objects.create(slug='EVENT', name='Event')
self.box = Container.objects.create(name='BOX')
def test_create_file_raw(self):
    """Create a File from a ready-made ContentFile plus hash and check what was stored."""
    from hashlib import sha256

    payload = b"foo"
    digest = sha256(payload).hexdigest()
    owner = Item.objects.create(container=self.box, event=self.event, description='1')

    created = File.objects.create(
        file=ContentFile(b"foo"),
        mime_type='text/plain',
        hash=digest,
        item=owner,
    )
    created.save()

    # Evaluate the queryset once; the length and the first row come from the same result.
    stored = File.objects.all()
    self.assertEqual(1, len(stored))
    first = stored[0]
    self.assertEqual(payload, first.file.read())
    self.assertEqual(digest, first.hash)
def test_list_files(self):
    """Create one file from a base64 data URL and check the listing endpoint and stored bytes."""
    import base64

    encoded = base64.b64encode(b"foo").decode('utf-8')
    item = File.objects.create(data="data:text/plain;base64," + encoded)

    response = client.get('/api/1/files')
    self.assertEqual(response.status_code, 200)

    first_entry = response.json()[0]
    self.assertEqual(first_entry['hash'], item.hash)
    self.assertEqual(len(first_entry['hash']), 64)

    stored = File.objects.all()
    self.assertEqual(len(stored), 1)
    self.assertEqual(stored[0].file.read(), b"foo")
def test_one_file(self):
import base64
@ -28,6 +43,8 @@ class FileTestCase(TestCase):
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json()['hash'], item.hash)
self.assertEqual(len(response.json()['hash']), 64)
self.assertEqual(len(File.objects.all()), 1)
self.assertEqual(File.objects.all()[0].file.read(), b"foo")
def test_create_file(self):
import base64
@ -38,6 +55,8 @@ class FileTestCase(TestCase):
content_type='application/json')
self.assertEqual(response.status_code, 201)
self.assertEqual(len(response.json()['hash']), 64)
self.assertEqual(len(File.objects.all()), 1)
self.assertEqual(File.objects.all()[0].file.read(), b"foo")
def test_delete_file(self):
import base64

View file

@ -0,0 +1,59 @@
# Generated by Django 4.2.7 on 2024-01-09 20:56
from django.db import migrations, models
import django.db.models.deletion
import files.models
from mail.models import Email
from mail.protocol import parse_email_body
class NullLogger:
    """Logger stand-in: ``info``, ``warning`` and ``debug`` accept anything and do nothing."""

    def _discard(self, *args, **kwargs):
        # Accept any positional/keyword arguments and drop them.
        return None

    info = _discard
    warning = _discard
    debug = _discard
class Migration(migrations.Migration):
    """Create the ``EmailAttachment`` model and backfill attachments from stored raw emails."""

    dependencies = [
        ('mail', '0002_printed_quotable'),
    ]

    def generate_email_attachments(apps, schema_editor):
        """Re-parse every email that has a raw source and rebuild its body and attachments.

        NOTE(review): this works on the imported ``Email`` model and ``parse_email_body``
        rather than on historical models from ``apps.get_model``; confirm this still runs
        once the models change in later migrations.
        """
        for email in Email.objects.all():
            raw = email.raw
            if raw is None:
                # Nothing stored to re-parse for this email.
                continue
            _, body, attachments = parse_email_body(raw.encode('utf-8'), NullLogger())
            # Drop the current links first so only the freshly parsed attachments remain.
            email.attachments.clear()
            for attachment in attachments:
                email.attachments.add(attachment)
            email.body = body
            email.save()

    operations = [
        migrations.CreateModel(
            name='EmailAttachment',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('created_at', models.DateTimeField(blank=True, null=True)),
                ('updated_at', models.DateTimeField(blank=True, null=True)),
                ('deleted_at', models.DateTimeField(blank=True, null=True)),
                # NOTE(review): AbstractFile in files.models appears to declare ``file`` as a
                # FileField; confirm this ImageField matches the current model state.
                ('file', models.ImageField(upload_to=files.models.hash_upload)),
                ('mime_type', models.CharField(max_length=255)),
                ('hash', models.CharField(max_length=64, unique=True)),
                ('name', models.CharField(max_length=255)),
                ('email',
                 models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='attachments',
                                   to='mail.email')),
            ],
            options={
                'abstract': False,
            },
        ),
        # A no-op reverse keeps the migration reversible: unapplying only has to drop the
        # table created above, the backfill itself needs no undo step.
        migrations.RunPython(generate_email_attachments, migrations.RunPython.noop),
    ]

View file

@ -4,6 +4,7 @@ from django.db import models
from django_softdelete.models import SoftDeleteModel
from core.settings import MAIL_DOMAIN
from files.models import AbstractFile
from inventory.models import Event
from tickets.models import IssueThread
@ -32,3 +33,8 @@ class EventAddress(models.Model):
id = models.AutoField(primary_key=True)
event = models.ForeignKey(Event, models.SET_NULL, null=True)
address = models.CharField(max_length=255)
class EmailAttachment(AbstractFile):
    """AbstractFile subclass that adds a link to an ``Email`` and a ``name`` for the file."""

    # Reachable from an email as ``email.attachments``; the link may be empty (null=True).
    email = models.ForeignKey(to=Email, on_delete=models.CASCADE, related_name='attachments', null=True)
    name = models.CharField(max_length=255)

View file

@ -3,10 +3,11 @@ import logging
import aiosmtplib
from asgiref.sync import sync_to_async
from channels.layers import get_channel_layer
from django.core.files.base import ContentFile
from mail.models import Email, EventAddress
from mail.models import Email, EventAddress, EmailAttachment
from notify_sessions.models import SystemEvent
from tickets.models import IssueThread, StateChange
from tickets.models import IssueThread
def find_quoted_printable(s, marker):
@ -99,6 +100,96 @@ def find_target_event(address):
pass
return None
def _decode_text_payload(part):
    """Return the transfer-decoded payload of *part* as text.

    The charset declared on the part is used; UTF-8 is the fallback when none is declared
    or Python does not know the declared one. Undecodable bytes are replaced instead of
    raising, so one badly encoded part does not reject the whole message.
    """
    payload = part.get_payload(decode=True) or b""
    charset = part.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset, errors='replace')
    except LookupError:
        return payload.decode('utf-8', errors='replace')


def parse_email_body(raw, log=None):
    """Parse a raw email into its message object, text body and stored attachments.

    :param raw: the complete message as bytes.
    :param log: logger-like object providing ``debug`` and ``info``; when omitted the
        ``mail.log`` logger is used.
    :return: tuple ``(parsed, body, attachments)`` where ``parsed`` is the
        ``email.message.Message``, ``body`` the concatenated text and ``attachments`` a list
        of ``EmailAttachment`` objects created or found for the message.
    """
    import email
    import logging
    from hashlib import sha256

    if log is None:
        # The signature allows log=None; without a fallback the first log call would fail.
        log = logging.getLogger('mail.log')

    attachments = []
    parsed = email.message_from_bytes(raw)

    if not parsed.is_multipart():
        return parsed, _decode_text_payload(parsed), attachments

    body = ""
    for part in parsed.walk():
        ctype = part.get_content_type()
        cdispo = str(part.get('Content-Disposition'))

        # text/plain parts count as body text unless they are explicitly attachments
        if ctype == 'text/plain' and 'attachment' not in cdispo:
            segment = _decode_text_payload(part)
            segment = unescape_and_decode_quoted_printable(segment)
            segment = unescape_and_decode_base64(segment)
            log.debug(segment)
            body = body + segment
        elif 'attachment' in cdispo or 'inline' in cdispo:
            content = part.get_payload(decode=True) or b""
            chash = sha256(content).hexdigest()
            name = part.get_filename()
            if name is None:
                name = "unnamed"

            # Identical content is stored once: lookup is keyed on the content hash.
            attachment, _ = EmailAttachment.objects.get_or_create(
                name=name, mime_type=ctype, file=ContentFile(content), hash=chash)
            attachment.save()
            attachments.append(attachment)
            if 'inline' in cdispo:
                # Inline parts get a placeholder in the body that refers to the stored file.
                body = body + f'<img src="cid:{attachment.id}">'
                log.info("Image %s %s", ctype, attachment.id)
            else:
                log.info("Attachment %s %s", ctype, cdispo)

    return parsed, body, attachments
def receive_email(envelope, log=None):
    """Store an incoming message as an ``Email`` and prepare an auto reply for new threads.

    :param envelope: object providing ``content`` (bytes), ``mail_from`` and ``rcpt_tos``;
        only the first recipient is used.
    :param log: logger-like object providing ``info`` and ``warning``; when omitted the
        ``mail.log`` logger is used.
    :return: tuple ``(email, new, reply)``: the stored ``Email``, the flag returned by
        ``find_active_issue_thread`` and the result of ``make_reply`` (``None`` unless
        ``new`` is true).
    """
    import logging

    if log is None:
        # The signature allows log=None; without a fallback the calls below would fail.
        log = logging.getLogger('mail.log')

    parsed, body, attachments = parse_email_body(envelope.content, log)

    header_from = parsed.get('From')
    header_to = parsed.get('To')
    header_in_reply_to = parsed.get('In-Reply-To')
    header_message_id = parsed.get('Message-ID')

    # Mismatches are only logged; the envelope values are the ones stored below.
    if header_from != envelope.mail_from:
        log.warning("Header from does not match envelope from")
        log.info(f"Header from: {header_from}, envelope from: {envelope.mail_from}")

    if header_to != envelope.rcpt_tos[0]:
        log.warning("Header to does not match envelope to")
        log.info(f"Header to: {header_to}, envelope to: {envelope.rcpt_tos[0]}")

    recipient = envelope.rcpt_tos[0].lower()
    sender = envelope.mail_from
    # NOTE(review): parsed.get('Subject') is None for mails without a subject; confirm the
    # helpers below accept that.
    subject = parsed.get('Subject')
    subject = unescape_and_decode_quoted_printable(subject)
    subject = unescape_and_decode_base64(subject)
    target_event = find_target_event(recipient)

    active_issue_thread, new = find_active_issue_thread(header_in_reply_to, subject)

    body_decoded = body
    body_decoded = unescape_and_decode_quoted_printable(body_decoded)
    body_decoded = unescape_and_decode_base64(body_decoded)

    email = Email.objects.create(
        sender=sender, recipient=recipient, body=body_decoded, subject=subject, reference=header_message_id,
        in_reply_to=header_in_reply_to, raw=envelope.content.decode('utf-8'), event=target_event,
        issue_thread=active_issue_thread)
    for attachment in attachments:
        email.attachments.add(attachment)
    email.save()

    reply = None
    if new:
        # First message of a thread: store a confirmation mail and build the reply from it.
        references = collect_references(active_issue_thread)
        reply_email = Email.objects.create(
            sender=recipient, recipient=sender, body="Thank you for your message.", subject="Message received",
            in_reply_to=header_message_id, event=target_event, issue_thread=active_issue_thread)
        reply = make_reply(reply_email, references)

    return email, new, reply
class LMTPHandler:
async def handle_RCPT(self, server, session, envelope, address, rcpt_options):
from core.settings import MAIL_DOMAIN
@ -109,7 +200,6 @@ class LMTPHandler:
return '250 OK'
async def handle_DATA(self, server, session, envelope):
import email
log = logging.getLogger('mail.log')
log.setLevel(logging.DEBUG)
log.info('Message from %s' % envelope.mail_from)
@ -117,51 +207,7 @@ class LMTPHandler:
log.info('Message data:\n')
try:
parsed = email.message_from_bytes(envelope.content)
body = ""
if parsed.is_multipart():
for part in parsed.walk():
ctype = part.get_content_type()
cdispo = str(part.get('Content-Disposition'))
# skip any text/plain (txt) attachments
if ctype == 'text/plain' and 'attachment' not in cdispo:
body = part.get_payload(decode=True)
else:
log.info("Attachment", ctype, cdispo)
else:
body = parsed.get_payload(decode=True)
log.info(body)
header_from = parsed.get('From')
header_to = parsed.get('To')
header_in_reply_to = parsed.get('In-Reply-To')
header_message_id = parsed.get('Message-ID')
if header_from != envelope.mail_from:
log.warning("Header from does not match envelope from")
log.info(f"Header from: {header_from}, envelope from: {envelope.mail_from}")
if header_to != envelope.rcpt_tos[0]:
log.warning("Header to does not match envelope to")
log.info(f"Header to: {header_to}, envelope to: {envelope.rcpt_tos[0]}")
recipient = envelope.rcpt_tos[0].lower()
sender = envelope.mail_from
subject = parsed.get('Subject')
subject = unescape_and_decode_quoted_printable(subject)
subject = unescape_and_decode_base64(subject)
target_event = await sync_to_async(find_target_event)(recipient)
active_issue_thread, new = await sync_to_async(find_active_issue_thread)(header_in_reply_to, subject)
body_decoded = body.decode('utf-8')
body_decoded = unescape_and_decode_quoted_printable(body_decoded)
body_decoded = unescape_and_decode_base64(body_decoded)
email = await sync_to_async(Email.objects.create)(
sender=sender, recipient=recipient, body=body_decoded, subject=subject, reference=header_message_id,
in_reply_to=header_in_reply_to, raw=envelope.content.decode('utf-8'), event=target_event,
issue_thread=active_issue_thread)
email, new, reply = await sync_to_async(receive_email)(envelope, log)
log.info(f"Created email {email.id}")
systemevent = await sync_to_async(SystemEvent.objects.create)(type='email received', reference=email.id)
log.info(f"Created system event {systemevent.id}")
@ -172,15 +218,10 @@ class LMTPHandler:
)
log.info(f"Sent message to frontend")
if new:
references = await sync_to_async(collect_references)(active_issue_thread)
reply_email = await sync_to_async(Email.objects.create)(
sender=recipient, recipient=sender, body="Thank you for your message.", subject="Message received",
in_reply_to=header_message_id, event=target_event, issue_thread=active_issue_thread)
await send_smtp(make_reply(reply_email, references), log)
await send_smtp(reply, log)
log.info("Sent auto reply")
return '250 Message accepted for delivery'
except Exception as e:
log.error(e)
return '550 Message rejected'
return '451 Internal server error'

View file

@ -8,7 +8,7 @@ from knox.models import AuthToken
from authentication.models import ExtendedUser
from core.settings import MAIL_DOMAIN
from inventory.models import Event
from mail.models import Email, EventAddress
from mail.models import Email, EventAddress, EmailAttachment
from mail.protocol import LMTPHandler
from tickets.models import IssueThread, StateChange
@ -301,3 +301,141 @@ class LMTPHandlerTestCase(TestCase): # TODO replace with less hacky test
states = StateChange.objects.filter(issue_thread=IssueThread.objects.all()[0])
self.assertEqual(1, len(states))
self.assertEqual('pending_new', states[0].state)
def test_split_text_inline_image(self):
# Sends a multipart message (text part, inline base64 image part, second text part) through
# LMTPHandler.handle_DATA and checks the stored emails, the issue thread and the attachment.
from aiosmtpd.smtp import Envelope
from asgiref.sync import async_to_sync
import aiosmtplib
# Replace the real send function so the assert_called_once() check below can observe it.
aiosmtplib.send = make_mocked_coro()
handler = LMTPHandler()
server = mock.Mock()
session = mock.Mock()
envelope = Envelope()
envelope.mail_from = 'test1@test'
envelope.rcpt_tos = ['test2@test']
# The base64 payload of the image part decodes to b'testimage' (asserted at the end).
envelope.content = b'''Subject: test
From: test1@test
To: test2@test
Message-ID: <1@test>
Content-Type: multipart/alternative; boundary="abc"
--abc
Content-Type: text/plain; charset=utf-8
test1
--abc
Content-Type: image/jpeg; name="test.jpg"
Content-Disposition: inline; filename="test.jpg"
Content-Transfer-Encoding: base64
Content-ID: <1>
X-Attachment-Id: 1
dGVzdGltYWdl
--abc
Content-Type: text/plain; charset=utf-8
test2
--abc--'''
result = async_to_sync(handler.handle_DATA)(server, session, envelope)
self.assertEqual(result, '250 Message accepted for delivery')
# Two emails are expected: the received message and the stored auto reply.
self.assertEqual(len(Email.objects.all()), 2)
self.assertEqual(len(IssueThread.objects.all()), 1)
aiosmtplib.send.assert_called_once()
# First email: the received message; the inline image shows up as an <img> placeholder
# between the two text segments.
self.assertEqual('test', Email.objects.all()[0].subject)
self.assertEqual('test1@test', Email.objects.all()[0].sender)
self.assertEqual('test2@test', Email.objects.all()[0].recipient)
self.assertEqual('test1\n<img src="cid:1">test2\n', Email.objects.all()[0].body)
self.assertEqual(IssueThread.objects.all()[0], Email.objects.all()[0].issue_thread)
self.assertEqual('<1@test>', Email.objects.all()[0].reference)
self.assertEqual(None, Email.objects.all()[0].in_reply_to)
# Second email: the auto reply, with sender and recipient swapped.
self.assertEqual('Message received', Email.objects.all()[1].subject)
self.assertEqual('test2@test', Email.objects.all()[1].sender)
self.assertEqual('test1@test', Email.objects.all()[1].recipient)
self.assertEqual('Thank you for your message.', Email.objects.all()[1].body)
self.assertEqual(IssueThread.objects.all()[0], Email.objects.all()[1].issue_thread)
self.assertTrue(Email.objects.all()[1].reference.startswith("<"))
self.assertTrue(Email.objects.all()[1].reference.endswith("@localhost>"))
self.assertEqual("<1@test>", Email.objects.all()[1].in_reply_to)
# Issue thread created for the message and its single state change.
self.assertEqual('test', IssueThread.objects.all()[0].name)
self.assertEqual('pending_new', IssueThread.objects.all()[0].state)
self.assertEqual(None, IssueThread.objects.all()[0].assigned_to)
states = StateChange.objects.filter(issue_thread=IssueThread.objects.all()[0])
self.assertEqual(1, len(states))
self.assertEqual('pending_new', states[0].state)
# Exactly one attachment was stored, with the decoded image bytes as file content.
self.assertEqual(1, len(EmailAttachment.objects.all()))
self.assertEqual(1, EmailAttachment.objects.all()[0].id)
self.assertEqual('image/jpeg', EmailAttachment.objects.all()[0].mime_type)
self.assertEqual('test.jpg', EmailAttachment.objects.all()[0].name)
file_content = EmailAttachment.objects.all()[0].file.read()
self.assertEqual(b'testimage', file_content)
def test_text_with_attachment(self):
# Sends a multipart message (text part plus a base64 part with attachment disposition) through
# LMTPHandler.handle_DATA and checks the stored emails, the issue thread and the attachment.
from aiosmtpd.smtp import Envelope
from asgiref.sync import async_to_sync
import aiosmtplib
# Replace the real send function so the assert_called_once() check below can observe it.
aiosmtplib.send = make_mocked_coro()
handler = LMTPHandler()
server = mock.Mock()
session = mock.Mock()
envelope = Envelope()
envelope.mail_from = 'test1@test'
envelope.rcpt_tos = ['test2@test']
# The base64 payload of the attachment part decodes to b'testimage' (asserted at the end).
envelope.content = b'''Subject: test
From: test1@test
To: test2@test
Message-ID: <1@test>
Content-Type: multipart/mixed; boundary="abc"
--abc
Content-Type: text/plain; charset=utf-8
test1
--abc
Content-Type: image/jpeg; name="test.jpg"
Content-Disposition: attachment; filename="test.jpg"
Content-Transfer-Encoding: base64
Content-ID: <1>
X-Attachment-Id: 1
dGVzdGltYWdl
--abc--'''
result = async_to_sync(handler.handle_DATA)(server, session, envelope)
self.assertEqual(result, '250 Message accepted for delivery')
# Two emails are expected: the received message and the stored auto reply.
self.assertEqual(len(Email.objects.all()), 2)
self.assertEqual(len(IssueThread.objects.all()), 1)
aiosmtplib.send.assert_called_once()
# First email: the received message; the body holds only the text part, no <img> placeholder.
self.assertEqual('test', Email.objects.all()[0].subject)
self.assertEqual('test1@test', Email.objects.all()[0].sender)
self.assertEqual('test2@test', Email.objects.all()[0].recipient)
self.assertEqual('test1\n', Email.objects.all()[0].body)
self.assertEqual(IssueThread.objects.all()[0], Email.objects.all()[0].issue_thread)
self.assertEqual('<1@test>', Email.objects.all()[0].reference)
self.assertEqual(None, Email.objects.all()[0].in_reply_to)
# Second email: the auto reply, with sender and recipient swapped.
self.assertEqual('Message received', Email.objects.all()[1].subject)
self.assertEqual('test2@test', Email.objects.all()[1].sender)
self.assertEqual('test1@test', Email.objects.all()[1].recipient)
self.assertEqual('Thank you for your message.', Email.objects.all()[1].body)
self.assertEqual(IssueThread.objects.all()[0], Email.objects.all()[1].issue_thread)
self.assertTrue(Email.objects.all()[1].reference.startswith("<"))
self.assertTrue(Email.objects.all()[1].reference.endswith("@localhost>"))
self.assertEqual("<1@test>", Email.objects.all()[1].in_reply_to)
# Issue thread created for the message and its single state change.
self.assertEqual('test', IssueThread.objects.all()[0].name)
self.assertEqual('pending_new', IssueThread.objects.all()[0].state)
self.assertEqual(None, IssueThread.objects.all()[0].assigned_to)
states = StateChange.objects.filter(issue_thread=IssueThread.objects.all()[0])
self.assertEqual(1, len(states))
self.assertEqual('pending_new', states[0].state)
# Exactly one attachment was stored, with the decoded bytes as file content.
self.assertEqual(1, len(EmailAttachment.objects.all()))
self.assertEqual(1, EmailAttachment.objects.all()[0].id)
self.assertEqual('image/jpeg', EmailAttachment.objects.all()[0].mime_type)
self.assertEqual('test.jpg', EmailAttachment.objects.all()[0].name)
file_content = EmailAttachment.objects.all()[0].file.read()
self.assertEqual(b'testimage', file_content)