diff --git a/pyquarantine/notifications.py b/pyquarantine/notifications.py
index ebe2b44..16f71bc 100644
--- a/pyquarantine/notifications.py
+++ b/pyquarantine/notifications.py
@@ -215,48 +215,47 @@ class EMailNotification(BaseNotification):
img.add_header("Content-ID", "<{}>".format(basename(img_path)))
self.embedded_imgs.append(img)
-    def get_text(self, queueid, part):
-        "Get the mail text in html form from email part."
-        mimetype = part.get_content_type()
+    def get_decoded_email_body(self, queueid, msg, preferred=_html_text):
+        """Find and decode email body.
+
+        Walks all parts of msg and selects a text/plain or text/html part.
+        The walk stops at the first part whose content type equals
+        preferred, otherwise the last matching part is used.
+
+        The selected part is decoded with the charset from its header
+        (utf-8 if the header defines none or names an unknown codec),
+        undecodable bytes are replaced. A text/plain body is html-escaped
+        and converted to html line by line.
+
+        Returns the body as str, or an error text if no body was found.
+        """
+        # try to find the body part
+        self.logger.debug("{}: trying to find email body".format(queueid))
+        body = None
+        for part in msg.walk():
+            content_type = part.get_content_type()
+            if content_type in [EMailNotification._plain_text,
+                                EMailNotification._html_text]:
+                body = part
+                if content_type == preferred:
+                    break
-        self.logger.debug(
-            "{}: extracting content of email text part".format(queueid))
-        text = part.get_payload(decode=True)
+        if body is not None:
+            # get the character set, fallback to utf-8 if not defined in header
+            charset = body.get_content_charset()
+            if charset is None:
+                charset = "utf-8"
-        if mimetype == EMailNotification._plain_text:
-            self.logger.debug(
-                "{}: content mimetype is {}, converting to {}".format(
-                    queueid, mimetype, self._html_text))
-            text = re.sub(r"^(.*)$", r"\1<br/>\n",
-                          escape(text.decode()), flags=re.MULTILINE)
+            # decode content
+            payload = body.get_payload(decode=True)
+            try:
+                content = payload.decode(
+                    encoding=charset, errors="replace")
+            except LookupError:
+                # the charset label is taken from the mail header and may
+                # name a codec python does not know, do not fail on it
+                self.logger.warning(
+                    "{}: unknown charset {}, falling back to utf-8".format(
+                        queueid, charset))
+                content = payload.decode(
+                    encoding="utf-8", errors="replace")
+
+            content_type = body.get_content_type()
+            if content_type == EMailNotification._plain_text:
+                # convert text/plain to text/html
+                self.logger.debug(
+                    "{}: content type is {}, converting to {}".format(
+                        queueid, content_type, EMailNotification._html_text))
+                content = re.sub(r"^(.*)$", r"\1<br/>\n",
+                                 escape(content), flags=re.MULTILINE)
+            else:
+                self.logger.debug(
+                    "{}: content type is {}".format(
+                        queueid, content_type))
         else:
-            self.logger.debug(
-                "{}: content mimetype is {}".format(
-                    queueid, mimetype))
-        self.logger.debug(
-            "{}: trying to create BeatufilSoup object with parser lib {}, "
-            "text length is {} bytes".format(
-                queueid, self.parser_lib, len(text)))
-        soup = BeautifulSoup(text, self.parser_lib)
-        self.logger.debug(
-            "{}: sucessfully created BeautifulSoup object".format(queueid))
-        return soup
+            self.logger.error(
+                "{}: unable to find email body".format(queueid))
+            content = "ERROR: unable to find email body"
-    def get_text_multipart(self, queueid, msg, preferred=_html_text):
-        "Get the mail text of a multipart email in html form."
-        soup = None
-
-        for part in msg.get_payload():
-            mimetype = part.get_content_type()
-            if mimetype in [EMailNotification._plain_text,
-                            EMailNotification._html_text]:
-                soup = self.get_text(queueid, part)
-            elif mimetype.startswith("multipart"):
-                soup = self.get_text_multipart(queueid, part, preferred)
-
-            if soup is not None and mimetype == preferred:
-                break
-        return soup
+        return content
def sanitize(self, queueid, soup):
"Sanitize mail html text."
@@ -293,27 +292,6 @@ class EMailNotification(BaseNotification):
del(element.attrs[attribute])
return soup
- def get_html_text_part(self, queueid, msg):
- "Get the mail text of an email in html form."
- soup = None
- mimetype = msg.get_content_type()
-
- self.logger.debug(
- "{}: trying to find text part of email".format(queueid))
- if mimetype in [EMailNotification._plain_text,
- EMailNotification._html_text]:
- soup = self.get_text(queueid, msg)
- elif mimetype.startswith("multipart"):
- soup = self.get_text_multipart(queueid, msg)
-
- if soup is None:
- self.logger.error(
- "{}: unable to extract text part of email".format(queueid))
- text = "ERROR: unable to extract text from email body"
- soup = BeautifulSoup(text, "lxml", "UTF-8")
-
- return soup
-
def notify(self, queueid, quarantine_id, mailfrom, recipients, headers, fp,
subgroups=None, named_subgroups=None, synchronous=False):
"Notify recipients via email."
@@ -330,12 +308,19 @@ class EMailNotification(BaseNotification):
named_subgroups,
synchronous)
- # extract html text from email
- self.logger.debug(
- "{}: extraction email text from original email".format(queueid))
- soup = self.get_html_text_part(
+ # extract body from email
+ content = self.get_decoded_email_body(
queueid, email.message_from_binary_file(fp))
+ # create BeautifulSoup object
+ self.logger.debug(
+ "{}: trying to create BeatufilSoup object with parser lib {}, "
+ "text length is {} bytes".format(
+ queueid, self.parser_lib, len(content)))
+ soup = BeautifulSoup(content, self.parser_lib)
+ self.logger.debug(
+ "{}: sucessfully created BeautifulSoup object".format(queueid))
+
# replace picture sources
image_replaced = False
if self.strip_images:
diff --git a/pyquarantine/version.py b/pyquarantine/version.py
index 81f0fde..b1a19e3 100644
--- a/pyquarantine/version.py
+++ b/pyquarantine/version.py
@@ -1 +1 @@
-__version__ = "0.0.4"
+__version__ = "0.0.5"