From 7020c53b28deecd2546386bf8b18efb97f4d67da Mon Sep 17 00:00:00 2001 From: Thomas Oettli Date: Sat, 26 Oct 2019 13:04:19 +0200 Subject: [PATCH] Add option notification_email_parser_lib --- README.md | 2 ++ docs/pyquarantine.conf.example | 9 ++++++++- pyquarantine/notifications.py | 16 +++++++++++++--- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c056ee4..016a019 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,8 @@ The following configuration options are optional in each quarantine section: Enable to strip images from e-mails. This option superseeds notification_email_replacement_img. * **notification_email_replacement_img** Path to an image to replace images in e-mails. It is hold in memory during runtime. + * **notification_email_parser_lib** + HTML parser library used to parse the text part of e-mails (lxml or html.parser). ### Actions diff --git a/docs/pyquarantine.conf.example b/docs/pyquarantine.conf.example index 885927b..679fac7 100644 --- a/docs/pyquarantine.conf.example +++ b/docs/pyquarantine.conf.example @@ -119,7 +119,7 @@ notification_email_template = templates/notification.template # Option: notification_email_strip_images # Notes: Optionally enable this option to strip img tags from emails. -# Values: [ TRUE|ON|YES|FALSE|OFF|NO ] +# Values: [ TRUE | ON | YES | FALSE | OFF | NO ] # notification_email_strip_images = False @@ -138,6 +138,13 @@ notification_email_replacement_img = templates/removed.png # notification_email_embedded_imgs = templates/logo.png +# Option: notification_email_parser_lib +# Notes: Optionally set the parser library used to parse +# the text part of emails. +# Values: [ lxml | html.parser ] +# +notification_email_parser_lib = lxml + # Option: whitelist_type # Notes: Set the whitelist type. 
# Values: [ db | none ] diff --git a/pyquarantine/notifications.py b/pyquarantine/notifications.py index 8eed921..cbe1eb4 100644 --- a/pyquarantine/notifications.py +++ b/pyquarantine/notifications.py @@ -132,7 +132,8 @@ class EMailNotification(BaseNotification): # check if optional config options are present in config defaults = { "notification_email_replacement_img": "", - "notification_email_strip_images": "false" + "notification_email_strip_images": "false", + "notification_email_parser_lib": "lxml" } for option in defaults.keys(): if option not in config.keys() and \ @@ -181,6 +182,10 @@ class EMailNotification(BaseNotification): else: raise RuntimeError("error parsing notification_email_strip_images: unknown value") + self.parser_lib = self.config["notification_email_parser_lib"].strip() + if self.parser_lib not in ["lxml", "html.parser"]: + raise RuntimeError("error parsing notification_email_parser_lib: unknown value") + # read email replacement image if specified replacement_img = self.config["notification_email_replacement_img"].strip() if not strip_images and replacement_img: @@ -229,8 +234,13 @@ class EMailNotification(BaseNotification): "{}: content mimetype is {}".format( queueid, mimetype)) self.logger.debug( - "{}: creating BeatufilSoup object".format(queueid)) - return BeautifulSoup(text, "lxml") + "{}: trying to create BeatufilSoup object with parser lib {}, " + "text length is {} bytes".format( + queueid, self.parser_lib, len(text))) + soup = BeautifulSoup(text, self.parser_lib) + self.logger.debug( + "{}: sucessfully created BeautifulSoup object".format(queueid)) + return soup def get_text_multipart(self, queueid, msg, preferred=_html_text): "Get the mail text of a multipart email in html form."