Mailroom - handling international emails
As of mailroom 0.6.0 we can safely process multilingual emails (from multiple operating systems)

Problem
Living in Switzerland, we handle emails in 4 languages, written with several encoding methods by Windows, Mac and Linux mail clients.
Solution
To read IMAP emails written in multiple languages (especially those sent from Outlook & Office 365), we need to handle multiple character encodings for the non-ASCII characters (ß, ç, ü, œ, etc.) that are common in non-English languages. This can be done by passing parser options in the config:
parser_opts: [charset_handler: &handle_charset/2]
Of course, you also need to supply the character set translation code. An example is shown below:
defmodule MyProject.Boundary.ImapClient do
use Mailroom.Inbox
require Logger
alias Mailroom.Inbox.MessageContext
# Builds the inbox configuration.
#
# The parser options below are the defaults; whatever is stored under
# `config :my_project, :mailroom` is merged on top and wins on key conflicts.
def config(_opts) do
  # add this entry to handle character parsing
  defaults = [parser_opts: [charset_handler: &handle_charset/2]]
  app_settings = Application.get_env(:my_project, :mailroom, [])

  Keyword.merge(defaults, app_settings)
end
# Inbox routing rule. No filter is declared inside the block, so presumably
# every incoming message matches - confirm against the Mailroom.Inbox docs.
match do
# NOTE(review): expected to make the inbox fetch the full message rather than
# only its envelope - confirm against the Mailroom.Inbox docs.
fetch_mail
# Dispatch the message to process_email/1 defined in this module.
process(__MODULE__, :process_email)
end
@doc """
Handles one email delivered by the inbox.

The sender is extracted and authorised, the attachments are collected and
everything is handed to `do_your_stuff/2`, whose result is returned. When a step
returns `{:error, reason}`, the reason is logged and the result of
`Logger.error/1` is returned instead.

## Common IMAP return patterns

- **`:seen`** - Email successfully read and processed
- **`:flagged`** - Email processed but with a problem (i.e. not authorized)
- **`:deleted`** - Email that should be removed
- **`nil`** or **`:unseen`** - Email unchanged (will read again)
- **`:answered`** - Email has been "responded" to
"""
def process_email(%MessageContext{message: message}) do
  with {:ok, email_address} <- get_from_address(message),
       :ok <- verify_authorized(email_address),
       # plain binding: collecting attachments is not a step that can fail the chain
       attachments = Mail.get_attachments(message) do
    do_your_stuff(email_address, attachments)
  else
    # NOTE(review): this branch returns whatever Logger.error/1 returns, which is
    # not one of the statuses listed in the docs - confirm that is intended.
    {:error, reason} -> Logger.error(reason)
  end
end
# Converts a message part from its declared charset into valid UTF-8.
#
# `charset_name` is the charset label handed over by the parser and `string` is
# the raw binary of the part. A binary is returned in every case.
#
# The label is compared case-insensitively: charset values are not case
# sensitive (RFC 2046, section 4.1.2), so a part labelled "ISO-8859-1" must be
# transcoded from Latin-1 exactly like "iso-8859-1". Without this it would be
# treated as an unexpected charset and every accented byte would be replaced by
# U+FFFD.
defp handle_charset(charset_name, string) when is_binary(charset_name) do
  case String.downcase(charset_name) do
    # Outlook Client and Office365 using "iso-8859-1" for non-ASCII character by default
    "iso-8859-1" -> :unicode.characters_to_binary(string, :latin1, :utf8)
    "utf-8" -> ensure_valid_utf8(string)
    _unexpected -> handle_unexpected_charset(charset_name, string)
  end
end

# A charset label that is not a binary is handled like any other unexpected charset.
defp handle_charset(charset_name, string), do: handle_unexpected_charset(charset_name, string)

# Returns `string` untouched when it is valid UTF-8; otherwise logs a warning and
# substitutes U+FFFD for the invalid bytes.
defp ensure_valid_utf8(string) do
  case :unicode.characters_to_binary(string, :utf8) do
    binary when is_binary(binary) ->
      binary

    {:error, _binary, _rest_data} ->
      Logger.warning("ImapClient: Invalid UTF-8 string detected, replacing invalid characters")
      replace_invalid(string, <<0xFFFD::utf8>>)

    {:incomplete, _binary, _incomplete_tail} ->
      Logger.warning("ImapClient: Incomplete UTF-8 string detected, replacing invalid tail")
      replace_invalid(string, <<0xFFFD::utf8>>)
  end
end

# Handle unexpected charsets and ensure no invalid characters
defp handle_unexpected_charset(charset_name, string) do
  Logger.warning(
    "ImapClient: Unexpected charset: #{charset_name}, attempting to fix invalid characters"
  )

  # No transcoding is attempted here: bytes that are not valid UTF-8 are replaced
  # with a chosen valid character `�` (<<0xFFFD::utf8>>); "?", "_", etc. would
  # work as well.
  replace_invalid(string, <<0xFFFD::utf8>>)
end
# Returns `binary` with every byte that is not part of a valid UTF-8 sequence
# swapped for `replacement`.
#
# code from: https://github.com/andrewtimberlake (author of mailroom)
# https://github.com/andrewtimberlake/mailroom/pull/24
# The current run of valid bytes is tracked as an (offset, length) window into
# the original binary and only sliced out when the run ends; per the original
# note, this approach deals with large binaries efficiently.
defp replace_invalid(binary, replacement) do
  replace_invalid(binary, binary, 0, 0, [], replacement)
end

# End of input: flush the pending run of valid bytes and flatten the iodata.
defp replace_invalid(<<>>, source, start, run, chunks, _replacement) do
  IO.iodata_to_binary([chunks, binary_part(source, start, run)])
end

# Valid code point: widen the current run by the code point's encoded size.
defp replace_invalid(<<codepoint::utf8, tail::binary>>, source, start, run, chunks, replacement) do
  width = byte_size(<<codepoint::utf8>>)
  replace_invalid(tail, source, start, run + width, chunks, replacement)
end

# Invalid byte: flush the run, emit the replacement and start a new, empty run
# right after the offending byte.
defp replace_invalid(<<_bad_byte, tail::binary>>, source, start, run, chunks, replacement) do
  valid_run = binary_part(source, start, run)
  replace_invalid(tail, source, start + run + 1, 0, [chunks, valid_run, replacement], replacement)
end
# Extracts the sender's address from the "from" header of `message`.
#
# Returns `{:ok, address}` (cleaned by `clean_email_address/1`) for the supported
# header shapes. Returns `{:error, text}` after logging `text` when the header
# has any other shape or when an `ArgumentError` is raised along the way.
defp get_from_address(message) do
  from_header = Mail.Message.get_header(message, "from")

  case from_header do
    # {name, address} tuple
    {_display_name, address} when is_binary(address) ->
      {:ok, clean_email_address(address)}

    # bare address string
    address when is_binary(address) ->
      {:ok, clean_email_address(address)}

    # list holding a single address
    [address] when is_binary(address) ->
      {:ok, clean_email_address(address)}

    # rare but possible (according to RFC 5322) - to have multiple addresses in from header
    [address | others] when is_binary(address) and length(others) > 0 ->
      Logger.warning(
        "Mail.Imap: Multiple addresses in 'from' header, using first: #{inspect([address | others])}"
      )

      {:ok, clean_email_address(address)}

    other ->
      error_text =
        "Mail.Imap: cannot decode from_email #{inspect(other)}, from email message: #{inspect(message)}"

      Logger.error(error_text)
      {:error, error_text}
  end
rescue
  ArgumentError ->
    error_text =
      "Mail.Imap: cannot decode from_email from email message: invalid format - #{inspect(message)}"

    Logger.error(error_text)
    {:error, error_text}
end
# Strips surrounding whitespace, then any leading "<" and trailing ">" characters,
# from an email address
# (e.g., "<email@example.com>" -> "email@example.com")
defp clean_email_address(email) when is_binary(email) do
  trimmed = String.trim(email)
  without_opening = String.trim_leading(trimmed, "<")
  String.trim_trailing(without_opening, ">")
end
end
and a sample auth-config:
# config/runtime.exs
# IMAP connection settings. The inbox module's config/1 reads this keyword list
# with Application.get_env(:my_project, :mailroom, []).
config :my_project, :mailroom,
ssl: true,
# NOTE(review): `verify: :verify_none` switches off TLS certificate verification,
# so the server's identity is not checked and the credentials below could be
# exposed to a man-in-the-middle. Prefer `verify: :verify_peer` with a CA store
# in production - confirm how these options are passed on to :ssl.
ssl_opts: [verify: :verify_none],
# System.fetch_env!/1 raises when the environment variable is not set.
server: System.fetch_env!("IMAP_SERVER"),
username: System.fetch_env!("IMAP_USERNAME"),
password: System.fetch_env!("IMAP_PASSWORD"),
folder: :inbox,
debug: false