3 namespace Drupal\Core\Mail;
5 use Drupal\Component\Utility\Html;
6 use Drupal\Component\Utility\Unicode;
7 use Drupal\Component\Utility\Xss;
8 use Drupal\Core\Site\Settings;
11 * Defines a class containing utility methods for formatting mail messages.
13 class MailFormatHelper {
/**
 * Internal array of urls replaced with tokens.
 *
 * Filled by htmlToMailUrls() while links are converted to footnote markers.
 *
 * @var array
 */
protected static $urls = [];

/**
 * Quoted regex expression based on base path.
 *
 * Built lazily by htmlToMailUrls() from the global $base_path.
 *
 * @var string
 */
protected static $regexp;

/**
 * Array of tags supported.
 *
 * Populated on first use by htmlToText().
 *
 * @var array
 */
protected static $supportedTags = [];
/**
 * Performs format=flowed soft wrapping for mail (RFC 3676).
 *
 * We use delsp=yes wrapping, but only break non-spaced languages when
 * absolutely necessary to avoid compatibility issues.
 *
 * We deliberately use LF rather than CRLF, see MailManagerInterface::mail().
 *
 * @param string $text
 *   The plain text to process.
 * @param string $indent
 *   (optional) A string to indent the text with. Only '>' characters are
 *   repeated on subsequent wrapped lines. Others are replaced by spaces.
 *
 * @return string
 *   The content of the email as a string with formatting applied.
 */
public static function wrapMail($text, $indent = '') {
  // Convert CRLF into LF.
  $text = str_replace("\r", '', $text);
  // See if soft-wrapping is allowed: any non-'>' indentation (which becomes
  // a space once cleaned) forces hard breaks.
  $clean_indent = static::htmlToTextClean($indent);
  $soft = strpos($clean_indent, ' ') === FALSE;
  $wrap_options = ['soft' => $soft, 'length' => strlen($indent)];
  // Check if the string has line breaks.
  if (strpos($text, "\n") !== FALSE) {
    // Remove trailing spaces to make existing breaks hard, but leave
    // signature marker untouched (RFC 3676, Section 4.3). Directly after a
    // leading "--" at least two spaces are required for a match, so the
    // single space of the "-- " separator line survives.
    $text = preg_replace('/(?(?<!^--) +\n|  +\n)/m', "\n", $text);
    // Wrap each line at the needed width. Late static binding keeps this
    // path consistent with the single-line path below, so a subclass
    // override of wrapMailLine() is honored in both cases.
    $lines = explode("\n", $text);
    array_walk($lines, [static::class, 'wrapMailLine'], $wrap_options);
    $text = implode("\n", $lines);
  }
  else {
    // Wrap this line.
    static::wrapMailLine($text, 0, $wrap_options);
  }
  // Empty lines with nothing but spaces.
  $text = preg_replace('/^ +\n/m', "\n", $text);
  // Space-stuff special lines.
  $text = preg_replace('/^(>| |From)/m', ' $1', $text);
  // Apply indentation. We only include non-'>' indentation on the first line.
  $text = $indent . substr(preg_replace('/^/m', $clean_indent, $text), strlen($indent));

  return $text;
}
/**
 * Transforms an HTML string into plain text, preserving its structure.
 *
 * The output will be suitable for use as 'format=flowed; delsp=yes' text
 * (RFC 3676) and can be passed directly to MailManagerInterface::mail() for
 * sending.
 *
 * We deliberately use LF rather than CRLF, see MailManagerInterface::mail().
 *
 * This function provides suitable alternatives for the following tags:
 * <a> <em> <i> <strong> <b> <br> <p> <blockquote> <ul> <ol> <li> <dl> <dt>
 * <dd> <h1> <h2> <h3> <h4> <h5> <h6> <hr>
 *
 * @param string $string
 *   The string to be transformed.
 * @param array $allowed_tags
 *   (optional) If supplied, a list of tags that will be transformed. If
 *   omitted, all supported tags are transformed.
 *
 * @return string
 *   The transformed string.
 */
public static function htmlToText($string, $allowed_tags = NULL) {
  // Cache list of supported tags.
  if (empty(static::$supportedTags)) {
    static::$supportedTags = ['a', 'em', 'i', 'strong', 'b', 'br', 'p',
      'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3',
      'h4', 'h5', 'h6', 'hr',
    ];
  }

  // Make sure only supported tags are kept.
  $allowed_tags = isset($allowed_tags) ? array_intersect(static::$supportedTags, $allowed_tags) : static::$supportedTags;

  // Make sure tags, entities and attributes are well-formed and properly
  // nested.
  $string = Html::normalize(Xss::filter($string, $allowed_tags));

  // Apply inline styles.
  $string = preg_replace('!</?(em|i)((?> +)[^>]*)?>!i', '/', $string);
  $string = preg_replace('!</?(strong|b)((?> +)[^>]*)?>!i', '*', $string);

  // Replace inline <a> tags with the text of link and a footnote.
  // 'See <a href="https://www.drupal.org">the Drupal site</a>' becomes
  // 'See the Drupal site [1]' with the URL included as a footnote.
  static::htmlToMailUrls(NULL, TRUE);
  $pattern = '@(<a[^>]+?href="([^"]*)"[^>]*?>(.+?)</a>)@i';
  // Use an array callable: the 'static::method' string form is a partially
  // supported callable that is deprecated as of PHP 8.2.
  $string = preg_replace_callback($pattern, [static::class, 'htmlToMailUrls'], $string);
  $urls = static::htmlToMailUrls();
  $footnotes = '';
  if (count($urls)) {
    $footnotes .= "\n";
    for ($i = 0, $max = count($urls); $i < $max; $i++) {
      $footnotes .= '[' . ($i + 1) . '] ' . $urls[$i] . "\n";
    }
  }

  // Split tags from text.
  $split = preg_split('/<([^>]+?)>/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and
  // literals and begins and ends with a literal (inserting $null as
  // required).
  // Odd/even counter (tag or no tag).
  $tag = FALSE;
  // Case conversion function.
  $casing = NULL;
  $output = '';
  // All current indentation string chunks.
  $indent = [];
  // Array of counters for opened lists.
  $lists = [];
  foreach ($split as $value) {
    // Holds a string ready to be formatted and output.
    $chunk = NULL;

    // Process HTML tags (but don't output any literally).
    if ($tag) {
      list($tagname) = explode(' ', strtolower($value), 2);
      switch ($tagname) {
        // List counters.
        case 'ul':
          array_unshift($lists, '*');
          break;

        case 'ol':
          array_unshift($lists, 1);
          break;

        case '/ul':
        case '/ol':
          array_shift($lists);
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Quotation/list markers, non-fancy headers.
        case 'blockquote':
          // Format=flowed indentation cannot be mixed with lists.
          $indent[] = count($lists) ? ' "' : '>';
          break;

        case 'li':
          $indent[] = isset($lists[0]) && is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
          break;

        case 'dd':
          $indent[] = '    ';
          break;

        case 'h3':
          $indent[] = '.... ';
          break;

        case 'h4':
          $indent[] = '.. ';
          break;

        case '/blockquote':
          if (count($lists)) {
            // Append closing quote for inline quotes (immediately).
            $output = rtrim($output, "> \n") . "\"\n";
            // Ensure blank new-line.
            $chunk = '';
          }
          // Intentional fall-through to the processing for '/li' and '/dd'.
        case '/li':
        case '/dd':
          array_pop($indent);
          break;

        case '/h3':
        case '/h4':
          array_pop($indent);
          // Intentional fall-through to the processing for '/h5' and '/h6'.
        case '/h5':
        case '/h6':
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Fancy headers.
        case 'h1':
          $indent[] = '======== ';
          $casing = '\Drupal\Component\Utility\Unicode::strtoupper';
          break;

        case 'h2':
          $indent[] = '-------- ';
          $casing = '\Drupal\Component\Utility\Unicode::strtoupper';
          break;

        case '/h1':
        case '/h2':
          $casing = NULL;
          // Pad the line with dashes.
          $output = static::htmlToTextPad($output, ($tagname == '/h1') ? '=' : '-', ' ');
          array_pop($indent);
          // Ensure blank new-line.
          $chunk = '';
          break;

        // Horizontal rulers.
        case 'hr':
          // Insert immediately.
          $output .= static::wrapMail('', implode('', $indent)) . "\n";
          $output = static::htmlToTextPad($output, '-');
          break;

        // Paragraphs and definition lists.
        case '/p':
        case '/dl':
          // Ensure blank new-line.
          $chunk = '';
          break;
      }
    }
    // Process blocks of text.
    else {
      // Convert inline HTML text to plain text; not removing line-breaks or
      // white-space, since that breaks newlines when sanitizing plain-text.
      $value = trim(Html::decodeEntities($value));
      if (Unicode::strlen($value)) {
        $chunk = $value;
      }
    }

    // See if there is something waiting to be output.
    if (isset($chunk)) {
      // Apply any necessary case conversion.
      if (isset($casing)) {
        $chunk = call_user_func($casing, $chunk);
      }
      $line_endings = Settings::get('mail_line_endings', PHP_EOL);
      // Format it and apply the current indentation.
      $output .= static::wrapMail($chunk, implode('', $indent)) . $line_endings;
      // Remove non-quotation markers from indentation, so list bullets and
      // header markers only show on the first line they apply to. Late
      // static binding matches the other helper calls in this method.
      $indent = array_map([static::class, 'htmlToTextClean'], $indent);
    }

    // The split array alternates between literal text and tag content.
    $tag = !$tag;
  }

  return $output . $footnotes;
}
/**
 * Wraps the words of one line of mail text.
 *
 * Used as the array_walk() callback of
 * \Drupal\Core\Mail\MailFormatHelper::wrapMail().
 *
 * MIME content header lines are not word-wrapped: attached files, especially
 * applications, can have long MIME types or long filenames that push the
 * line past the 77 character limit, and wrapping such a line would break the
 * email format. For instance, the attached file hello_drupal.docx produces
 * the following Content-Type:
 * @code
 * Content-Type:
 * application/vnd.openxmlformats-officedocument.wordprocessingml.document;
 * name="hello_drupal.docx"
 * @endcode
 *
 * @param string $line
 *   The line to wrap; modified in place.
 * @param int $key
 *   The array key supplied by array_walk(); unused.
 * @param array $values
 *   Wrapping options: 'soft' (bool) selects soft breaks (" \n") instead of
 *   hard breaks ("\n"), and 'length' (int) is the indentation width that is
 *   subtracted from the line limits.
 */
protected static function wrapMailLine(&$line, $key, $values) {
  $separator = $values['soft'] ? " \n" : "\n";
  $indent_width = $values['length'];

  // Do not break MIME headers which could be longer than 77 characters.
  $wrappable = TRUE;
  $header_names = [
    'Content-Type',
    'Content-Transfer-Encoding',
    'Content-Disposition',
    'Content-Description',
  ];
  foreach ($header_names as $name) {
    $prefix = $name . ': ';
    if (strncmp($line, $prefix, strlen($prefix)) === 0) {
      $wrappable = FALSE;
      break;
    }
  }

  // Use soft-breaks only for purely quoted or unindented text.
  if ($wrappable) {
    $line = wordwrap($line, 77 - $indent_width, $separator);
  }

  // Break really long words at the maximum width allowed.
  $line = wordwrap($line, 996 - $indent_width, $separator, TRUE);
}
/**
 * Keeps track of URLs and replaces them with placeholder tokens.
 *
 * Used as the preg_replace_callback() callback of
 * \Drupal\Core\Mail\MailFormatHelper::htmlToText().
 *
 * @param array|null $match
 *   (optional) A regex match for a link: index 2 holds the href value and
 *   index 3 the link text. Ignored when $reset is TRUE.
 * @param bool $reset
 *   (optional) TRUE to empty the internal URL list.
 *
 * @return string|array
 *   The link text followed by its footnote marker when a match is given;
 *   otherwise the list of URLs collected so far.
 */
protected static function htmlToMailUrls($match = NULL, $reset = FALSE) {
  // @todo Use request context instead.
  global $base_url, $base_path;

  // Reset internal URL list.
  if ($reset) {
    static::$urls = [];
    return static::$urls;
  }

  // Build the base path pattern once and reuse it afterwards.
  if (empty(static::$regexp)) {
    static::$regexp = '@^' . preg_quote($base_path, '@') . '@';
  }

  // Without a match this is a plain request for the collected URLs.
  if (!$match) {
    return static::$urls;
  }

  $url = $match[2];
  $label = $match[3];
  // Ensure all URLs are absolute.
  if (strpos($url, '://')) {
    $absolute = $url;
  }
  else {
    $absolute = preg_replace(static::$regexp, $base_url . '/', $url);
  }
  static::$urls[] = $absolute;

  return $label . ' [' . count(static::$urls) . ']';
}
/**
 * Replaces non-quotation markers from a piece of indentation with spaces.
 *
 * Used as the array_map() callback of
 * \Drupal\Core\Mail\MailFormatHelper::htmlToText().
 *
 * @param string $indent
 *   The indentation string to clean.
 *
 * @return string
 *   The indentation with everything except '>' replaced by a space.
 */
protected static function htmlToTextClean($indent) {
  // Anything that is not a '>' quotation marker is blanked out.
  $non_quote_marker = '/[^>]/';
  $blank = ' ';
  return preg_replace($non_quote_marker, $blank, $indent);
}
/**
 * Pads the last line with the given character.
 *
 * @param string $text
 *   The text to pad; its final character (a line break) is removed before
 *   padding and a line break is appended again afterwards.
 * @param string $pad
 *   The character to pad the end of the string with.
 * @param string $prefix
 *   (optional) Prefix to add to the string.
 *
 * @return string
 *   The text with the prefix and padding appended to its last line.
 *
 * @see \Drupal\Core\Mail\MailFormatHelper::htmlToText()
 */
protected static function htmlToTextPad($text, $pad, $prefix = '') {
  // Remove last line break.
  $body = substr($text, 0, -1);

  // Find where the last line starts; -1 stands for "no earlier line break".
  $last_break = strrpos($body, "\n");
  $line_start = ($last_break === FALSE) ? -1 : $last_break;

  // Calculate needed padding space.
  $used = (strlen($body) - $line_start) + strlen($prefix);
  $fill = max(0, 79 - $used);

  // Add prefix and padding, and restore linebreak.
  return $body . $prefix . str_repeat($pad, $fill) . "\n";
}