/++
	Create MIME emails with things like HTML, attachments, and send with convenience wrappers around std.net.curl's SMTP function, or read email from an mbox file.

	For preparing and sending outgoing email, see [EmailMessage]. For processing incoming email or opening .eml files, mbox files, etc., see [IncomingEmailMessage].

	History:
		Originally released as open source on August 11, 2012. The last-modified date of its predecessor file was January 2011.

		Many of the public string members were overhauled on May 13, 2024. Compatibility methods are provided so your code will hopefully still work, but this also results in some stricter adherence to email encoding rules, so you should retest if you update after then.

	Future_Directions:
		I might merge `IncomingEmailMessage` and `EmailMessage` some day, it seems silly to have them completely separate like this.
+/
module arsd.email;

import std.net.curl;

import std.base64;
import std.string;
import std.range;
import std.utf;
import std.array;
import std.algorithm.iteration;

import arsd.characterencodings;

public import arsd.core : FilePath;

// import std.uuid;
// smtpMessageBoundary = randomUUID().toString();

// SEE ALSO: std.net.curl.SMTP

/++
	Credentials for a SMTP relay, as passed to [std.net.curl.SMTP].
+/
struct RelayInfo {
	/++
		Should be as a url, such as `smtp://example.com` or `smtps://example.com`. You normally want smtp:// - even if you want TLS encryption, smtp uses STARTTLS so it gets that. smtps will only work if the server supports tls from the start, which is not always the case.
	+/
	string server;
	string username; ///
	string password; ///
}

/++
	Representation of an email attachment.
+/
struct MimeAttachment {
	string type; /// e.g. `text/plain`
	string filename; ///
	const(ubyte)[] content; ///
	string id; ///
}

///
enum ToType {
	to,
	cc,
	bcc
}

/++
	Structured representation of email users, including the name and email address as separate components.

	`EmailRecipient` represents a single user, and `RecipientList` represents multiple users. A "recipient" may also be a from or reply to address.


	`RecipientList` is a wrapper over `EmailRecipient[]` that provides overloads that take string arguments, for compatibility for users of previous versions of the `arsd.email` api. It should generally work as you expect if you just pretend it is a normal array though (and if it doesn't, you can get the internal array via the `recipients` member.)

	History:
		Added May 13, 2024 (dub v12.0) to replace the old plain, public strings and arrays of strings.
+/
struct EmailRecipient {
	/++
		The email user's name. It should not have quotes or any other encoding.

		For example, `Adam D. Ruppe`.
	+/
	string name;
	/++
		The email address. It should not have brackets or any other encoding.

		For example, `destructionator@gmail.com`.
	+/
	string address;

	/++
		Returns a string representing this email address, in a format suitable for inclusion in a message about to be saved or transmitted.

		In many cases, this is easy to read for people too, but not in all cases.
	+/
	string toProtocolString(string linesep = "\r\n") {
		if(name.length)
			return "\"" ~ encodeEmailHeaderContentForTransmit(name, linesep) ~ "\" <" ~ address ~ ">";
		return address;
	}

	/++
		Returns a string representing this email address, in a format suitable for being read by people. This is not necessarily reversible.
	+/
	string toReadableString() {
		if(name.length)
			return "\"" ~ name ~ "\" <" ~ address ~ ">";
		return address;
	}

	/++
		Construct an `EmailRecipient` either from a name and address (preferred!) or from an encoded string as found in an email header.

		Examples:

		`EmailRecipient("Adam D. Ruppe", "destructionator@gmail.com")` or `EmailRecipient(`"Adam D. Ruppe" <destructionator@gmail.com>`);
	+/
	this(string name, string address) {
		this.name = name;
		this.address = address;
	}

	/// ditto
	this(string str) {
		this = str;
	}

	/++
		Provided for compatibility for users of old versions of `arsd.email` - does implicit conversion from `EmailRecipient` to a plain string (in protocol format), as was present in previous versions of the api.
	+/
	alias toProtocolString this;

	/++
		Parses a header-style string, either a bare address or `"Name" <address>`, into the `name` and `address` members.

		The address is whatever sits between the last `<` and the next `>`; a missing `>` is tolerated. Everything before the `<` is unquoted and decoded as the name. A string without any `<` is taken as the address verbatim.
	+/
	void opAssign(string str) {
		// the last '<' is the one that opens the address; an earlier one can
		// only be part of a (quoted) display name
		auto open = str.lastIndexOf("<");
		if(open == -1) {
			name = null;
			address = str;
			return;
		}

		name = decodeEncodedWord(unquote(str[0 .. open].strip));

		// do not assume the '>' is the very last character: there may be
		// trailing whitespace, or the closing bracket may be missing entirely
		auto rest = str[open + 1 .. $];
		auto close = rest.indexOf(">");
		address = (close == -1 ? rest : rest[0 .. close]).strip;
	}
}

/// ditto
struct RecipientList {
	/// The underlying array; use this directly if the compatibility wrappers get in the way.
	EmailRecipient[] recipients;

	/// Replaces the whole list, parsing each string as a header-style recipient.
	void opAssign(string[] strings) {
		recipients = null;
		foreach(s; strings)
			recipients ~= EmailRecipient(s);
	}
	/// Replaces the whole list with the given array (not copied).
	void opAssign(EmailRecipient[] recpts) {
		this.recipients = recpts;
	}

	/// Appends one recipient; the string overload parses it first.
	void opOpAssign(string op : "~")(EmailRecipient r) {
		recipients ~= r;
	}
	/// ditto
	void opOpAssign(string op : "~")(string s) {
		recipients ~= EmailRecipient(s);
	}

	/// `foreach` support, with and without an index.
	int opApply(int delegate(size_t idx, EmailRecipient rcp) dg) {
		foreach(idx, item; recipients)
			if(auto result = dg(idx, item))
				return result;
		return 0;
	}
	/// ditto
	int opApply(int delegate(EmailRecipient rcp) dg) {
		foreach(item; recipients)
			if(auto result = dg(item))
				return result;
		return 0;
	}

	/// Number of recipients in the list.
	size_t length() {
		return recipients.length;
	}

	/// All recipients in protocol format, joined with `", "`.
	string toProtocolString(string linesep = "\r\n") {
		string ret;
		foreach(idx, item; recipients) {
			if(idx)
				ret ~= ", ";
			ret ~= item.toProtocolString(linesep);
		}
		return ret;
	}

	// forward range primitives over `recipients`
	EmailRecipient front() { return recipients[0]; }
	void popFront() { recipients = recipients[1 .. $]; }
	bool empty() { return recipients.length == 0; }
	RecipientList save() { return this; }
}

/+
	Removes the surrounding double quotes from `s`, if it starts with one.

	Strings that do not start with a quote are returned unchanged. If the
	closing quote is missing (including the degenerate input of a single
	quote character) only the opening quote is dropped.
+/
private string unquote(string s) {
	if(s.length == 0)
		return s;
	if(s[0] != '"')
		return s;
	if(s.length >= 2 && s[$ - 1] == '"')
		s = s[1 .. $ - 1]; // strip both quotes
	else
		s = s[1 .. $]; // unterminated: only the opening quote is there to strip
	// FIXME: possible to have \" escapes in there too
	return s;
}

// Key type for HeadersHash: hashes and compares without regard to case,
// but remembers the spelling it was created with.
private struct CaseInsensitiveString {
	string actual;

	size_t toHash() const {
		string l = actual.toLower;
		return typeid(string).getHash(&l);
	}
	bool opEquals(ref const typeof(this) s) const {
		return icmp(s.actual, this.actual) == 0;
	}
	bool opEquals(string s) const {
		return icmp(s, this.actual) == 0;
	}

	alias actual this;
}

/++
	A type that acts similarly to a `string[string]` to hold email headers in a case-insensitive way.
+/
struct HeadersHash {
	string[CaseInsensitiveString] hash;

	string opIndex(string key) const {
		return hash[CaseInsensitiveString(key)];
	}
	string opIndexAssign(string value, string key) {
		return hash[CaseInsensitiveString(key)] = value;
	}
	inout(string)* opBinaryRight(string op : "in")(string key) inout {
		return CaseInsensitiveString(key) in hash;
	}
	alias hash this;
}

unittest {
	HeadersHash h;
	h["From"] = "test";
	h["from"] = "other";
	foreach(k, v; h) {
		assert(k == "From");
		assert(v == "other");
	}

	assert("from" in h);
	assert("From" in h);
	assert(h["from"] == "other");

	const(HeadersHash) ch = HeadersHash([CaseInsensitiveString("From") : "test"]);
	assert(ch["from"] == "test");
	assert("From" in ch);
}

/++
	For OUTGOING email


	To use:

	---
	auto message = new EmailMessage();
	message.to ~= "someuser@example.com";
	message.from = "youremail@example.com";
	message.subject = "My Subject";
	message.setTextBody("hi there");
	//message.toString(); // get string to send externally
	message.send(); // send via some relay
	// may also set replyTo, etc
	---

	History:
		This class got an API overhaul on May 13, 2024. Some undocumented members were removed, and some public members got changed (albeit in a mostly compatible way).
+/
class EmailMessage {
	/++
		Adds a custom header to the message. The header name should not include a colon and must not duplicate a header set elsewhere in the class; for example, do not use this to set `To`, and instead use the [to] field.

		Setting the same header multiple times will overwrite the old value. It will not set duplicate headers and does not retain the specific order of which you added headers.

		Throws:
			`InvalidArgumentsException` if the name is empty, contains a colon or a control character (such as CR or LF), or names a header that is managed through a dedicated member.

		History:
			Prior to May 13, 2024, this assumed the value was previously encoded. This worked most of the time but also left open the possibility of incorrectly encoded values, including the possibility of injecting inappropriate headers.

			Since May 13, 2024, it now encodes the header content internally. You should NOT pass pre-encoded values to this function anymore.

			It also would previously allow you to set repeated headers like `Subject` or `To`. These now throw exceptions.

			It previously also allowed duplicate headers. Adding the same thing twice will now silently overwrite the old value instead.
	+/
	void setHeader(string name, string value, string file = __FILE__, size_t line = __LINE__) {
		import arsd.core;
		if(name.length == 0)
			throw new InvalidArgumentsException("name", "name cannot be an empty string", LimitedVariant(name), "setHeader", file, line);
		if(name.indexOf(":") != -1)
			throw new InvalidArgumentsException("name", "do not put a colon in the header name", LimitedVariant(name), "setHeader", file, line);
		// the name is written to the wire as-is (only the value gets encoded),
		// so a line break in it would let the caller's data inject extra headers
		foreach(char ch; name)
			if(ch < 0x20 || ch == 0x7f)
				throw new InvalidArgumentsException("name", "header names cannot contain control characters", LimitedVariant(name), "setHeader", file, line);
		if(!headerSettableThroughAA(name))
			throw new InvalidArgumentsException("name", "use named methods/properties for this header instead of setHeader", LimitedVariant(name), "setHeader", file, line);

		headers_[name] = value;
	}

	/// Returns `false` for headers that are owned by dedicated members or by the MTA and so must not go through [setHeader].
	protected bool headerSettableThroughAA(string name) {
		switch(name.toLower) {
			case "to", "cc", "bcc":
			case "from", "reply-to", "in-reply-to":
			case "subject":
			case "content-type", "content-transfer-encoding", "mime-version":
			case "received", "return-path": // set by the MTA
				return false;
			default:
				return true;
		}
	}

	/++
		Recipients of the message. You can use operator `~=` to add people to this list, or you can also use [addRecipient] to achieve the same result.

		---
		message.to ~= EmailRecipient("Adam D. Ruppe", "destructionator@gmail.com");
		message.cc ~= EmailRecipient("John Doe", "john.doe@example.com");
		// or, same result as the above two lines:
		message.addRecipient("Adam D. Ruppe", "destructionator@gmail.com");
		message.addRecipient("John Doe", "john.doe@example.com", ToType.cc);

		// or, the old style code that still works, but is not recommended, since
		// it is harder to encode properly for anything except pure ascii names:
		message.to ~= `"Adam D. Ruppe" <destructionator@gmail.com>`
		---

		History:
			On May 13, 2024, the types of these changed. Before, they were `public string[]`; plain string arrays. This put the burden of proper encoding on the user, increasing the probability of bugs. Now, they are [RecipientList]s - internally, an array of `EmailRecipient` objects, but with a wrapper to provide compatibility with the old string-based api.
	+/
	RecipientList to;
	/// ditto
	RecipientList cc;
	/// ditto
	RecipientList bcc;

	/++
		Represents the `From:` and `Reply-To:` header values in the email.


		Note that the `from` member is the "From:" header, which is not necessarily the same as the "envelope from". The "envelope from" is set by the email server usually based on your login credentials. The email server may or may not require these to match.

		History:
			On May 13, 2024, the types of these changed from plain `string` to [EmailRecipient], to try to get the encoding easier to use correctly. `EmailRecipient` offers overloads for string parameters for compatibility, so your code should not need changing, however if you use non-ascii characters in your names, you should retest to ensure it still works correctly.
	+/
	EmailRecipient from;
	/// ditto
	EmailRecipient replyTo;
	/// The `Subject:` header value in the email.
	string subject;
	/// The `In-Reply-to:` header value. This should be set to the same value as the `Message-ID` header from the message you're replying to.
	string inReplyTo;

	private string textBody_;
	private string htmlBody_;

	private HeadersHash headers_;

	/++
		Gets and sets the current text body.

		History:
			Prior to May 13, 2024, this was a simple `public string` member, but still had a [setTextBody] method too. It now is a public property that works through that method.
	+/
	string textBody() {
		return textBody_;
	}
	/// ditto
	void textBody(string text) {
		setTextBody(text);
	}
	/++
		Gets the current html body, if any.

		There is no setter for this property, use [setHtmlBody] instead.

		History:
			Prior to May 13, 2024, this was a simple `public string` member. This let you easily get the `EmailMessage` object into an inconsistent state.
	+/
	string htmlBody() {
		return htmlBody_;
	}

	/++
		If you use the send method with an SMTP server, you don't want to change this.
		While RFC 2045 mandates CRLF as a line separator, there are some edge-cases where this won't work.
		When passing the E-Mail string to a unix program which handles communication with the SMTP server, some (i.e. qmail)
		expect the system line separator (LF) instead.
		Notably, the google mail REST API will choke on CRLF lineseps and produce strange emails (as of 2024).

		Do not change this after calling other methods, since it might break presaved values.
	+/
	string linesep = "\r\n";

	/++
		History:
			Added May 13, 2024
	+/
	this(string linesep = "\r\n") {
		this.linesep = linesep;
	}

	private bool isMime = false;
	private bool isHtml = false;

	/++
		Adds a recipient given as separate name and address.

		The name is stored as given (after decoding any RFC 2047 encoded-word in it, as older versions of this function did); it may contain quotes or angle brackets without confusing the address.
	+/
	void addRecipient(string name, string email, ToType how = ToType.to) {
		// build the structured value directly instead of gluing together a
		// `"name" <email>` string and parsing it back, which broke as soon as
		// the name itself contained a '<'
		appendRecipient(EmailRecipient(decodeEncodedWord(name), email), how);
	}

	/// Adds a recipient given as a header-style string, e.g. `"Name" <user@example.com>` or a bare address.
	void addRecipient(string who, ToType how = ToType.to) {
		appendRecipient(EmailRecipient(who), how);
	}

	// routes an already-built recipient to the list selected by `how`
	private void appendRecipient(EmailRecipient who, ToType how) {
		final switch(how) {
			case ToType.to:
				to ~= who;
			break;
			case ToType.cc:
				cc ~= who;
			break;
			case ToType.bcc:
				bcc ~= who;
			break;
		}
	}

	/++
		Sets the plain text body of the email. You can also separately call [setHtmlBody] to set a HTML body.
	+/
	void setTextBody(string text) {
		textBody_ = text.strip;
	}
	/++
		Sets the HTML body to the mail, which can support rich text, inline images (see [addInlineImage]), etc.

		Automatically sets a text fallback if you haven't already, unless you pass `false` as the `addFallback` template value. Adding the fallback requires [arsd.htmltotext].

		History:
			The `addFallback` parameter was added on May 13, 2024.
	+/
	void setHtmlBody(bool addFallback = true)(string html) {
		isMime = true;
		isHtml = true;
		htmlBody_ = html;

		static if(addFallback) {
			import arsd.htmltotext;
			if(textBody_ is null)
				textBody_ = htmlToText(html);
		}
	}

	const(MimeAttachment)[] attachments;

	/++
		The attachmentFileName is what is shown to the user, not the file on your sending computer. It should NOT have a path in it.
		If you want a filename from your computer, try [addFileAsAttachment].

		The `mimeType` can be excluded if the filename has a common extension supported by the library.

		---
		message.addAttachment("text/plain", "something.txt", std.file.read("/path/to/local/something.txt"));
		---

		History:
			The overload without `mimeType` was added October 28, 2024.

			The parameter `attachmentFileName` was previously called `filename`. This was changed for clarity and consistency with other overloads on October 28, 2024.
	+/
	void addAttachment(string mimeType, string attachmentFileName, const void[] content, string id = null) {
		isMime = true;
		attachments ~= MimeAttachment(mimeType, attachmentFileName, cast(const(ubyte)[]) content, id);
	}


	/// ditto
	void addAttachment(string attachmentFileName, const void[] content, string id = null) {
		import arsd.core;
		addAttachment(FilePath(attachmentFileName).contentTypeFromFileExtension, attachmentFileName, content, id);
	}

	/++
		Reads the local file and attaches it.

		If `attachmentFileName` is null, it uses the filename of `localFileName`, without the directory.

		If `mimeType` is null, it guesses one based on the local file name's file extension.

		If these cannot be determined, it will throw an `InvalidArgumentsException`.

		History:
			Added October 28, 2024
	+/
	void addFileAsAttachment(FilePath localFileName, string attachmentFileName = null, string mimeType = null, string id = null) {
		if(mimeType is null)
			mimeType = localFileName.contentTypeFromFileExtension;
		if(attachmentFileName is null)
			attachmentFileName = localFileName.filename;

		import std.file;

		addAttachment(mimeType, attachmentFileName, std.file.read(localFileName.toString()), id);

		// see also: curl.h :1877 CURLOPT(CURLOPT_XOAUTH2_BEARER, CURLOPTTYPE_STRINGPOINT, 220),
		// also option to force STARTTLS
	}

	/// in the html, use img src="cid:ID_GIVEN_HERE"
	void addInlineImage(string id, string mimeType, string filename, const void[] content) {
		assert(isHtml);
		isMime = true;
		inlineImages ~= MimeAttachment(mimeType, filename, cast(const(ubyte)[]) content, id);
	}

	const(MimeAttachment)[] inlineImages;


	/* we should build out the mime thingy
		related
			mixed
			alternate
	*/

	/// Returns the MIME formatted email string, including encoded attachments
	override string toString() {
		assert(!isHtml || (isHtml && isMime));

		string[] headers;
		foreach(k, v; this.headers_) {
			if(headerSettableThroughAA(k))
				headers ~= k ~ ": " ~ encodeEmailHeaderContentForTransmit(v, this.linesep);
		}

		if(to.length)
			headers ~= "To: " ~ to.toProtocolString(this.linesep);
		if(cc.length)
			headers ~= "Cc: " ~ cc.toProtocolString(this.linesep);

		if(from.length)
			headers ~= "From: " ~ from.toProtocolString(this.linesep);

			//assert(0, headers[$-1]);

		if(subject !is null)
			headers ~= "Subject: " ~ encodeEmailHeaderContentForTransmit(subject, this.linesep);
		if(replyTo !is null)
			headers ~= "Reply-To: " ~ replyTo.toProtocolString(this.linesep);
		if(inReplyTo !is null)
			headers ~= "In-Reply-To: " ~ encodeEmailHeaderContentForTransmit(inReplyTo, this.linesep);

		if(isMime)
			headers ~= "MIME-Version: 1.0";

		/+
		if(inlineImages.length) {
			headers ~= "Content-Type: multipart/related; boundary=" ~ boundary;
			// so we put the alternative inside asthe first attachment with as seconary boundary
			// then we do the images
		} else
		if(attachments.length)
			headers ~= "Content-Type: multipart/mixed; boundary=" ~ boundary;
		else if(isHtml)
			headers ~= "Content-Type: multipart/alternative; boundary=" ~ boundary;
		else
			headers ~= "Content-Type: text/plain; charset=UTF-8";
		+/


		string msgContent;

		if(isMime) {
			// built inside out: body (plain or alternative), then wrapped in
			// related if there are inline images, then in mixed if there are attachments
			MimeContainer top;

			{
				MimeContainer mimeMessage;
				enum NO_TRANSFER_ENCODING = "Content-Transfer-Encoding: 8bit";
				if(isHtml) {
					auto alternative = new MimeContainer("multipart/alternative");
					alternative.stuff ~= new MimeContainer("text/plain; charset=UTF-8", textBody_).with_header(NO_TRANSFER_ENCODING);
					alternative.stuff ~= new MimeContainer("text/html; charset=UTF-8", htmlBody_).with_header(NO_TRANSFER_ENCODING);
					mimeMessage = alternative;
				} else {
					mimeMessage = new MimeContainer("text/plain; charset=UTF-8", textBody_).with_header(NO_TRANSFER_ENCODING);
				}
				top = mimeMessage;
			}

			{
				MimeContainer mimeRelated;
				if(inlineImages.length) {
					mimeRelated = new MimeContainer("multipart/related");

					mimeRelated.stuff ~= top;
					top = mimeRelated;

					foreach(attachment; inlineImages) {
						auto mimeAttachment = new MimeContainer(attachment.type ~ "; name=\""~attachment.filename~"\"");
						mimeAttachment.headers ~= "Content-Transfer-Encoding: base64";
						mimeAttachment.headers ~= "Content-ID: <" ~ attachment.id ~ ">";
						mimeAttachment.content = encodeBase64Mime(cast(const(ubyte)[]) attachment.content, this.linesep);

						mimeRelated.stuff ~= mimeAttachment;
					}
				}
			}

			{
				MimeContainer mimeMixed;
				if(attachments.length) {
					mimeMixed = new MimeContainer("multipart/mixed");

					mimeMixed.stuff ~= top;
					top = mimeMixed;

					foreach(attachment; attachments) {
						auto mimeAttachment = new MimeContainer(attachment.type);
						mimeAttachment.headers ~= "Content-Disposition: attachment; filename=\""~encodeEmailHeaderContentForTransmit(attachment.filename, this.linesep)~"\"";
						mimeAttachment.headers ~= "Content-Transfer-Encoding: base64";
						if(attachment.id.length)
							mimeAttachment.headers ~= "Content-ID: <" ~ attachment.id ~ ">";

						mimeAttachment.content = encodeBase64Mime(cast(const(ubyte)[]) attachment.content, this.linesep);

						mimeMixed.stuff ~= mimeAttachment;
					}
				}
			}

			headers ~= top.contentType;
			msgContent = top.toMimeString(true, this.linesep);
		} else {
			headers ~= "Content-Type: text/plain; charset=UTF-8";
			msgContent = textBody_;
		}


		string msg;
		msg.reserve(htmlBody_.length + textBody_.length + 1024);

		foreach(header; headers)
			msg ~= header ~ this.linesep;
		if(msg.length) // has headers
			msg ~= this.linesep;

		msg ~= msgContent;

		return msg;
	}

	// Reduces an address that may still carry angle brackets (`<a@b>`) to the bare `a@b`.
	private static string bareAddress(string person) {
		auto idx = person.indexOf("<");
		if(idx != -1)
			person = person[idx + 1 .. $];
		idx = person.indexOf(">");
		if(idx != -1)
			person = person[0 .. idx];
		return person;
	}

	/++
		Sends via a given SMTP relay.

		The envelope recipients are the addresses of everyone in [to], [cc] and [bcc]; the envelope sender is the address of [from].
	+/
	void send(RelayInfo mailServer = RelayInfo("smtp://localhost")) {
		auto smtp = SMTP(mailServer.server);

		// NOTE(review): certificate and host name verification are switched off
		// here, so a TLS session to the relay is not authenticated. Left as-is
		// because existing setups with self-signed relays depend on it.
		smtp.verifyHost = false;
		smtp.verifyPeer = false;
		//smtp.verbose = true;

		{
			// std.net.curl doesn't work well with STARTTLS if you don't
			// put smtps://... and if you do, it errors if you can't start
			// with a TLS connection from the beginning.

			// This change allows ssl if it can.
			import std.net.curl;
			import etc.c.curl;
			smtp.handle.set(CurlOption.use_ssl, CurlUseSSL.tryssl);
		}

		if(mailServer.username.length)
			smtp.setAuthentication(mailServer.username, mailServer.password);

		// take the envelope addresses from the structured `address` member
		// rather than re-parsing the protocol string, whose display name part
		// may itself contain a '<'
		const(char)[][] allRecipients;
		foreach(person; to) allRecipients ~= bareAddress(person.address);
		foreach(person; cc) allRecipients ~= bareAddress(person.address);
		foreach(person; bcc) allRecipients ~= bareAddress(person.address);

		smtp.mailTo(allRecipients);

		smtp.mailFrom = bareAddress(from.address);
		smtp.message = this.toString();
		smtp.perform();
	}
}

/++
	Convenience function: builds a plain text [EmailMessage] with a single recipient and sends it through `mailServer`.
+/
void email(string to, string subject, string message, string from, RelayInfo mailServer = RelayInfo("smtp://localhost")) {
	auto msg = new EmailMessage();
	msg.from = from;
	msg.to = [to];
	msg.subject = subject;
	msg.textBody_ = message;
	msg.send(mailServer);
}

// private:

import std.conv;

/// for reading
class MimePart {
	string[] headers;
	immutable(ubyte)[] content;
	immutable(ubyte)[] encodedContent; // usually valid only for GPG, and will be cleared by creator; canonical form
	string textContent;
	MimePart[] stuff;

	string name;
	string charset;
	string type;
	string transferEncoding;
	string disposition;
	string id;
	string filename;
	// gpg signatures
	string gpgalg;
	string gpgproto;

	/// Converts this part to a [MimeAttachment], descending into single-child `multipart/mixed` and into the first child of `multipart/alternative`.
	MimeAttachment toMimeAttachment() {
		if(type == "multipart/mixed" && stuff.length == 1)
			return stuff[0].toMimeAttachment;

		// just attach the preferred thing. i think this is never triggered since this is most likely handled by the text/html body scanner
		if(type == "multipart/alternative" && stuff.length >= 1)
			return stuff[0].toMimeAttachment;

		MimeAttachment att;
		att.type = type;
		att.id = id;
		if(filename.length == 0 && name.length > 0 ) {
			att.filename = name;
		} else {
			att.filename = filename;
		}

		if(type == "multipart/related" && stuff.length >= 1) {
			// super hack for embedded html for a special user; it is basically a mhtml attachment so we report the filename of the html part for the whole thing
			// really, the code should understand the type itself but this is still better than nothing
			if(att.filename.length == 0)
				att.filename = stuff[0].filename;
			if(att.filename.length == 0)
				att.filename = stuff[0].name;
			att.filename = att.filename.replace(".html", ".mhtml");
			att.content = []; // FIXME: recreate the concat thing and put some headers on it to make a valid .mhtml file out of it
		} else {
			att.content = content;
		}

		return att;
	}

	/++
		Parses a MIME part from its lines.

		If `contentType` is null, the headers are read from the start of `lines` (up to the first empty line); otherwise `lines` is taken to be body only and `contentType` supplies the `Content-Type` value. Multipart bodies are split on the boundary and parsed recursively into `stuff`.
	+/
	this(immutable(ubyte)[][] lines, string contentType = null) {
		string boundary;

		void parseContentType(string content) {
			//{ import std.stdio; writeln("c=[", content, "]"); }
			foreach(k, v; breakUpHeaderParts(content)) {
				//{ import std.stdio; writeln("  k=[", k, "]; v=[", v, "]"); }
				switch(k) {
					case "root":
						type = v;
					break;
					case "name":
						name = v;
					break;
					case "charset":
						charset = v;
					break;
					case "boundary":
						boundary = v;
					break;
					case "micalg":
						gpgalg = v;
					break;
					case "protocol":
						gpgproto = v;
					break;
					default:
						// parameters we do not know (format=flowed, ...) are
						// ignored; they must not be mistaken for the gpg micalg
				}
			}
		}

		if(contentType is null) {
			// read headers immediately...
			auto copyOfLines = lines;
			immutable(ubyte)[] currentHeader;

			void commitHeader() {
				if(currentHeader.length == 0)
					return;
				string h = decodeEncodedWord(cast(string) currentHeader);
				headers ~= h;
				currentHeader = null;

				auto idx = h.indexOf(":");
				if(idx != -1) {
					auto name = h[0 .. idx].strip.toLower;
					auto content = h[idx + 1 .. $].strip;

					string[4] filenames_found;

					switch(name) {
						case "content-type":
							parseContentType(content);
						break;
						case "content-transfer-encoding":
							transferEncoding = content.toLower;
						break;
						case "content-disposition":
							foreach(k, v; breakUpHeaderParts(content)) {
								switch(k) {
									case "root":
										disposition = v;
									break;
									case "filename":
										filename = v;
									break;
									// FIXME: https://datatracker.ietf.org/doc/html/rfc2184#section-3 is what it is SUPPOSED to do
									case "filename*0":
										filenames_found[0] = v;
									break;
									case "filename*1":
										filenames_found[1] = v;
									break;
									case "filename*2":
										filenames_found[2] = v;
									break;
									case "filename*3":
										filenames_found[3] = v;
									break;
									default:
								}
							}
						break;
						case "content-id":
							id = content;
						break;
						default:
					}

					if (filenames_found[0] != "") {
						foreach (string v; filenames_found) {
							this.filename ~= v;
						}
					}
				}
			}

			foreach(line; copyOfLines) {
				lines = lines[1 .. $];
				if(line.length == 0)
					break;

				if(line[0] == ' ' || line[0] == '\t')
					currentHeader ~= (cast(string) line).stripLeft();
				else {
					if(currentHeader.length) {
						commitHeader();
					}
					currentHeader = line;
				}
			}

			commitHeader();
		} else {
			parseContentType(contentType);
		}

		// if it is multipart, find the start boundary. we'll break it up and fill in stuff
		// otherwise, all the data that follows is just content

		if(boundary.length) {
			immutable(ubyte)[][] partLines;
			bool inPart;
			foreach(line; lines) {
				if(line.startsWith("--" ~ boundary)) {
					if(inPart)
						stuff ~= new MimePart(partLines);
					inPart = true;
					partLines = null;

					if(line == "--" ~ boundary ~ "--")
						break; // all done
				}

				if(inPart) {
					partLines ~= line;
				} else {
					content ~= line ~ '\n';
				}
			}
		} else {
			foreach(line; lines) {
				content ~= line;

				if(transferEncoding != "base64")
					content ~= '\n';
			}
		}

		// store encoded content for GPG (should be cleared by caller if necessary)
		encodedContent = content;

		// decode the content..
		switch(transferEncoding) {
			case "base64":
				content = Base64.decode(cast(string) content);
			break;
			case "quoted-printable":
				content = decodeQuotedPrintable(cast(string) content);
			break;
			default:
				// no change needed (I hope)
		}

		if(type.indexOf("text/") == 0) {
			if(charset.length == 0)
				charset = "latin1";
			textContent = convertToUtf8Lossy(content, charset);
		}
	}
}

/++
	Splits a structured header value such as `text/plain; charset="UTF-8"; format=flowed` into its components.

	The leading value is stored under the key `"root"`. Each following `name=value` parameter is stored under its name, stripped of surrounding whitespace and lower-cased, since MIME parameter names are case-insensitive. Double quotes around values are removed and protect any `;` inside them. A parameter without `=` is stored with a null value.
+/
string[string] breakUpHeaderParts(string headerContent) {
	string[string] ret;

	string currentName = "root";
	string currentContent;
	bool inQuote = false;
	bool gettingName = false;
	bool ignoringSpaces = false;

	// stores the component collected so far, unless it has no name at all
	void commit() {
		auto key = currentName.strip.toLower;
		if(key.length)
			ret[key] = currentContent;
	}

	foreach(char c; headerContent) {
		if(ignoringSpaces) {
			if(c == ' ')
				continue;
			else
				ignoringSpaces = false;
		}

		if(gettingName) {
			if(c == '=') {
				gettingName = false;
				continue;
			}
			if(c != ';') {
				currentName ~= c;
				continue;
			}
			// a ';' before any '=': valueless parameter, committed below
		}

		if(c == '"') {
			inQuote = !inQuote;
			continue;
		}

		if(!inQuote && c == ';') {
			commit();
			ignoringSpaces = true;
			currentName = null;
			currentContent = null;

			gettingName = true;
			continue;
		}

		currentContent ~= c;
	}

	commit();

	return ret;
}

unittest {
	auto parts = breakUpHeaderParts(`text/plain; Charset="UTF-8"; format=flowed`);
	assert(parts["root"] == "text/plain");
	assert(parts["charset"] == "UTF-8");
	assert(parts["format"] == "flowed");

	parts = breakUpHeaderParts(`attachment; inline; filename="a;b.txt"`);
	assert(parts["root"] == "attachment");
	assert("inline" in parts);
	assert(parts["filename"] == "a;b.txt");
}

// for writing
class MimeContainer {
	// bumped for every container created; makes the boundaries distinct from each other
	private static int sequence;

	immutable string _contentType;
	immutable string boundary; // only set for multipart/* types

	string[] headers; // NOT including content-type
	string content;
	MimeContainer[] stuff;

	this(string contentType, string content = null) {
		this._contentType = contentType;
		this.content = content;
		sequence++;
		if(_contentType.indexOf("multipart/") == 0)
			boundary = "0016e64be86203dd36047610926a" ~ to!string(sequence);
	}

	/// The complete `Content-Type:` header line, including the boundary parameter if there is one.
	@property string contentType() {
		string ct = "Content-Type: "~_contentType;
		if(boundary.length)
			ct ~= "; boundary=" ~ boundary;
		return ct;
	}


	/// Serializes this container and its children. The root omits its own headers because the caller puts them among the message headers.
	string toMimeString(bool isRoot = false, string linesep="\r\n") {
		string ret;

		if(!isRoot) {
			ret ~= contentType;
			foreach(header; headers) {
				ret ~= linesep;
				ret ~= encodeEmailHeaderForTransmit(header, linesep);
			}
			ret ~= linesep ~ linesep;
		}

		ret ~= content;

		foreach(idx, thing; stuff) {
			assert(boundary.length);
			ret ~= linesep ~ "--" ~ boundary ~ linesep;
			ret ~= thing.toMimeString(false, linesep);
		}

		if(boundary.length)
			ret ~= linesep ~ "--" ~ boundary ~ "--";

		return ret;
	}
}

import std.algorithm : startsWith;
/++
	Represents a single email from an incoming or saved source consisting of the raw data. Such saved sources include mbox files (which are several concatenated together, see [MboxMessages] for a full reader of these files), .eml files, and Maildir entries.
+/
class IncomingEmailMessage : EmailMessage {
	/++
		Various constructors for parsing an email message.


		The `ref immutable(ubyte)[][]` one is designed for reading a pre-loaded mbox file. It updates the ref variable to the point at the next message in the file as it processes. You probably should use [MboxMessages] in a `foreach` loop instead of calling this directly most the time.

		The `string[]` one takes an ascii or utf-8 file of a single email pre-split into lines.

		The `immutable(ubyte)[]` one is designed for reading an individual message in its own file in the easiest way. Try `new IncomingEmailMessage(cast(immutable(ubyte)[]) std.file.read("filename.eml"));` to use this. You can also use `IncomingEmailMessage.fromFile("filename.eml")` as well.

		History:
			The `immutable(ubyte)[]` overload for a single file was added on May 14, 2024.
	+/
	this(ref immutable(ubyte)[][] mboxLines, bool asmbox=true) @trusted {

		enum ParseState {
			lookingForFrom,
			readingHeaders,
			readingBody
		}

		auto state = (asmbox ? ParseState.lookingForFrom : ParseState.readingHeaders);
		string contentType;

		bool isMultipart;
		bool isHtml;
		immutable(ubyte)[][] mimeLines;

		string charset = "latin-1";

		string contentTransferEncoding;

		string headerName;
		string headerContent;
		void commitHeader() {
			if(headerName is null)
				return;

			auto originalHeaderName = headerName;
			headerName = headerName.toLower();
			headerContent = headerContent.strip();

			headerContent = decodeEncodedWord(headerContent);

			if(headerName == "content-type") {
				contentType = headerContent;
				if(contentType.indexOf("multipart/") != -1)
					isMultipart = true;
				else if(contentType.indexOf("text/html") != -1)
					isHtml = true;

				auto charsetIdx = contentType.indexOf("charset=");
				if(charsetIdx != -1) {
					string cs = contentType[charsetIdx + "charset=".length .. $];
					if(cs.length && cs[0] == '\"')
						cs = cs[1 .. $];

					auto quoteIdx = cs.indexOf("\"");
					if(quoteIdx != -1)
						cs = cs[0 .. quoteIdx];
					auto semicolonIdx = cs.indexOf(";");
					if(semicolonIdx != -1)
						cs = cs[0 .. semicolonIdx];

					cs = cs.strip();
					if(cs.length)
						charset = cs.toLower();
				}
			} else if(headerName == "from") {
				this.from = headerContent;
			} else if(headerName == "to") {
				this.to ~= headerContent;
			} else if(headerName == "subject") {
				this.subject = headerContent;
			} else if(headerName == "content-transfer-encoding") {
				contentTransferEncoding = headerContent;
			}

			headers_[originalHeaderName] = headerContent;
			headerName = null;
			headerContent = null;
		}

		lineLoop: while(mboxLines.length) {
			// this can needlessly convert headers too, but that won't harm anything since they are 7 bit anyway
			auto line = convertToUtf8Lossy(mboxLines[0], charset);
			auto origline = line;
			line = line.stripRight;

			final switch(state) {
				case ParseState.lookingForFrom:
					if(line.startsWith("From "))
						state = ParseState.readingHeaders;
				break;
				case ParseState.readingHeaders:
					if(line.length == 0) {
						commitHeader();
						state = ParseState.readingBody;
					} else {
						if(line[0] == ' ' || line[0] == '\t') {
							headerContent ~= " " ~ line.stripLeft();
						} else {
							commitHeader();

							auto idx = line.indexOf(":");
							if(idx == -1)
								headerName = line;
							else {
								headerName = line[0 .. idx];
								headerContent = line[idx + 1 .. $].stripLeft();
							}
						}
					}
				break;
				case ParseState.readingBody:
					if (asmbox) {
						if(line.startsWith("From ")) {
							break lineLoop; // we're at the beginning of the next messsage
						}
						if(line.startsWith(">>From") || line.startsWith(">From")) {
							line = line[1 ..
$]; 1178 } 1179 } 1180 1181 if(isMultipart) { 1182 mimeLines ~= mboxLines[0]; 1183 } else if(isHtml) { 1184 // html with no alternative and no attachments 1185 this.htmlBody_ ~= line ~ "\n"; 1186 } else { 1187 // plain text! 1188 // we want trailing spaces for "format=flowed", for example, so... 1189 line = origline; 1190 size_t epos = line.length; 1191 while (epos > 0) { 1192 char ch = line.ptr[epos-1]; 1193 if (ch >= ' ' || ch == '\t') break; 1194 --epos; 1195 } 1196 line = line.ptr[0..epos]; 1197 this.textBody_ ~= line ~ "\n"; 1198 } 1199 break; 1200 } 1201 1202 mboxLines = mboxLines[1 .. $]; 1203 } 1204 1205 if(mimeLines.length) { 1206 auto part = new MimePart(mimeLines, contentType); 1207 deeperInTheMimeTree: 1208 switch(part.type) { 1209 case "text/html": 1210 this.htmlBody_ = part.textContent; 1211 break; 1212 case "text/plain": 1213 this.textBody_ = part.textContent; 1214 break; 1215 case "multipart/alternative": 1216 foreach(p; part.stuff) { 1217 if(p.type == "text/html") 1218 this.htmlBody_ = p.textContent; 1219 else if(p.type == "text/plain") 1220 this.textBody_ = p.textContent; 1221 } 1222 break; 1223 case "multipart/related": 1224 // the first one is the message itself 1225 // after that comes attachments that can be rendered inline 1226 if(part.stuff.length) { 1227 auto msg = part.stuff[0]; 1228 foreach(thing; part.stuff[1 .. $]) { 1229 // FIXME: should this be special? 
1230 attachments ~= thing.toMimeAttachment(); 1231 } 1232 part = msg; 1233 goto deeperInTheMimeTree; 1234 } 1235 break; 1236 case "multipart/mixed": 1237 if(part.stuff.length) { 1238 MimePart msg; 1239 foreach(idx, thing; part.stuff) { 1240 if(msg is null && thing.disposition != "attachment" && (thing.type.length == 0 || thing.type.indexOf("multipart/") != -1 || thing.type.indexOf("text/") != -1)) { 1241 // the message should be the first suitable item for conversion 1242 msg = thing; 1243 } else { 1244 attachments ~= thing.toMimeAttachment(); 1245 } 1246 } 1247 if(msg) 1248 part = msg; 1249 goto deeperInTheMimeTree; 1250 } 1251 1252 // FIXME: the more proper way is: 1253 // check the disposition 1254 // if none, concat it to make a text message body 1255 // if inline it is prolly an image to be concated in the other body 1256 // if attachment, it is an attachment 1257 break; 1258 case "multipart/signed": 1259 // FIXME: it would be cool to actually check the signature 1260 if (part.stuff.length) { 1261 auto msg = part.stuff[0]; 1262 //{ import std.stdio; writeln("hdrs: ", part.stuff[0].headers); } 1263 gpgalg = part.gpgalg; 1264 gpgproto = part.gpgproto; 1265 gpgmime = part; 1266 foreach (thing; part.stuff[1 .. 
$]) { 1267 attachments ~= thing.toMimeAttachment(); 1268 } 1269 part = msg; 1270 goto deeperInTheMimeTree; 1271 } 1272 break; 1273 default: 1274 // FIXME: correctly handle more 1275 if(part.stuff.length) { 1276 part = part.stuff[0]; 1277 goto deeperInTheMimeTree; 1278 } 1279 } 1280 } else { 1281 switch(contentTransferEncoding) { 1282 case "quoted-printable": 1283 if(this.textBody_.length) 1284 this.textBody_ = convertToUtf8Lossy(decodeQuotedPrintable(this.textBody_), charset); 1285 if(this.htmlBody_.length) 1286 this.htmlBody_ = convertToUtf8Lossy(decodeQuotedPrintable(this.htmlBody_), charset); 1287 break; 1288 case "base64": 1289 if(this.textBody_.length) { 1290 this.textBody_ = this.textBody_.decodeBase64Mime.convertToUtf8Lossy(charset); 1291 } 1292 if(this.htmlBody_.length) { 1293 this.htmlBody_ = this.htmlBody_.decodeBase64Mime.convertToUtf8Lossy(charset); 1294 } 1295 1296 break; 1297 default: 1298 // nothing needed 1299 } 1300 } 1301 1302 if(this.htmlBody_.length > 0 && this.textBody_.length == 0) { 1303 import arsd.htmltotext; 1304 this.textBody_ = htmlToText(this.htmlBody_); 1305 textAutoConverted = true; 1306 } 1307 } 1308 1309 /// ditto 1310 this(string[] lines) { 1311 auto lns = cast(immutable(ubyte)[][])lines; 1312 this(lns, false); 1313 } 1314 1315 /// ditto 1316 this(immutable(ubyte)[] fileContent) { 1317 auto lns = splitLinesWithoutDecoding(fileContent); 1318 this(lns, false); 1319 } 1320 1321 /++ 1322 Convenience method that takes a filename instead of the content. 
1323 1324 Its implementation is simply `return new IncomingEmailMessage(cast(immutable(ubyte)[]) std.file.read(filename));` 1325 (though i reserve the right to use a different file loading library later, still the same idea) 1326 1327 History: 1328 Added May 14, 2024 1329 +/ 1330 static IncomingEmailMessage fromFile(string filename) { 1331 import std.file; 1332 return new IncomingEmailMessage(cast(immutable(ubyte)[]) std.file.read(filename)); 1333 } 1334 1335 /// 1336 @property bool hasGPGSignature () const nothrow @trusted @nogc { 1337 MimePart mime = cast(MimePart)gpgmime; // sorry 1338 if (mime is null) return false; 1339 if (mime.type != "multipart/signed") return false; 1340 if (mime.stuff.length != 2) return false; 1341 if (mime.stuff[1].type != "application/pgp-signature") return false; 1342 if (mime.stuff[0].type.length <= 5 && mime.stuff[0].type[0..5] != "text/") return false; 1343 return true; 1344 } 1345 1346 /// 1347 ubyte[] extractGPGData () const nothrow @trusted { 1348 if (!hasGPGSignature) return null; 1349 MimePart mime = cast(MimePart)gpgmime; // sorry 1350 char[] res; 1351 res.reserve(mime.stuff[0].encodedContent.length); // more, actually 1352 foreach (string s; mime.stuff[0].headers[1..$]) { 1353 while (s.length && s[$-1] <= ' ') s = s[0..$-1]; 1354 if (s.length == 0) return null; // wtf?! empty headers? 
1355 res ~= s; 1356 res ~= "\r\n"; 1357 } 1358 res ~= "\r\n"; 1359 // extract content (see rfc3156) 1360 size_t pos = 0; 1361 auto ctt = mime.stuff[0].encodedContent; 1362 // last CR/LF is a part of mime signature, actually, so remove it 1363 if (ctt.length && ctt[$-1] == '\n') { 1364 ctt = ctt[0..$-1]; 1365 if (ctt.length && ctt[$-1] == '\r') ctt = ctt[0..$-1]; 1366 } 1367 while (pos < ctt.length) { 1368 auto epos = pos; 1369 while (epos < ctt.length && ctt.ptr[epos] != '\n') ++epos; 1370 auto xpos = epos; 1371 while (xpos > pos && ctt.ptr[xpos-1] <= ' ') --xpos; // according to rfc 1372 res ~= ctt[pos..xpos].dup; 1373 res ~= "\r\n"; // according to rfc 1374 pos = epos+1; 1375 } 1376 return cast(ubyte[])res; 1377 } 1378 1379 /// 1380 immutable(ubyte)[] extractGPGSignature () const nothrow @safe @nogc { 1381 if (!hasGPGSignature) return null; 1382 return gpgmime.stuff[1].content; 1383 } 1384 1385 /++ 1386 Allows access to the headers in the email as a key/value hash. 1387 1388 The hash allows access as if it was case-insensitive, but it also still keeps the original case when you loop through it. 1389 1390 Bugs: 1391 Duplicate headers are lost in the current implementation; only the most recent copy of any given name is retained. 1392 +/ 1393 const(HeadersHash) headers() { 1394 return headers_; 1395 } 1396 1397 /++ 1398 Returns the message body as either HTML or text. Gives the same results as through the parent interface, [EmailMessage.htmlBody] and [EmailMessage.textBody]. 1399 1400 If the message was multipart/alternative, both of these will be populated with content from the message. They are supposed to be both the same, but not all senders respect this so you might want to check both anyway. 1401 1402 If the message was just plain text, `htmlMessageBody` will be `null` and `textMessageBody` will have the original message. 
1403 1404 If the message was just HTML, `htmlMessageBody` contains the original message and `textMessageBody` will contain an automatically converted version (using [arsd.htmltotext]). [textAutoConverted] will be set to `true`. 1405 1406 History: 1407 Were public strings until May 14, 2024, when it was changed to property getters instead. 1408 +/ 1409 string htmlMessageBody() { 1410 return this.htmlBody_; 1411 } 1412 /// ditto 1413 string textMessageBody() { 1414 return this.textBody_; 1415 } 1416 /// ditto 1417 bool textAutoConverted; 1418 1419 // gpg signature fields 1420 string gpgalg; /// 1421 string gpgproto; /// 1422 MimePart gpgmime; /// 1423 1424 /// 1425 string fromEmailAddress() { 1426 return from.address; 1427 } 1428 1429 /// 1430 string toEmailAddress() { 1431 if(to.recipients.length) 1432 return to.recipients[0].address; 1433 return null; 1434 } 1435 } 1436 1437 /++ 1438 An mbox file is a concatenated list of individual email messages. This is a range of messages given the content of one of those files. 
+/
struct MboxMessages {
	/// The lines of the file that have not been parsed yet.
	immutable(ubyte)[][] linesRemaining;

	/// Splits `data` into lines and parses the first message right away.
	this(immutable(ubyte)[] data) {
		linesRemaining = splitLinesWithoutDecoding(data);
		popFront();
	}

	/// The message `front` returns; `null` once the range is exhausted.
	IncomingEmailMessage currentFront;

	/// Range primitive: the current message.
	IncomingEmailMessage front() {
		return currentFront;
	}

	/// Range primitive: `true` once there is no current message.
	bool empty() {
		return currentFront is null;
	}

	/// Range primitive: parses the next message out of `linesRemaining`, which the parser advances past the lines it used.
	void popFront() {
		if(linesRemaining.length)
			currentFront = new IncomingEmailMessage(linesRemaining);
		else
			currentFront = null;
	}
}

/// Returns a range over the messages in the given mbox file content. See [MboxMessages].
MboxMessages processMboxData(immutable(ubyte)[] data) {
	return MboxMessages(data);
}

/++
	Splits raw bytes into lines without looking at the character encoding.

	Lines end at a LF byte (10). A CR byte (13) directly in front of the LF is removed as
	well; a CR anywhere else stays in the line. The returned lines are slices of `data` and
	do not include the line ending. Data after the last LF forms a final line.
+/
immutable(ubyte)[][] splitLinesWithoutDecoding(immutable(ubyte)[] data) {
	immutable(ubyte)[][] ret;

	size_t starting = 0;
	bool justSaw13 = false;
	foreach(idx, b; data) {
		if(b == 13)
			justSaw13 = true;

		if(b == 10) {
			auto use = idx;
			// exclude the CR of a CRLF pair
			if(justSaw13)
				use--;

			ret ~= data[starting .. use];
			starting = idx + 1;
		}

		if(b != 13)
			justSaw13 = false;
	}

	if(starting < data.length)
		ret ~= data[starting .. $];

	return ret;
}

/++
	Decodes RFC 2047 encoded-words (`=?charset?encoding?text?=`) inside a header value.

	Text around the encoded-words is kept as it is. A single space between two
	encoded-words is dropped. Both the `Q` and the `B` encoding are understood.

	Returns:
		The decoded text as UTF-8. If anything about an encoded-word is malformed
		(missing delimiters, unknown encoding, invalid base64), the original string
		is returned unchanged.
+/
string decodeEncodedWord(string data) {
	string originalData = data;

	auto delimiter = data.indexOf("=?");
	if(delimiter == -1)
		return data;

	string ret;

	while(delimiter != -1) {
		ret ~= data[0 .. delimiter];
		data = data[delimiter + 2 .. $];

		string charset;
		string encoding;
		string encodedText;

		// FIXME: the insane things should probably throw an
		// exception that keeps a copy of orignal data for use later

		auto questionMark = data.indexOf("?");
		if(questionMark == -1) return originalData; // not sane

		charset = data[0 .. questionMark];
		data = data[questionMark + 1 .. $];

		questionMark = data.indexOf("?");
		if(questionMark == -1) return originalData; // not sane

		encoding = data[0 .. questionMark];
		data = data[questionMark + 1 .. $];

		questionMark = data.indexOf("?=");
		if(questionMark == -1) return originalData; // not sane

		encodedText = data[0 .. questionMark];
		data = data[questionMark + 2 .. $];

		delimiter = data.indexOf("=?");
		if (delimiter == 1 && data[0] == ' ') {
			// a single space between encoded words must be ignored because it is
			// used to separate multiple encoded words (RFC2047 says CRLF SPACE but a most clients
			// just use a space)
			data = data[1..$];
			delimiter = 0;
		}

		immutable(ubyte)[] decodedText;
		if(encoding == "Q" || encoding == "q")
			decodedText = decodeQuotedPrintable(encodedText);
		else if(encoding == "B" || encoding == "b") {
			try {
				decodedText = cast(typeof(decodedText)) Base64.decode(encodedText);
			} catch(Base64Exception e) {
				// invalid base64 is handled like every other malformed input here,
				// instead of letting one bad header abort parsing the whole message
				return originalData;
			}
		} else
			return originalData; // wtf

		ret ~= convertToUtf8Lossy(decodedText, charset);
	}

	ret ~= data; // keep the rest since there could be trailing stuff

	return ret;
}

/++
	Decodes quoted-printable text.

	`=XX` becomes the byte with hex value `XX`, and a `=` at the end of a line (a soft line
	break, with either LF or CRLF line endings) is removed together with the line ending.
	A `_` is decoded as a space, as used by the `Q` encoding of RFC 2047. Characters that
	are not hex digits after a `=` count as zero.

	NOTE(review): the `_` rule belongs to header encoded-words only; message bodies decoded
	through this function also get their underscores turned into spaces. Left as it is
	because changing it alters the output for existing callers.
+/
immutable(ubyte)[] decodeQuotedPrintable(string text) {
	immutable(ubyte)[] ret;

	// 0: plain text, 1: just saw '=', 2: saw '=' and the first hex digit, 3: saw "=\r"
	int state = 0;
	ubyte hexByte;
	foreach(b; cast(immutable(ubyte)[]) text) {
		switch(state) {
			case 0:
				if(b == '=') {
					state++;
					hexByte = 0;
				} else if (b == '_') { // RFC2047 4.2.2: a _ may be used to represent a space
					ret ~= ' ';
				} else
					ret ~= b;
			break;
			case 1:
				if(b == '\n') {
					// soft line break
					state = 0;
					continue;
				}
				if(b == '\r') {
					// possibly a CRLF soft line break; previously the CR and LF were
					// read as two hex digits, which produced a stray zero byte
					state = 3;
					continue;
				}
				goto case;
			case 2:
				int value;
				if(b >= '0' && b <= '9')
					value = b - '0';
				else if(b >= 'A' && b <= 'F')
					value = b - 'A' + 10;
				else if(b >= 'a' && b <= 'f')
					value = b - 'a' + 10;
				if(state == 1) {
					hexByte |= value << 4;
					state++;
				} else {
					hexByte |= value;
					ret ~= hexByte;
					state = 0;
				}
			break;
			case 3:
				state = 0;
				// the LF completes the CRLF soft line break
				if(b == '\n')
					continue;
				// no LF followed: the "=\r" is dropped and this byte is handled normally
				goto case 0;
			default: assert(0);
		}
	}

	return ret;
}

unittest {
	// soft line breaks disappear with LF as well as with CRLF line endings
	assert(decodeQuotedPrintable("foo=\nbar") == cast(immutable(ubyte)[]) "foobar");
	assert(decodeQuotedPrintable("foo=\r\nbar") == cast(immutable(ubyte)[]) "foobar");
	assert(decodeQuotedPrintable("a=3Db") == cast(immutable(ubyte)[]) "a=b");
}

/// Add header UFCS helper
auto with_header(MimeContainer container, string header){
	container.headers ~= header;
	return container;
}

/// Base64 range encoder UFCS helper.
alias base64encode = Base64.encoder;

/// Base64 encoded data with line length of 76 as mandated by RFC 2045 Section 6.8
string encodeBase64Mime(const(ubyte[]) content, string LINESEP = "\r\n") {
	enum LINE_LENGTH = 76;
	/// Only 6 bit of every byte are used; log2(64) = 6
	enum int SOURCE_CHUNK_LENGTH = LINE_LENGTH * 6/8;

	// every chunk of 57 source bytes encodes to exactly one 76 character line
	return cast(immutable(char[]))content.chunks(SOURCE_CHUNK_LENGTH).base64encode.join(LINESEP);
}


/// Base64 range decoder UFCS helper.
alias base64decode = Base64.decoder;

/// Base64 decoder, ignoring linebreaks which are mandated by RFC2045
immutable(ubyte[]) decodeBase64Mime(string encodedPart) {
	return cast(immutable(ubyte[])) encodedPart
		.byChar // prevent Autodecoding, which will break Base64 decoder. Since it's base64, it's guaranteed to be 7bit ascii
		.filter!((c) => (c != '\r') & (c != '\n'))
		.base64decode
		.array;
}

unittest {
	// Mime base64 roundtrip
	import std.algorithm.comparison;
	string source = chain(
		repeat('n', 1200), //long line
		"\r\n",
		"äöü\r\n",
		"ඞ\rn",
		).byChar.array;
	assert( source.representation.encodeBase64Mime.decodeBase64Mime.equal(source));
}

unittest {
	import std.algorithm;
	import std.string;
	// Mime message roundtrip
	auto mail = new EmailMessage();
	mail.to = ["recipient@example.org"];
	mail.from = "sender@example.org";
	mail.subject = "Subject";

	auto text = cast(string) chain(
		repeat('n', 1200),
		"\r\n",
		"äöü\r\n",
		"ඞ\r\nlast",
		).byChar.array;
	mail.setTextBody(text);
	mail.addAttachment("text/plain", "attachment.txt", text.representation);
	// In case binary and plaintext get handled differently one day
	mail.addAttachment("application/octet-stream", "attachment.bin", text.representation);

	auto result = new IncomingEmailMessage(mail.toString().split("\r\n"));

	assert(result.subject.equal(mail.subject));
	assert(mail.to.canFind(result.to));
	assert(result.from == mail.from.toProtocolString);

	// This roundtrip works modulo trailing newline on the parsed message and LF vs CRLF
	assert(result.textMessageBody.replace("\n", "\r\n").stripRight().equal(mail.textBody_));
	assert(result.attachments.equal(mail.attachments));
}

// FIXME: add unittest with a pdf in first multipart/mixed position

/// Tells whether every character is printable 7 bit ascii (32 through 126).
private bool hasAllPrintableAscii(in char[] s) {
	foreach(ch; s) {
		if(ch < 32)
			return false;
		if(ch >= 127)
			return false;
	}
	return true;
}

/++
	Encodes a header value (without the name) for transmission.

	Short values made of printable ascii only are returned as they are. Everything else is
	written as one or more base64 encoded-words (RFC 2047) in UTF-8, separated by
	`linesep` and a space so the header is folded between the words.

	Params:
		value = the text to encode.
		linesep = the line separator used when folding.
		prechecked = `true` if the caller already decided the value has to be encoded;
			the plain ascii shortcut is skipped then.
+/
private string encodeEmailHeaderContentForTransmit(string value, string linesep, bool prechecked = false) {
	if(!prechecked && value.length < 998 && hasAllPrintableAscii(value))
		return value;

	// RFC 2047 limits an encoded-word to 75 characters and requires each one to hold
	// whole characters only. 45 source bytes give 60 base64 characters, plus the 12
	// characters of "=?UTF-8?B?" and "?=".
	enum maxChunkLength = 45;

	string ret;
	auto remaining = cast(const(ubyte)[]) value;
	do {
		size_t take = remaining.length < maxChunkLength ? remaining.length : maxChunkLength;

		// if the cut would land inside a multi-byte UTF-8 sequence, back up to the start
		// of that character. Stopping at 1 guarantees progress even on invalid input.
		if(take < remaining.length) {
			while(take > 1 && (remaining[take] & 0xC0) == 0x80)
				take--;
		}

		if(ret.length)
			ret ~= linesep ~ " ";
		ret ~= "=?UTF-8?B?";
		ret ~= Base64.encode(remaining[0 .. take]);
		ret ~= "?=";

		remaining = remaining[take .. $];
	} while(remaining.length);

	return ret;
}

unittest {
	import std.algorithm.searching : startsWith, endsWith;
	import std.array : split;
	import std.utf : validate;

	// 40 two-byte characters do not fit into one encoded-word
	string value;
	foreach(i; 0 .. 40)
		value ~= "ä";

	auto encoded = encodeEmailHeaderContentForTransmit(value, "\r\n");

	string roundTrip;
	foreach(word; encoded.split("\r\n ")) {
		assert(word.length <= 75);
		assert(word.startsWith("=?UTF-8?B?") && word.endsWith("?="));
		auto decoded = cast(string) Base64.decode(word["=?UTF-8?B?".length .. $ - 2]);
		validate(decoded); // throws if a character was split between two words
		roundTrip ~= decoded;
	}
	assert(roundTrip == value);
}

/++
	Encodes a complete `Name: value` header line for transmission.

	Lines shorter than 998 characters consisting of printable ascii only are returned as
	they are (the very same string). Otherwise the value is encoded; the name is kept.

	Throws:
		Exception if the header has no colon or the name is not printable ascii.
+/
private string encodeEmailHeaderForTransmit(string completeHeader, string linesep) {
	if(completeHeader.length < 998 && hasAllPrintableAscii(completeHeader))
		return completeHeader;

	// note that we are here if there's a newline embedded in the content as well
	auto colon = completeHeader.indexOf(":");
	if(colon == -1) // should never happen!
		throw new Exception("invalid email header - no colon in " ~ completeHeader); // but exception instead of assert since this might happen as result of public data manip

	auto name = completeHeader[0 .. colon + 1];
	if(!hasAllPrintableAscii(name)) // should never happen!
		throw new Exception("invalid email header - improper name: " ~ name); // ditto

	auto value = completeHeader[colon + 1 .. $].strip;

	return
		name ~
		" " ~ // i like that leading space after the colon but it was stripped out of value
		encodeEmailHeaderContentForTransmit(value, linesep, true);
}

unittest {
	auto linesep = "\r\n";
	string test = "Subject: This is an ordinary subject line with no special characters and not exceeding the maximum line length limit.";
	assert(test is encodeEmailHeaderForTransmit(test, linesep)); // returned by identity

	test = "Subject: foo\nbar";
	assert(test !is encodeEmailHeaderForTransmit(test, linesep)); // a newline forces encoding
}

/+
void main() {
	import std.file;
	import std.stdio;

	auto data = cast(immutable(ubyte)[]) std.file.read("/home/me/test_email_data");
	foreach(message; processMboxData(data)) {
		writeln(message.subject);
		writeln(message.textMessageBody);
		writeln("**************** END MESSSAGE **************");
	}
}
+/