Skip to content

Commit 6f98f52

Browse files
committed
100% safe Rust
1 parent ee2d08b commit 6f98f52

28 files changed

+1173
-1482
lines changed

Cargo.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "mail-parser"
33
description = "Fast and robust e-mail parsing library for Rust"
4-
version = "0.1.1"
4+
version = "0.2.0"
55
edition = "2018"
66
authors = [ "Stalwart Labs <hello@stalw.art>"]
77
license = "Apache-2.0 OR MIT"

README.md

+10-13
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,18 @@ HTML and plain text inline body parts is done automatically when the _alternativ
2424

2525
Performance and memory safety were two important factors while designing _mail-parser_:
2626

27-
- **Zero-copy parsing** is done in most cases (except when decoding non-UTF8 text or when RFC2047/RFC2231 encoded parts are present).
28-
Practically all strings and u8 slices returned by this library are `Cow<str>` or `Cow<[u8]>` references to the input raw message.
29-
- Memory allocations are always avoided unless they are really necessary. In fact, all Base64 and Quoted-Printable parts are decoded in
30-
place re-using the input buffer.
31-
- [Perfect hashing](https://en.wikipedia.org/wiki/Perfect_hash_function) is used for fast look-up of message header fields, character
32-
set names and aliases, HTML entities as well as month names while parsing _Date_ fields.
33-
- Although some `unsafe` code was used to obtain performance gains of about 10%, every function in the library has been
34-
[fuzzed](#testing-fuzzing--benchmarking) and also heavily [tested with MIRI](#testing-fuzzing--benchmarking).
35-
- Fully battle-tested with millions of real-world e-mail messages dating from 1995 until today.
27+
- **Zero-copy parsing**: Practically all strings returned by this library are `Cow<str>` references to the input raw message.
28+
- **High-performance Base64 decoding** based on Chromium's decoder ([the fastest non-SIMD decoder](https://github.com/lemire/fastbase64)).
29+
- **Fast** parsing of message header fields, character set names and HTML entities using [perfect hashing](https://en.wikipedia.org/wiki/Perfect_hash_function).
30+
- Written in **100% safe** Rust with no external dependencies.
31+
- Every function in the library has been [fuzzed](#testing-fuzzing--benchmarking) and
32+
meticulously [tested with MIRI](#testing-fuzzing--benchmarking).
33+
- Thoroughly **battle-tested** with millions of real-world e-mail messages dating from 1995 until today.
3634

3735
## Usage Example
3836

3937
```rust
40-
let mut input = concat!(
38+
let input = concat!(
4139
"From: Art Vandelay <art@vandelay.com> (Vandelay Industries)\n",
4240
"To: \"Colleagues\": \"James Smythe\" <james@vandelay.com>; Friends:\n",
4341
" jane@example.com, =?UTF-8?Q?John_Sm=C3=AEth?= <john@example.com>;\n",
@@ -72,10 +70,9 @@ Performance and memory safety were two important factors while designing _mail-p
7270
"--giddyup--\n",
7371
"--festivus--\n",
7472
)
75-
.as_bytes()
76-
.to_vec();
73+
.as_bytes();
7774

78-
let message = Message::parse(&mut input[..]);
75+
let message = Message::parse(input);
7976

8077
// Parses addresses (including comments), lists and groups
8178
assert_eq!(

benches/libraries.rs

+2-5
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use std::{fs, path::PathBuf};
1717

1818
use test::Bencher;
1919

20-
fn bench_all_samples(b: &mut Bencher, name: &str, fnc: fn(&mut [u8], &str)) {
20+
fn bench_all_samples(b: &mut Bencher, name: &str, fnc: fn(&[u8], &str)) {
2121
const SEPARATOR: &[u8] = "\n---- EXPECTED STRUCTURE ----\n".as_bytes();
2222

2323
println!("Benchmarking {}...\n", name);
@@ -63,10 +63,7 @@ fn bench_all_samples(b: &mut Bencher, name: &str, fnc: fn(&mut [u8], &str)) {
6363

6464
b.iter(|| {
6565
for test_msg in &test_data {
66-
let input_str = String::from_utf8_lossy(&test_msg);
67-
let mut input_bytes = test_msg.clone();
68-
69-
fnc(&mut input_bytes[..], &input_str);
66+
fnc(test_msg, String::from_utf8_lossy(&test_msg).as_ref());
7067
}
7168
});
7269
}

examples/email_to_json_and_yaml.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use mail_parser::{Addr, Address, BodyPart, Group, Message, MessagePart, MimeFieldGet};
22

33
fn main() {
4-
let mut input = concat!(
4+
let input = concat!(
55
"From: Art Vandelay <art@vandelay.com> (Vandelay Industries)\n",
66
"To: \"Colleagues\": \"James Smythe\" <james@vandelay.com>; Friends:\n",
77
" jane@example.com, =?UTF-8?Q?John_Sm=C3=AEth?= <john@example.com>;\n",
@@ -36,10 +36,9 @@ fn main() {
3636
"--giddyup--\n",
3737
"--festivus--\n",
3838
)
39-
.as_bytes()
40-
.to_vec();
39+
.as_bytes();
4140

42-
let message = Message::parse(&mut input[..]);
41+
let message = Message::parse(input);
4342

4443
// Parses addresses (including comments), lists and groups
4544
assert_eq!(

fuzz/Cargo.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fuzz/fuzz_targets/mail_parser.rs

+7-10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#![no_main]
22
use libfuzzer_sys::fuzz_target;
3-
use std::borrow::Cow;
43

54
use mail_parser::{
65
decoders::{
@@ -34,8 +33,7 @@ use mail_parser::{
3433
fuzz_target!(|data: &[u8]| {
3534
// Fuzz every parsing function
3635
for n_fuzz in 1..=24 {
37-
let mut data = Vec::from(data);
38-
let stream = MessageStream::new(&mut data);
36+
let stream = MessageStream::new(&data);
3937

4038
match n_fuzz {
4139
1 => {
@@ -116,16 +114,16 @@ fuzz_target!(|data: &[u8]| {
116114

117115
// Fuzz HTML functions
118116
let mut html_str = String::with_capacity(data.len());
119-
add_html_token(&mut html_str, data, false);
120-
let html_str = String::from_utf8_lossy(data);
121-
html_to_text(&html_str);
122-
text_to_html(&html_str);
117+
let str_data = String::from_utf8_lossy(data);
118+
add_html_token(&mut html_str, str_data.as_ref().as_bytes(), false);
119+
html_to_text(&str_data);
120+
text_to_html(&str_data);
123121

124122
// Fuzz decoding functions
125123
decode_hex(data);
126124
get_charset_decoder(data);
127125

128-
let decoders: &[for<'x> fn(&'x [u8]) -> Cow<'x, str>] = &[
126+
let decoders: &[for<'x> fn(&'x [u8]) -> String] = &[
129127
decoder_utf7,
130128
decoder_utf16_le,
131129
decoder_utf16_be,
@@ -138,6 +136,5 @@ fuzz_target!(|data: &[u8]| {
138136
}
139137

140138
// Fuzz the entire library
141-
let mut data = Vec::from(data);
142-
Message::parse(&mut data[..]);
139+
Message::parse(&data[..]);
143140
});

0 commit comments

Comments
 (0)