// The crate-level documentation is the README, included verbatim.
#![doc = include_str!("../README.md")]
// `unsafe` code is rejected everywhere in this crate.
#![forbid(unsafe_code)]
// Lints and lint groups that are reported as warnings crate-wide.
#![warn(
    clippy::cargo,
    missing_docs,
    clippy::pedantic,
    future_incompatible,
    rust_2018_idioms
)]
// Individual lints that are silenced crate-wide.
#![allow(
    clippy::option_if_let_else,
    clippy::module_name_repetitions,
    clippy::missing_errors_doc
)]

use std::vec;

use pulldown_cmark::{CodeBlockKind, CowStr, DefaultBrokenLinkCallback, Event};

/// Extracts [`Frontmatter`] from any `Iterator<Item =
/// pulldown_cmark::Event<'_>>`.
///
/// This type implements `Iterator<Item = pulldown_cmark::Event<'_>>`, so it can
/// be used interchangeably with any Markdown processing code that previously
/// interacted with [`pulldown_cmark::Parser`].
///
/// This type's [`Event`] iterator will look for a top-level
/// heading (h1) and/or a code block at the start of the document. If either or
/// both are detected, [`FrontmatterExtractor::frontmatter`] will be populated
/// with the detected [`Frontmatter`].
///
/// If a code block is detected in the frontmatter, the code block's
/// [`Event`]s will not be returned when iterating. The top-level
/// heading's events will be returned as they appear in the original iterator.
pub struct FrontmatterExtractor<'a, T>
where
    T: Iterator<Item = Event<'a>>,
{
    /// The detected frontmatter, if any.
    pub frontmatter: Option<Frontmatter<'a>>,
    /// The wrapped event iterator that frontmatter is extracted from.
    source: T,
    /// How far the scan for frontmatter has progressed.
    state: DocumentAttributeParserState<'a>,
}

impl<'a, T> FrontmatterExtractor<'a, T>
46
where
47
    T: Iterator<Item = Event<'a>>,
48
{
49
    /// Returns a new instance that extracts frontmatter from the provided
50
    /// [`Event`] iterator.
51
7
    pub fn new(parser: T) -> Self {
52
7
        Self {
53
7
            source: parser,
54
7
            frontmatter: None,
55
7
            state: DocumentAttributeParserState::Parsing,
56
7
        }
57
7
    }
58

            
59
12
    fn frontmater_mut(&mut self) -> &mut Frontmatter<'a> {
60
12
        if self.frontmatter.is_none() {
61
5
            self.frontmatter = Some(Frontmatter {
62
5
                title: None,
63
5
                code_block: None,
64
5
            });
65
7
        }
66

            
67
12
        self.frontmatter.as_mut().expect("always initialized")
68
12
    }
69

            
70
    /// Scans the start of the document looking for [`Frontmatter`]. If
71
    /// frontmatter is detected, it will be returned.
72
    ///
73
    /// This function will not consume the original iterator completely. It will
74
    /// stop as soon as it is done detecting the frontmatter.
75
1
    pub fn extract(mut self) -> Option<Frontmatter<'a>> {
76
4
        while let Some(_) = self.next() {
77
4
            if matches!(self.state, DocumentAttributeParserState::InDocument) {
78
1
                break;
79
3
            }
80
        }
81

            
82
1
        self.frontmatter
83
1
    }
84

            
85
    /// Scans the start of the document looking for [`Frontmatter`]. If
86
    /// frontmatter is detected, it will be returned.
87
    ///
88
    /// The underlying iterator will be advanced to find the frontmatter, and
89
    /// any [`Event`]s that would normally be returned will be buffered so that
90
    /// they can still be returned from [`FrontmatterExtractor::next()`].
91
1
    pub fn extract_buffered(&mut self) -> Option<&Frontmatter<'a>> {
92
1
        let mut buffered_events = Vec::new();
93
4
        while let Some(event) = self.next() {
94
4
            buffered_events.push(event);
95
4
            if self.extracted() {
96
1
                break;
97
3
            }
98
        }
99

            
100
1
        self.state = DocumentAttributeParserState::InDocumentBuffered(buffered_events.into_iter());
101
1

            
102
1
        self.frontmatter.as_ref()
103
1
    }
104

            
105
    /// Returns true once the extractor is finished extracting the frontmatter.
106
    ///
107
    /// [`self.frontmatter`](Self::frontmatter) may not contain the full data
108
    /// from the underlying document until this function returns true.
109
    #[must_use]
110
4
    pub const fn extracted(&self) -> bool {
111
4
        matches!(self.state, DocumentAttributeParserState::InDocument)
112
4
    }
113
}
114

            
115
impl<'a> FrontmatterExtractor<'a, pulldown_cmark::Parser<'a, DefaultBrokenLinkCallback>> {
116
    /// Returns an instance that parses `markdown` with the default
117
    /// [`pulldown_cmark::Parser`].
118
    #[must_use]
119
5
    pub fn from_markdown(markdown: &'a str) -> Self {
120
5
        Self::new(pulldown_cmark::Parser::new(markdown))
121
5
    }
122
}
123

            
124
impl<'a, T> Iterator for FrontmatterExtractor<'a, T>
125
where
126
    T: Iterator<Item = Event<'a>>,
127
{
128
    type Item = Event<'a>;
129

            
130
46
    fn next(&mut self) -> Option<Self::Item> {
131
46
        match &mut self.state {
132
5
            DocumentAttributeParserState::InDocumentBuffered(buffered) => {
133
5
                if let Some(event) = buffered.next() {
134
4
                    return Some(event);
135
1
                }
136
1

            
137
1
                self.state = DocumentAttributeParserState::InDocument;
138
1
                return self.source.next();
139
            }
140
15
            DocumentAttributeParserState::InDocument => return self.source.next(),
141
26
            _ => {}
142
        }
143

            
144
        loop {
145
41
            match self.source.next()? {
146
12
                Event::Text(text) if self.state.in_document_title() => {
147
7
                    self.frontmater_mut().title_mut().push_str(&text);
148
7
                    return Some(Event::Text(text));
149
                }
150
5
                Event::Text(text) if self.state.in_code() => {
151
5
                    let language = match self.state.code_block_kind() {
152
2
                        CodeBlockKind::Indented => None,
153
3
                        CodeBlockKind::Fenced(language) => Some(language),
154
                    };
155
5
                    let frontmatter = self.frontmater_mut();
156
5
                    frontmatter.code_block = Some(CodeBlock {
157
5
                        source: text,
158
5
                        language,
159
5
                    });
160
                }
161
                Event::Start(pulldown_cmark::Tag::Heading {
162
                    level: pulldown_cmark::HeadingLevel::H1,
163
5
                    id,
164
5
                    classes,
165
5
                    attrs,
166
5
                }) if !self.state.in_document() => {
167
5
                    self.state = DocumentAttributeParserState::InTitle;
168
5
                    return Some(Event::Start(pulldown_cmark::Tag::Heading {
169
5
                        level: pulldown_cmark::HeadingLevel::H1,
170
5
                        id,
171
5
                        classes,
172
5
                        attrs,
173
5
                    }));
174
                }
175
                Event::End(pulldown_cmark::TagEnd::Heading (
176
                    pulldown_cmark::HeadingLevel::H1,
177
5
                )) if !self.state.in_document() => {
178
5
                    self.state = DocumentAttributeParserState::Parsing;
179
5
                    return Some(Event::End(pulldown_cmark::TagEnd::Heading (
180
5
                        pulldown_cmark::HeadingLevel::H1,
181
5
                    )));
182
                }
183
5
                Event::Start(pulldown_cmark::Tag::CodeBlock(kind)) if !self.state.in_document() => {
184
5
                    self.state = DocumentAttributeParserState::InAttributeCodeBlock(kind);
185
5
                }
186
5
                Event::End(pulldown_cmark::TagEnd::CodeBlock) if !self.state.in_document() => {
187
5
                    self.state = DocumentAttributeParserState::InDocument;
188
5
                }
189
9
                other => {
190
9
                    if !self.state.in_document_title() {
191
5
                        self.state = DocumentAttributeParserState::InDocument;
192
6
                    }
193

            
194
9
                    return Some(other);
195
                }
196
            }
197
        }
198
46
    }
199
}
200

            
201
/// Tracks how far a [`FrontmatterExtractor`] has progressed through the start
/// of the document.
enum DocumentAttributeParserState<'a> {
    /// Still scanning for frontmatter, outside of any title or code block.
    Parsing,
    /// Between the start and end events of a top-level (h1) heading.
    InTitle,
    /// Inside the frontmatter code block of the contained kind.
    InAttributeCodeBlock(CodeBlockKind<'a>),
    /// Scanning is over; the contained buffered events are replayed before
    /// events are read from the source iterator again.
    InDocumentBuffered(vec::IntoIter<Event<'a>>),
    /// Scanning is over; events are passed through from the source iterator.
    InDocument,
}

impl<'a> DocumentAttributeParserState<'a> {
210
28
    pub fn in_document(&self) -> bool {
211
28
        matches!(self, Self::InDocument)
212
28
    }
213

            
214
7
    pub fn in_code(&self) -> bool {
215
7
        matches!(self, Self::InAttributeCodeBlock(_))
216
7
    }
217

            
218
7
    pub fn code_block_kind(&self) -> CodeBlockKind<'a> {
219
7
        if let Self::InAttributeCodeBlock(kind) = self {
220
7
            kind.clone()
221
        } else {
222
            CodeBlockKind::Indented
223
        }
224
7
    }
225

            
226
27
    pub fn in_document_title(&self) -> bool {
227
27
        matches!(self, Self::InTitle)
228
27
    }
229
}
230

            
231
/// Metadata stored within a Markdown document.
#[derive(Debug, Clone)]
pub struct Frontmatter<'a> {
    /// The top-level heading's plain-text contents, if the document began with
    /// a top-level heading.
    pub title: Option<String>,
    /// The frontmatter code block, if detected.
    pub code_block: Option<CodeBlock<'a>>,
}

impl<'a> Frontmatter<'a> {
242
9
    fn title_mut(&mut self) -> &mut String {
243
9
        if self.title.is_none() {
244
7
            self.title = Some(String::new());
245
7
        }
246

            
247
9
        self.title.as_mut().expect("always initialized")
248
9
    }
249
}
250

            
251
/// A code block from a Markdown document's [`Frontmatter`].
#[derive(Clone, Debug)]
pub struct CodeBlock<'a> {
    /// The contents of the code block.
    pub source: CowStr<'a>,
    /// The language of the code block, which is the identifier following the
    /// three backticks in a fenced Markdown code block. This is `None` for an
    /// indented code block.
    pub language: Option<CowStr<'a>>,
}

#[test]
fn attribute_parser_test() {
    #[derive(serde::Serialize, serde::Deserialize, Debug)]
    struct Attributes {
        hello: String,
    }

    // A title followed by a fenced TOML block and then regular body text.
    let source = r#"# My **Document**

```toml
hello = "world"
```

This is regular text
"#;

    // Rendering through the extractor must drop the frontmatter code block
    // while keeping the heading and the body.
    let mut extractor = FrontmatterExtractor::from_markdown(source);
    let mut rendered = String::new();
    pulldown_cmark::html::push_html(&mut rendered, &mut extractor);
    assert_eq!(
        rendered,
        "<h1>My <strong>Document</strong></h1>\n<p>This is regular text</p>\n"
    );

    let Frontmatter { title, code_block } = extractor
        .frontmatter
        .expect("frontmatter not detected");
    assert_eq!(title.as_deref(), Some("My Document"));

    let CodeBlock {
        source: attributes_source,
        language,
    } = code_block.expect("code block not detected");
    assert_eq!(language, Some(CowStr::from("toml")));

    let deserialized: Attributes = toml::from_str(&attributes_source).unwrap();
    assert_eq!(deserialized.hello, "world");
}

#[test]
fn extract_buffered() {
    let mut extractor =
        FrontmatterExtractor::from_markdown("# Heading\n\n    hello world\n\nBody");

    // Extracting up front must still report the title and the code block.
    let frontmatter = extractor.extract_buffered().unwrap();
    assert_eq!(frontmatter.title.as_deref(), Some("Heading"));
    let code_block = frontmatter.code_block.as_ref().unwrap();
    assert_eq!(code_block.source.as_ref(), "hello world\n");

    // The events consumed during extraction must be replayed when rendering.
    let mut rendered = String::new();
    pulldown_cmark::html::push_html(&mut rendered, extractor);
    assert_eq!(rendered, "<h1>Heading</h1>\n<p>Body</p>\n");
}

#[test]
fn indented_parse_test() {
    #[derive(serde::Serialize, serde::Deserialize, Debug)]
    struct Attributes {
        hello: String,
    }

    // A title followed by an indented code block and then regular body text.
    let source = r#"# My **Document**

    hello = "world"

This is regular text
"#;

    // Rendering through the extractor must drop the frontmatter code block
    // while keeping the heading and the body.
    let mut extractor = FrontmatterExtractor::from_markdown(source);
    let mut rendered = String::new();
    pulldown_cmark::html::push_html(&mut rendered, &mut extractor);
    assert_eq!(
        rendered,
        "<h1>My <strong>Document</strong></h1>\n<p>This is regular text</p>\n"
    );

    let Frontmatter { title, code_block } = extractor
        .frontmatter
        .expect("frontmatter not detected");
    assert_eq!(title.as_deref(), Some("My Document"));

    let CodeBlock {
        source: attributes_source,
        language,
    } = code_block.expect("code block not detected");
    // Indented code blocks carry no language identifier.
    assert_eq!(language, None);

    let deserialized: Attributes = toml::from_str(&attributes_source).unwrap();
    assert_eq!(deserialized.hello, "world");
}