1
// We use the Range type which doesn't allow this, and it's not worth making a
2
// helper function to appease clippy.
3
#![allow(clippy::range_plus_one)]
4
//! Utilities shared between Bud Assembly and Bud.
5
use std::{
6
    borrow::Cow,
7
    fmt::Display,
8
    num::{ParseFloatError, ParseIntError},
9
    ops::Range,
10
    str::CharIndices, error::Error,
11
};
12

            
13
/// An iterator adapter thatallows peeking up to two positions ahead.
14
pub struct DoublePeekable<I>
15
where
16
    I: Iterator,
17
{
18
    iter: I,
19
    peeked: Peeked<I::Item>,
20
}
21

            
22
impl<I> DoublePeekable<I>
23
where
24
    I: Iterator,
25
{
26
    /// Returns a new instance wrapping `iter`.
27
1250
    pub fn new(iter: I) -> Self {
28
1250
        Self {
29
1250
            iter,
30
1250
            peeked: Peeked(None),
31
1250
        }
32
1250
    }
33

            
34
    /// Returns a reference to the next item the iterator will return, if
35
    /// present.
36
28880
    pub fn peek(&mut self) -> Option<&I::Item> {
37
28880
        if self.peeked.len() < 1 {
38
26855
            if let Some(next_value) = self.iter.next() {
39
25714
                self.peeked = Peeked(Some(PeekedData::One(next_value)));
40
25714
            } else {
41
1141
                return None;
42
            }
43
2025
        }
44

            
45
27739
        self.peeked.nth(0)
46
28880
    }
47

            
48
    /// Reterns a reference to the second item the iterator will return, if
49
    /// present.
50
157
    pub fn peek_second(&mut self) -> Option<&I::Item> {
51
157
        if self.peeked.len() < 2 {
52
157
            if let Some(next_value) = self.iter.next() {
53
157
                self.peeked.0 = match self.peeked.0.take() {
54
                    None => match self.iter.next() {
55
                        Some(second_value) => Some(PeekedData::Two(next_value, second_value)),
56
                        None => Some(PeekedData::One(next_value)),
57
                    },
58
157
                    Some(PeekedData::One(existing_value)) => {
59
157
                        Some(PeekedData::Two(existing_value, next_value))
60
                    }
61
                    Some(PeekedData::Two(first, second)) => Some(PeekedData::Two(first, second)),
62
                }
63
            }
64
        }
65

            
66
157
        self.peeked.nth(1)
67
157
    }
68
}
69

            
70
impl<I> Iterator for DoublePeekable<I>
71
where
72
    I: Iterator,
73
{
74
    type Item = I::Item;
75

            
76
    fn next(&mut self) -> Option<Self::Item> {
77
83844
        if let Some(peeked) = self.peeked.next() {
78
25679
            Some(peeked)
79
        } else {
80
58165
            self.iter.next()
81
        }
82
83844
    }
83
}
84

            
85
/// Look-ahead buffer holding zero (`None`), one, or two items that were pulled
/// from an iterator but not yet returned.
struct Peeked<T>(Option<PeekedData<T>>);
86

            
87
/// The non-empty states of the look-ahead buffer.
enum PeekedData<T> {
    /// Exactly one item is buffered.
    One(T),
    /// Two items are buffered, in the order they will be returned.
    Two(T, T),
}
91

            
92
impl<T> Peeked<T> {
93
    const fn len(&self) -> usize {
94
2182
        match &self.0 {
95
26855
            None => 0,
96
2182
            Some(PeekedData::One(_)) => 1,
97
            Some(PeekedData::Two(_, _)) => 2,
98
        }
99
29037
    }
100

            
101
    fn nth(&self, index: usize) -> Option<&T> {
102
27896
        match &self.0 {
103
            None => None,
104
27739
            Some(PeekedData::One(value)) => {
105
27739
                if index == 0 {
106
27739
                    Some(value)
107
                } else {
108
                    None
109
                }
110
            }
111
157
            Some(PeekedData::Two(first, second)) => {
112
157
                if index == 0 {
113
                    Some(first)
114
157
                } else if index == 1 {
115
157
                    Some(second)
116
                } else {
117
                    None
118
                }
119
            }
120
        }
121
27896
    }
122
}
123

            
124
impl<T> Iterator for Peeked<T> {
125
    type Item = T;
126

            
127
    fn next(&mut self) -> Option<Self::Item> {
128
83832
        match self.0.take() {
129
57961
            None => None,
130
25714
            Some(PeekedData::One(value)) => Some(value),
131
157
            Some(PeekedData::Two(first, second)) => {
132
157
                self.0 = Some(PeekedData::One(second));
133
157
                Some(first)
134
            }
135
        }
136
83832
    }
137
}
138

            
139
/// An error while decoding the contents of a string literal.
#[derive(Eq, PartialEq, Debug, Clone)]
pub enum DecodeStringError {
    /// An invalid hexadecimal character was encountered at the given offset.
    InvalidHexadecimalCharacter(usize),
    /// An invalid unicode codepoint was encountered.
    InvalidUnicodeCodepoint {
        /// The decoded codepoint.
        codepoint: u32,
        /// The range of the escape sequence.
        range: Range<usize>,
    },
    /// An invalid character was encountered while parsing a unicode escape.
    InvalidUnicodeEscape(usize),
    /// The end double-quote character was not found.
    EndQuoteNotFound,
}
156

            
157
impl DecodeStringError {
158
    #[must_use]
159
    /// Returns the location of the error within the original source.
160
    pub fn location(&self) -> Option<Range<usize>> {
161
        match self {
162
            DecodeStringError::InvalidUnicodeEscape(offset)
163
            | DecodeStringError::InvalidHexadecimalCharacter(offset) => Some(*offset..*offset + 1),
164
            DecodeStringError::InvalidUnicodeCodepoint { range, .. } => Some(range.clone()),
165

            
166
            DecodeStringError::EndQuoteNotFound => None,
167
        }
168
    }
169
}
170

            
171
impl Display for DecodeStringError {
172
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
173
        match self {
174
            DecodeStringError::InvalidHexadecimalCharacter(_) => {
175
                f.write_str("invalid hexadecimal character")
176
            }
177
            DecodeStringError::InvalidUnicodeCodepoint { codepoint, .. } => {
178
                write!(f, "{codepoint} is an invalid unicode codepoint")
179
            }
180
            DecodeStringError::InvalidUnicodeEscape(_) => {
181
                f.write_str("invalid unicode escape format. expected \\u{FFEF}")
182
            }
183
            DecodeStringError::EndQuoteNotFound => f.write_str("missing end quote"),
184
        }
185
    }
186
}
187

            
188
// Relies on the trait's default methods, so no underlying `source()` is reported.
impl Error for DecodeStringError {}
189

            
190
/// The result of decoding a string literal's contents.
pub struct StringLiteral {
    /// The decoded contents of the literal, with escape sequences resolved.
    pub contents: String,
    /// The offset of the end quote in the original string.
    pub end_quote_offset: usize,
}
197

            
198
/// Decodes a string literal with escape sequences used by Bud and Bud Assembly.
199
205
pub fn decode_string_literal_contents(
200
205
    mut chars: &mut impl Iterator<Item = (usize, char)>,
201
205
    start_offset: usize,
202
205
) -> Result<StringLiteral, DecodeStringError> {
203
205
    let mut string = String::new();
204
205
    let mut end_offset = start_offset + 1;
205
945
    loop {
206
945
        let ch = chars.next().map(|r| r.1);
207
945
        if ch.is_some() {
208
933
            end_offset += 1;
209
933
        }
210

            
211
945
        match ch {
212
            Some('"') => {
213
                // Final quote
214
181
                break;
215
            }
216
            Some('\\') => {
217
84
                end_offset += 1;
218
                // Escaped character
219
84
                let unescaped = match chars.next() {
220
12
                    Some((_, 't')) => '\t',
221
12
                    Some((_, 'r')) => '\r',
222
12
                    Some((_, 'n')) => '\n',
223
                    Some((_, 'u')) => {
224
12
                        let escape_start = end_offset;
225
12
                        end_offset += 1;
226
12
                        match chars.next().map(|r| r.1) {
227
12
                            Some('{') => {}
228
                            _ => return Err(DecodeStringError::InvalidUnicodeEscape(end_offset)),
229
                        }
230

            
231
12
                        let mut codepoint = 0_u32;
232
60
                        for (offset, char) in &mut chars {
233
60
                            end_offset = offset + 1;
234
48
                            let nibble_value = match char {
235
                                '}' => {
236
12
                                    break;
237
                                }
238
48
                                ch if ch.is_numeric() => u32::from(ch) - u32::from(b'0'),
239
                                ch if ('a'..='f').contains(&ch) => u32::from(ch) - u32::from(b'a'),
240
                                ch if ('A'..='F').contains(&ch) => u32::from(ch) - u32::from(b'A'),
241
                                _ => {
242
                                    return Err(DecodeStringError::InvalidHexadecimalCharacter(
243
                                        offset,
244
                                    ))
245
                                }
246
                            };
247

            
248
48
                            codepoint <<= 4;
249
48
                            codepoint |= nibble_value;
250
                        }
251

            
252
12
                        if let Some(ch) = char::from_u32(codepoint) {
253
12
                            ch
254
                        } else {
255
                            return Err(DecodeStringError::InvalidUnicodeCodepoint {
256
                                codepoint,
257
                                range: escape_start..end_offset,
258
                            });
259
                        }
260
                    }
261
24
                    Some((_, other)) => other,
262
12
                    None => return Err(DecodeStringError::EndQuoteNotFound),
263
                };
264

            
265
72
                string.push(unescaped);
266
            }
267
668
            Some(ch) => {
268
668
                string.push(ch);
269
668
            }
270
12
            None => return Err(DecodeStringError::EndQuoteNotFound),
271
        }
272
    }
273

            
274
181
    Ok(StringLiteral {
275
181
        contents: string,
276
181
        end_quote_offset: end_offset - 1,
277
181
    })
278
205
}
279

            
280
/// Decodes all valid numeric literal formats supported by Bud and Bud Assembly.
281
2204
pub fn decode_numeric_literal(
282
2204
    chars: &mut DoublePeekable<CharIndices<'_>>,
283
2204
    source: &str,
284
2204
    start_offset: usize,
285
2204
) -> Result<NumericLiteral, DecodeNumericError> {
286
2204
    let mut end = start_offset;
287
2781
    while chars
288
2781
        .peek()
289
2781
        .map_or(false, |(_, char)| char.is_numeric() || *char == '_')
290
577
    {
291
577
        end = chars.next().expect("just peeked").0;
292
577
    }
293

            
294
    // If we have a period and another numeric, this is a floating point number.
295
2204
    if chars.peek().map_or(false, |(_, ch)| *ch == '.')
296
157
        && chars.peek_second().map_or(false, |(_, ch)| ch.is_numeric())
297
    {
298
        // Skip the decimal
299
157
        chars.next();
300
362
        while chars
301
362
            .peek()
302
362
            .map_or(false, |(_, char)| char.is_numeric() || *char == '_')
303
205
        {
304
205
            end = chars.next().expect("just peeked").0;
305
205
        }
306

            
307
157
        let source = &source[start_offset..=end];
308
157
        let source = if source.find('_').is_some() {
309
12
            Cow::Owned(source.replace('_', ""))
310
        } else {
311
145
            Cow::Borrowed(source)
312
        };
313

            
314
157
        let value = source.parse::<f64>()?;
315

            
316
157
        return Ok(NumericLiteral {
317
157
            contents: Numeric::Real(value),
318
157
            last_offset: end,
319
157
        });
320
2047
    }
321
2047

            
322
2047
    let source = &source[start_offset..=end];
323
2047
    let source = if source.find('_').is_some() {
324
12
        Cow::Owned(source.replace('_', ""))
325
    } else {
326
2035
        Cow::Borrowed(source)
327
    };
328
2047
    let value = source.parse::<i64>()?;
329
2047
    Ok(NumericLiteral {
330
2047
        contents: Numeric::Integer(value),
331
2047
        last_offset: end,
332
2047
    })
333
2204
}
334

            
335
/// A parsed numeric literal.
336
pub struct NumericLiteral {
337
    /// The value that was parsed.
338
    pub contents: Numeric,
339
    /// The position of the last character that was part of this literal value.
340
    pub last_offset: usize,
341
}
342

            
343
/// A numeric literal.
pub enum Numeric {
    /// A signed integer value.
    Integer(i64),
    /// A real number (floating point).
    Real(f64),
}
350

            
351
/// An error while decoding a numeric literal.
#[derive(Eq, PartialEq, Debug, Clone)]
pub enum DecodeNumericError {
    /// An error from parsing a float value.
    Float(ParseFloatError),
    /// An error from parsing an integer value.
    Integer(ParseIntError),
}
359

            
360
impl DecodeNumericError {
361
    /// Returns the location of the error within the original source.
362
    #[must_use]
363
    pub fn location(&self) -> Option<Range<usize>> {
364
        None // TODO
365
    }
366
}
367

            
368
impl Display for DecodeNumericError {
369
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
370
        match self {
371
            DecodeNumericError::Float(err) => Display::fmt(err, f),
372
            DecodeNumericError::Integer(err) => Display::fmt(err, f),
373
        }
374
    }
375
}
376

            
377
impl From<ParseFloatError> for DecodeNumericError {
378
    fn from(err: ParseFloatError) -> Self {
379
        Self::Float(err)
380
    }
381
}
382

            
383
impl From<ParseIntError> for DecodeNumericError {
384
    fn from(err: ParseIntError) -> Self {
385
        Self::Integer(err)
386
    }
387
}
388

            
389
impl Error for DecodeNumericError {
390
    fn source(&self) -> Option<&(dyn Error + 'static)> {
391
        match self {
392
            DecodeNumericError::Float(error) => Some(error),
393
            DecodeNumericError::Integer(error) => Some(error),
394
        }
395
    }
396
}