1use std::ops::Range;
5use std::str::Chars;
6
7use Mode::*;
8
9#[cfg(test)]
10mod tests;
11
/// Errors and warnings that can be reported while unescaping the contents of
/// a character, byte, or string literal.
#[derive(Debug, PartialEq, Eq)]
pub enum EscapeError {
    /// Exactly one unit was expected, but the input was empty.
    ZeroChars,
    /// Exactly one unit was expected, but more input followed it.
    MoreThanOneChar,

    /// A `\` with nothing after it.
    LoneSlash,
    /// A `\` followed by a character that does not start a known escape.
    InvalidEscape,
    /// A raw `\r` in a non-raw literal.
    BareCarriageReturn,
    /// A raw `\r` in a raw string literal.
    BareCarriageReturnInRawString,
    /// A character that may only appear escaped (e.g. `'` in a char literal
    /// or `"` in a string literal) was found unescaped.
    EscapeOnlyChar,

    /// A `\x` escape with fewer than two characters after it.
    TooShortHexEscape,
    /// A non-hexadecimal character inside a `\x` escape.
    InvalidCharInHexEscape,
    /// A `\x` escape above `0x7F` in a mode that does not allow high bytes.
    OutOfRangeHexEscape,

    /// `\u` not followed by `{`.
    NoBraceInUnicodeEscape,
    /// A character other than a hex digit, `_` or `}` inside `\u{..}`.
    InvalidCharInUnicodeEscape,
    /// `\u{}`.
    EmptyUnicodeEscape,
    /// `\u{..` without a closing `}`.
    UnclosedUnicodeEscape,
    /// `\u{_..}`: the first character after `{` is an underscore.
    LeadingUnderscoreUnicodeEscape,
    /// More than six hexadecimal digits inside `\u{..}`.
    OverlongUnicodeEscape,
    /// The escaped value is not a valid `char` although it is within
    /// `0..=0x10FFFF` (i.e. it is a surrogate).
    LoneSurrogateUnicodeEscape,
    /// The escaped value is greater than `0x10FFFF`.
    OutOfRangeUnicodeEscape,

    /// A well-formed `\u{..}` escape in a mode that forbids unicode escapes.
    UnicodeEscapeInByte,
    /// A non-ASCII character in a mode that forbids unicode characters.
    NonAsciiCharInByte,

    /// A NUL unit (`\0`) inside a C string literal.
    NulInCStr,

    /// Warning: the character right after a skipped line continuation is
    /// whitespace that was not skipped.
    UnskippedWhitespaceWarning,

    /// Warning: a line continuation skipped over more than one newline.
    MultipleSkippedLinesWarning,
}

impl EscapeError {
    /// Returns `true` for real errors and `false` for the two `*Warning`
    /// variants.
    pub fn is_fatal(&self) -> bool {
        match self {
            Self::UnskippedWhitespaceWarning | Self::MultipleSkippedLinesWarning => false,
            _ => true,
        }
    }
}
82
83pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
89where
90 F: FnMut(Range<usize>, Result<char, EscapeError>),
91{
92 match mode {
93 Char | Byte => {
94 let mut chars = src.chars();
95 let res = unescape_char_or_byte(&mut chars, mode);
96 callback(0..(src.len() - chars.as_str().len()), res);
97 }
98 Str | ByteStr => unescape_non_raw_common(src, mode, callback),
99 RawStr | RawByteStr => check_raw_common(src, mode, callback),
100 RawCStr => check_raw_common(src, mode, &mut |r, mut result| {
101 if let Ok('\0') = result {
102 result = Err(EscapeError::NulInCStr);
103 }
104 callback(r, result)
105 }),
106 CStr => unreachable!(),
107 }
108}
109
/// A single unit of a literal that may mix unicode characters with raw
/// non-ASCII bytes.
///
/// The derives let callers debug-print, compare and copy the values they
/// receive from an unescaping callback.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MixedUnit {
    /// A unicode character. ASCII bytes converted through `From<u8>` also end
    /// up here.
    Char(char),

    /// A raw byte. `From<u8>` only produces this variant for non-ASCII bytes
    /// (`0x80..=0xFF`).
    HighByte(u8),
}

impl From<char> for MixedUnit {
    /// Wraps `c` as [`MixedUnit::Char`].
    fn from(c: char) -> Self {
        MixedUnit::Char(c)
    }
}

impl From<u8> for MixedUnit {
    /// Converts an ASCII byte to [`MixedUnit::Char`] and any other byte to
    /// [`MixedUnit::HighByte`].
    fn from(n: u8) -> Self {
        if n.is_ascii() {
            MixedUnit::Char(char::from(n))
        } else {
            MixedUnit::HighByte(n)
        }
    }
}
144
145pub fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F)
150where
151 F: FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
152{
153 match mode {
154 CStr => unescape_non_raw_common(src, mode, &mut |r, mut result| {
155 if let Ok(MixedUnit::Char('\0')) = result {
156 result = Err(EscapeError::NulInCStr);
157 }
158 callback(r, result)
159 }),
160 Char | Byte | Str | RawStr | ByteStr | RawByteStr | RawCStr => unreachable!(),
161 }
162}
163
164pub fn unescape_char(src: &str) -> Result<char, EscapeError> {
167 unescape_char_or_byte(&mut src.chars(), Char)
168}
169
170pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> {
173 unescape_char_or_byte(&mut src.chars(), Byte).map(byte_from_char)
174}
175
/// The kind of literal being unescaped; it decides which characters and
/// escapes are accepted.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Mode {
    /// Character literal (single-quoted, no prefix).
    Char,

    /// Byte literal (single-quoted, `b` prefix).
    Byte,

    /// String literal (no prefix).
    Str,
    /// Raw string literal (no prefix besides the raw marker).
    RawStr,

    /// Byte string literal (`b` prefix).
    ByteStr,
    /// Raw byte string literal (`b` prefix besides the raw marker).
    RawByteStr,

    /// C string literal (`c` prefix).
    CStr,
    /// Raw C string literal (`c` prefix besides the raw marker).
    RawCStr,
}

impl Mode {
    /// Returns `true` for every string-like mode and `false` for `Char` and
    /// `Byte`.
    pub fn in_double_quotes(self) -> bool {
        match self {
            Self::Char | Self::Byte => false,
            Self::Str
            | Self::RawStr
            | Self::ByteStr
            | Self::RawByteStr
            | Self::CStr
            | Self::RawCStr => true,
        }
    }

    /// Are `\x` escapes above `0x7F` accepted?
    ///
    /// Panics for the raw modes.
    fn allow_high_bytes(self) -> bool {
        match self {
            Self::Byte | Self::ByteStr | Self::CStr => true,
            Self::Char | Self::Str => false,
            Self::RawStr | Self::RawByteStr | Self::RawCStr => unreachable!(),
        }
    }

    /// Are non-ASCII characters accepted when written literally?
    #[inline]
    fn allow_unicode_chars(self) -> bool {
        match self {
            Self::Char | Self::Str | Self::RawStr | Self::CStr | Self::RawCStr => true,
            Self::Byte | Self::ByteStr | Self::RawByteStr => false,
        }
    }

    /// Are `\u{..}` escapes accepted?
    ///
    /// Panics for the raw modes.
    fn allow_unicode_escapes(self) -> bool {
        match self {
            Self::Char | Self::Str | Self::CStr => true,
            Self::Byte | Self::ByteStr => false,
            Self::RawByteStr | Self::RawStr | Self::RawCStr => unreachable!(),
        }
    }

    /// Returns the literal prefix for this mode with the raw marker left out:
    /// `""`, `"b"` or `"c"`.
    pub fn prefix_noraw(self) -> &'static str {
        match self {
            Self::CStr | Self::RawCStr => "c",
            Self::Byte | Self::ByteStr | Self::RawByteStr => "b",
            Self::Char | Self::Str | Self::RawStr => "",
        }
    }
}
236
237fn scan_escape<T: From<char> + From<u8>>(
238 chars: &mut Chars<'_>,
239 mode: Mode,
240) -> Result<T, EscapeError> {
241 let res: char = match chars.next().ok_or(EscapeError::LoneSlash)? {
243 '"' => '"',
244 'n' => '\n',
245 'r' => '\r',
246 't' => '\t',
247 '\\' => '\\',
248 '\'' => '\'',
249 '0' => '\0',
250 'x' => {
251 let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
254 let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
255
256 let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
257 let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
258
259 let value = (hi * 16 + lo) as u8;
260
261 return if !mode.allow_high_bytes() && !value.is_ascii() {
262 Err(EscapeError::OutOfRangeHexEscape)
263 } else {
264 Ok(T::from(value))
267 };
268 }
269 'u' => return scan_unicode(chars, mode.allow_unicode_escapes()).map(T::from),
270 _ => return Err(EscapeError::InvalidEscape),
271 };
272 Ok(T::from(res))
273}
274
275fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result<char, EscapeError> {
276 if chars.next() != Some('{') {
279 return Err(EscapeError::NoBraceInUnicodeEscape);
280 }
281
282 let mut n_digits = 1;
284 let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
285 '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
286 '}' => return Err(EscapeError::EmptyUnicodeEscape),
287 c => c
288 .to_digit(16)
289 .ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
290 };
291
292 loop {
295 match chars.next() {
296 None => return Err(EscapeError::UnclosedUnicodeEscape),
297 Some('_') => continue,
298 Some('}') => {
299 if n_digits > 6 {
300 return Err(EscapeError::OverlongUnicodeEscape);
301 }
302
303 if !allow_unicode_escapes {
306 return Err(EscapeError::UnicodeEscapeInByte);
307 }
308
309 break std::char::from_u32(value).ok_or({
310 if value > 0x10FFFF {
311 EscapeError::OutOfRangeUnicodeEscape
312 } else {
313 EscapeError::LoneSurrogateUnicodeEscape
314 }
315 });
316 }
317 Some(c) => {
318 let digit: u32 = c
319 .to_digit(16)
320 .ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
321 n_digits += 1;
322 if n_digits > 6 {
323 continue;
325 }
326 value = value * 16 + digit;
327 }
328 };
329 }
330}
331
332#[inline]
333fn ascii_check(c: char, allow_unicode_chars: bool) -> Result<char, EscapeError> {
334 if allow_unicode_chars || c.is_ascii() {
335 Ok(c)
336 } else {
337 Err(EscapeError::NonAsciiCharInByte)
338 }
339}
340
341fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
342 let c = chars.next().ok_or(EscapeError::ZeroChars)?;
343 let res = match c {
344 '\\' => scan_escape(chars, mode),
345 '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
346 '\r' => Err(EscapeError::BareCarriageReturn),
347 _ => ascii_check(c, mode.allow_unicode_chars()),
348 }?;
349 if chars.next().is_some() {
350 return Err(EscapeError::MoreThanOneChar);
351 }
352 Ok(res)
353}
354
355fn unescape_non_raw_common<F, T: From<char> + From<u8>>(src: &str, mode: Mode, callback: &mut F)
358where
359 F: FnMut(Range<usize>, Result<T, EscapeError>),
360{
361 let mut chars = src.chars();
362 let allow_unicode_chars = mode.allow_unicode_chars(); while let Some(c) = chars.next() {
368 let start = src.len() - chars.as_str().len() - c.len_utf8();
369 let res = match c {
370 '\\' => {
371 match chars.clone().next() {
372 Some('\n') => {
373 skip_ascii_whitespace(&mut chars, start, &mut |range, err| {
378 callback(range, Err(err))
379 });
380 continue;
381 }
382 _ => scan_escape::<T>(&mut chars, mode),
383 }
384 }
385 '"' => Err(EscapeError::EscapeOnlyChar),
386 '\r' => Err(EscapeError::BareCarriageReturn),
387 _ => ascii_check(c, allow_unicode_chars).map(T::from),
388 };
389 let end = src.len() - chars.as_str().len();
390 callback(start..end, res);
391 }
392}
393
394fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
395where
396 F: FnMut(Range<usize>, EscapeError),
397{
398 let tail = chars.as_str();
399 let first_non_space = tail
400 .bytes()
401 .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
402 .unwrap_or(tail.len());
403 if tail[1..first_non_space].contains('\n') {
404 let end = start + first_non_space + 1;
406 callback(start..end, EscapeError::MultipleSkippedLinesWarning);
407 }
408 let tail = &tail[first_non_space..];
409 if let Some(c) = tail.chars().next() {
410 if c.is_whitespace() {
411 let end = start + first_non_space + c.len_utf8() + 1;
414 callback(start..end, EscapeError::UnskippedWhitespaceWarning);
415 }
416 }
417 *chars = tail.chars();
418}
419
420fn check_raw_common<F>(src: &str, mode: Mode, callback: &mut F)
425where
426 F: FnMut(Range<usize>, Result<char, EscapeError>),
427{
428 let mut chars = src.chars();
429 let allow_unicode_chars = mode.allow_unicode_chars(); while let Some(c) = chars.next() {
435 let start = src.len() - chars.as_str().len() - c.len_utf8();
436 let res = match c {
437 '\r' => Err(EscapeError::BareCarriageReturnInRawString),
438 _ => ascii_check(c, allow_unicode_chars),
439 };
440 let end = src.len() - chars.as_str().len();
441 callback(start..end, res);
442 }
443}
444
/// Converts a `char` whose code point fits in a byte into that byte.
///
/// In debug builds this asserts that the code point is at most `u8::MAX`;
/// otherwise larger values are truncated by the cast.
#[inline]
pub fn byte_from_char(c: char) -> u8 {
    let code_point = u32::from(c);
    debug_assert!(code_point <= u32::from(u8::MAX), "guaranteed because of ByteStr");
    code_point as u8
}