core/char/convert.rs
1//! Character conversions.
2
3use crate::char::TryFromCharError;
4use crate::error::Error;
5use crate::fmt;
6use crate::mem::transmute;
7use crate::str::FromStr;
8use crate::ub_checks::assert_unsafe_precondition;
9
10/// Converts a `u32` to a `char`. See [`char::from_u32`].
11#[must_use]
12#[inline]
13pub(super) const fn from_u32(i: u32) -> Option<char> {
14 // FIXME(const-hack): once Result::ok is const fn, use it here
15 match char_try_from_u32(i) {
16 Ok(c) => Some(c),
17 Err(_) => None,
18 }
19}
20
21/// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`].
22#[inline]
23#[must_use]
24#[cfg_attr(not(bootstrap), allow(unnecessary_transmutes))]
25pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
26 // SAFETY: the caller must guarantee that `i` is a valid char value.
27 unsafe {
28 assert_unsafe_precondition!(
29 check_language_ub,
30 "invalid value for `char`",
31 (i: u32 = i) => char_try_from_u32(i).is_ok()
32 );
33 transmute(i)
34 }
35}
36
37#[stable(feature = "char_convert", since = "1.13.0")]
38impl From<char> for u32 {
39 /// Converts a [`char`] into a [`u32`].
40 ///
41 /// # Examples
42 ///
43 /// ```
44 /// let c = 'c';
45 /// let u = u32::from(c);
46 /// assert!(4 == size_of_val(&u))
47 /// ```
48 #[inline]
49 fn from(c: char) -> Self {
50 c as u32
51 }
52}
53
54#[stable(feature = "more_char_conversions", since = "1.51.0")]
55impl From<char> for u64 {
56 /// Converts a [`char`] into a [`u64`].
57 ///
58 /// # Examples
59 ///
60 /// ```
61 /// let c = '👤';
62 /// let u = u64::from(c);
63 /// assert!(8 == size_of_val(&u))
64 /// ```
65 #[inline]
66 fn from(c: char) -> Self {
67 // The char is casted to the value of the code point, then zero-extended to 64 bit.
68 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
69 c as u64
70 }
71}
72
73#[stable(feature = "more_char_conversions", since = "1.51.0")]
74impl From<char> for u128 {
75 /// Converts a [`char`] into a [`u128`].
76 ///
77 /// # Examples
78 ///
79 /// ```
80 /// let c = 'âš™';
81 /// let u = u128::from(c);
82 /// assert!(16 == size_of_val(&u))
83 /// ```
84 #[inline]
85 fn from(c: char) -> Self {
86 // The char is casted to the value of the code point, then zero-extended to 128 bit.
87 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
88 c as u128
89 }
90}
91
92/// Maps a `char` with code point in U+0000..=U+00FF to a byte in 0x00..=0xFF with same value,
93/// failing if the code point is greater than U+00FF.
94///
95/// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding.
96#[stable(feature = "u8_from_char", since = "1.59.0")]
97impl TryFrom<char> for u8 {
98 type Error = TryFromCharError;
99
100 /// Tries to convert a [`char`] into a [`u8`].
101 ///
102 /// # Examples
103 ///
104 /// ```
105 /// let a = 'ÿ'; // U+00FF
106 /// let b = 'Ä€'; // U+0100
107 /// assert_eq!(u8::try_from(a), Ok(0xFF_u8));
108 /// assert!(u8::try_from(b).is_err());
109 /// ```
110 #[inline]
111 fn try_from(c: char) -> Result<u8, Self::Error> {
112 u8::try_from(u32::from(c)).map_err(|_| TryFromCharError(()))
113 }
114}
115
116/// Maps a `char` with code point in U+0000..=U+FFFF to a `u16` in 0x0000..=0xFFFF with same value,
117/// failing if the code point is greater than U+FFFF.
118///
119/// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003.
120#[stable(feature = "u16_from_char", since = "1.74.0")]
121impl TryFrom<char> for u16 {
122 type Error = TryFromCharError;
123
124 /// Tries to convert a [`char`] into a [`u16`].
125 ///
126 /// # Examples
127 ///
128 /// ```
129 /// let trans_rights = 'âš§'; // U+26A7
130 /// let ninjas = '🥷'; // U+1F977
131 /// assert_eq!(u16::try_from(trans_rights), Ok(0x26A7_u16));
132 /// assert!(u16::try_from(ninjas).is_err());
133 /// ```
134 #[inline]
135 fn try_from(c: char) -> Result<u16, Self::Error> {
136 u16::try_from(u32::from(c)).map_err(|_| TryFromCharError(()))
137 }
138}
139
140/// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF.
141///
142/// Unicode is designed such that this effectively decodes bytes
143/// with the character encoding that IANA calls ISO-8859-1.
144/// This encoding is compatible with ASCII.
145///
146/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
147/// which leaves some "blanks", byte values that are not assigned to any character.
148/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
149///
150/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
151/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
152/// to punctuation and various Latin characters.
153///
154/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
155/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
156/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
157/// C0 and C1 control codes.
158#[stable(feature = "char_convert", since = "1.13.0")]
159impl From<u8> for char {
160 /// Converts a [`u8`] into a [`char`].
161 ///
162 /// # Examples
163 ///
164 /// ```
165 /// let u = 32 as u8;
166 /// let c = char::from(u);
167 /// assert!(4 == size_of_val(&c))
168 /// ```
169 #[inline]
170 fn from(i: u8) -> Self {
171 i as char
172 }
173}
174
175/// An error which can be returned when parsing a char.
176///
177/// This `struct` is created when using the [`char::from_str`] method.
178#[stable(feature = "char_from_str", since = "1.20.0")]
179#[derive(Clone, Debug, PartialEq, Eq)]
180pub struct ParseCharError {
181 kind: CharErrorKind,
182}
183
/// The reason a string could not be parsed as a single `char`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CharErrorKind {
    /// The input string contained no characters.
    EmptyString,
    /// The input string contained more than one character.
    TooManyChars,
}
189
190#[stable(feature = "char_from_str", since = "1.20.0")]
191impl Error for ParseCharError {
192 #[allow(deprecated)]
193 fn description(&self) -> &str {
194 match self.kind {
195 CharErrorKind::EmptyString => "cannot parse char from empty string",
196 CharErrorKind::TooManyChars => "too many characters in string",
197 }
198 }
199}
200
201#[stable(feature = "char_from_str", since = "1.20.0")]
202impl fmt::Display for ParseCharError {
203 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
204 #[allow(deprecated)]
205 self.description().fmt(f)
206 }
207}
208
209#[stable(feature = "char_from_str", since = "1.20.0")]
210impl FromStr for char {
211 type Err = ParseCharError;
212
213 #[inline]
214 fn from_str(s: &str) -> Result<Self, Self::Err> {
215 let mut chars = s.chars();
216 match (chars.next(), chars.next()) {
217 (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
218 (Some(c), None) => Ok(c),
219 _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
220 }
221 }
222}
223
224#[inline]
225#[cfg_attr(not(bootstrap), allow(unnecessary_transmutes))]
226const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
227 // This is an optimized version of the check
228 // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF),
229 // which can also be written as
230 // i >= 0x110000 || (i >= 0xD800 && i < 0xE000).
231 //
232 // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is
233 // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same.
234 // In particular, numbers >= 0x110000 stay in this range.
235 //
236 // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single
237 // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped
238 // surrogate range as well as the numbers originally larger than 0x110000.
239 //
240 if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 {
241 Err(CharTryFromError(()))
242 } else {
243 // SAFETY: checked that it's a legal unicode value
244 Ok(unsafe { transmute(i) })
245 }
246}
247
248#[stable(feature = "try_from", since = "1.34.0")]
249impl TryFrom<u32> for char {
250 type Error = CharTryFromError;
251
252 #[inline]
253 fn try_from(i: u32) -> Result<Self, Self::Error> {
254 char_try_from_u32(i)
255 }
256}
257
258/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
259///
260/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
261/// See its documentation for more.
262#[stable(feature = "try_from", since = "1.34.0")]
263#[derive(Copy, Clone, Debug, PartialEq, Eq)]
264pub struct CharTryFromError(());
265
266#[stable(feature = "try_from", since = "1.34.0")]
267impl fmt::Display for CharTryFromError {
268 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
269 "converted integer out of range for `char`".fmt(f)
270 }
271}
272
273/// Converts a digit in the given radix to a `char`. See [`char::from_digit`].
274#[inline]
275#[must_use]
276pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
277 if radix > 36 {
278 panic!("from_digit: radix is too high (maximum 36)");
279 }
280 if num < radix {
281 let num = num as u8;
282 if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
283 } else {
284 None
285 }
286}