vintage_schematics/
ascii85.rs

1//! Encoding and decoding of [ASCII85](https://en.wikipedia.org/wiki/Ascii85)-encoded data.
2//!
3//! # Examples
4//!
5//! ```rust
6//! use vintage_schematics::ascii85::decode;
7//!
8//! let decoded = decode("87cURD_*#TDfTZ)+T").unwrap();
9//! assert_eq!(decoded, b"Hello, world!");
10//! ```
11
12/// Error type returned by [`decode`].
13#[derive(Debug, Copy, Clone, PartialEq, Eq)]
14pub struct DecodeError {
15	/// The position in the input where the error occurred.
16	pub position: usize,
17
18	/// The kind of error that occurred.
19	pub kind: ErrorKind,
20}
21
22impl std::fmt::Display for DecodeError {
23	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
24		write!(f, "ascii85 decode error at position {}: {}", self.position, self.kind)
25	}
26}
27
28impl std::error::Error for DecodeError {}
29
/// The kind of error that occurred during decoding ASCII85 input.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ErrorKind {
	/// A character outside the ASCII85 digit range `'!'..='u'` was encountered.
	InvalidCharacter(char),

	/// Adding this character to its group would exceed what fits in 32 bits.
	///
	/// `char` is the offending character and `encoded_count` its zero-based index within the
	/// five-character group.
	Overflow { char: char, encoded_count: usize },

	/// 'y' appeared in the middle of a group instead of between groups.
	BadYPosition,

	/// 'z' appeared in the middle of a group instead of between groups.
	BadZPosition,
}

impl std::fmt::Display for ErrorKind {
	/// Writes a short human-readable description of the error kind.
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		// `ErrorKind` is `Copy`, so matching by value keeps the bindings free of references.
		match *self {
			Self::BadYPosition => f.write_str("encountered 'y' at invalid position"),
			Self::BadZPosition => f.write_str("encountered 'z' at invalid position"),
			Self::InvalidCharacter(char) => write!(f, "invalid character: {char}"),
			Self::Overflow { char, encoded_count } => {
				write!(f, "malformed input: character '{char}' at chunk position {encoded_count} would overflow",)
			}
		}
	}
}
58
/// Place values of the five base-85 digits in an ASCII85 group, most significant first.
// Wikipedia's description adds the fifth digit without multiplying it by anything. Keeping an explicit
// 85^0 entry is equivalent (85^0 == 1 and x * 1 == x) and lets every digit of a group go through the same
// multiply-and-add step, with this table indexed directly by the digit's position in the group.
const POWS: [u32; 5] = {
	const BASE: u32 = 85;
	[BASE.pow(4), BASE.pow(3), BASE.pow(2), BASE, 1]
};
72
73/// Decodes an [ASCII85](https://en.wikipedia.org/wiki/Ascii85)-encoded string into a byte vector.
74///
75/// # Errors
76///
77/// Returns an error if the input data is not valid ASCII85.
78/// See [`DecodeError`] for more information.
79#[allow(clippy::missing_panics_doc)]
80pub fn decode(input: &str) -> Result<Vec<u8>, DecodeError> {
81	// janky ascii85 decoder
82
83	// read these example tables upside-down for a clear description of the decoding behaviour:
84	// https://en.wikipedia.org/wiki/Ascii85#Example_for_Ascii85
85
86	// here's an example of how it works for the input on wikipedia:
87	// - take the first five ASCII characters (`9jqo^`)
88	// - subtract 33 from each character's ASCII value
89	//   - INPUT:  |  9  |  j  |  q  |  o  |  ^  |
90	//   - ASCII:  |  57 | 106 | 113 | 111 |  94 |
91	//   - RESULT: |  24 |  73 |  80 |  78 |  61 |
92	// - multiply every nth char by (85^(4-n)) and add them together
93	//   - `POWS` contains precalculated powers of 85 in the correct order
94	//   - 24*85^4 + 73*85^3 + 80*85^2 + 78*85^1 + 61*85^0
95	//   - ≈1.25bn + ≈44.83m + 578,000 + 6630    + 61
96	//   - =1,298,230,816
97	// - encode as a big-endian 32-bit integer
98
99	// you have now converted five ASCII characters into four bytes of data :) repeat until end of input
100
101	let mut encoded_count = 0u32;
102	let mut decoded_chunk = 0u32;
103
104	// TODO: capacity calculation doesn't account for 'z's
105	let mut decoded = Vec::with_capacity(((input.len() / 5) * 4) + 5);
106
107	for (position, char) in input.chars().filter(|c| !c.is_ascii_whitespace()).enumerate() {
108		let decode_err = |kind| DecodeError { position, kind };
109
110		match char {
111			// special cases: 'z' (four nulls) and 'y' (four spaces)
112			'z' if encoded_count == 0 => decoded.extend_from_slice(&[0u8; 4]),
113			'y' if encoded_count == 0 => decoded.extend_from_slice(&[b' '; 4]),
114			'z' => {
115				return Err(decode_err(ErrorKind::BadZPosition));
116			}
117			'y' => {
118				return Err(decode_err(ErrorKind::BadYPosition));
119			}
120
121			// other chars
122			_ => decode_character(char, &mut encoded_count, &mut decoded_chunk, &mut decoded).map_err(decode_err)?,
123		}
124	}
125
126	// pad input with 'u's
127	// but why?: https://en.wikipedia.org/wiki/Ascii85#Adobe_version
128	let mut padding = 0;
129	while encoded_count != 0 {
130		decode_character('u', &mut encoded_count, &mut decoded_chunk, &mut decoded).map_err(|kind| DecodeError {
131			position: input.len() + padding,
132			kind,
133		})?;
134		padding += 1;
135	}
136
137	// remove trailing padding
138	decoded.drain((decoded.len() - padding)..decoded.len());
139
140	Ok(decoded)
141}
142
143fn decode_character(
144	char: char,
145	encoded_count: &mut u32,
146	decoded_chunk: &mut u32,
147	decoded: &mut Vec<u8>,
148) -> Result<(), ErrorKind> {
149	if !('!'..='u').contains(&char) {
150		return Err(ErrorKind::InvalidCharacter(char));
151	}
152	let value = (char as u8) - 33;
153
154	// each group of five characters encodes four bytes.
155	// within each group of five, the nth character adds (85^(4-n)) to the decoded value
156	let overflow_err = || ErrorKind::Overflow {
157		char,
158		encoded_count: *encoded_count as usize,
159	};
160
161	let value = u32::from(value).checked_mul(POWS[*encoded_count as usize]).ok_or_else(overflow_err)?;
162	*decoded_chunk = decoded_chunk.checked_add(value).ok_or_else(overflow_err)?;
163
164	*encoded_count += 1;
165	if (*encoded_count) == 5 {
166		// push bytes and reset state
167		decoded.extend_from_slice(&decoded_chunk.to_be_bytes());
168		*encoded_count = 0;
169		*decoded_chunk = 0;
170	}
171
172	Ok(())
173}
174
175/// Encodes a set of bytes into an [ASCII85](https://en.wikipedia.org/wiki/Ascii85)-encoded string.
176///
177/// If `encode_y` is set, four consecutive spaces will be encoded as 'y' as in
178/// [`btoa` version 4.2](https://en.wikipedia.org/wiki/Ascii85#btoa_version).
179#[must_use]
180#[allow(clippy::cast_possible_truncation, clippy::missing_panics_doc)]
181pub fn encode(input: &[u8], encode_y: bool) -> String {
182	let mut out = String::with_capacity((input.len() / 4) * 5);
183	for chunk in input.chunks(4) {
184		if chunk == [0u8; 4] {
185			out.push('z');
186			continue;
187		} else if encode_y && chunk == [b' '; 4] {
188			out.push('y');
189			continue;
190		}
191
192		let (chunk, encoded_count) = if chunk.len() < 4 {
193			let mut chunk = chunk.to_vec();
194			let len = 4 - chunk.len();
195			chunk.extend(std::iter::repeat_n(0u8, len));
196
197			(u32::from_be_bytes(chunk.try_into().expect("chunk length should be 4")), 5 - len)
198		} else {
199			(u32::from_be_bytes(chunk.try_into().expect("chunk length should be 4")), 5)
200		};
201
202		// encode four bytes as five ASCII characters
203		for pow in POWS.iter().take(encoded_count) {
204			let encoded = ((((chunk / pow) % 85) + 33) as u8) as char;
205			out.push(encoded);
206		}
207	}
208
209	out
210}