Use consistent byte/char offsets when parsing invoice HRPs

When parsing lightning-invoice HRPs we want to read them char-by-char, tracking the offsets at which the different fields are located. Prior to this commit, this was done by first reading char-by-char and then indexing using the byte offset, which works for ASCII strings but fails on multi-byte characters. This commit fixes the issue by simply always walking byte-by-byte and rejecting multi-byte characters, which don't belong in HRPs.
2025-02-24 15:02:20 +01:00 · 2024-04-30 17:11:54 +00:00 · 2024-04-30 17:11:54 +00:00 · 8db1226ae4
commit 8db1226ae4
parent 65ba75d500
1 changed files with 7 additions and 3 deletions
--- a/lightning-invoice/src/de.rs
+++ b/lightning-invoice/src/de.rs
@@ -43,7 +43,11 @@ mod hrp_sm {
 	}

 	impl States {
-		fn next_state(&self, read_symbol: char) -> Result<States, super::Bolt11ParseError> {
+		fn next_state(&self, read_byte: u8) -> Result<States, super::Bolt11ParseError> {
+			let read_symbol = match char::from_u32(read_byte.into()) {
+				Some(symb) if symb.is_ascii() => symb,
+				_ => return Err(super::Bolt11ParseError::MalformedHRP),
+			};
 			match *self {
 				States::Start => {
 					if read_symbol == 'l' {
@@ -119,7 +123,7 @@ mod hrp_sm {
 			*range = Some(new_range);
 		}

-		fn step(&mut self, c: char) -> Result<(), super::Bolt11ParseError> {
+		fn step(&mut self, c: u8) -> Result<(), super::Bolt11ParseError> {
 			let next_state = self.state.next_state(c)?;
 			match next_state {
 				States::ParseCurrencyPrefix => {
@@ -158,7 +162,7 @@ mod hrp_sm {

 	pub fn parse_hrp(input: &str) -> Result<(&str, &str, &str), super::Bolt11ParseError> {
 		let mut sm = StateMachine::new();
-		for c in input.chars() {
+		for c in input.bytes() {
 			sm.step(c)?;
 		}