Use consistent byte/char offsets when parsing invoice HRPs

When parsing lightning-invoice HRPs we want to read them
char-by-char, tracking the offset at which each field occurs. Prior
to this commit, this was done by first reading char-by-char and then
indexing using the byte offset, which works for ASCII strings but
fails on multi-byte characters.

This commit fixes this issue by simply always walking byte-by-byte
and rejecting multi-byte characters, which don't belong in HRPs.
This commit is contained in:
Matt Corallo 2024-04-30 17:11:54 +00:00
parent 65ba75d500
commit 8db1226ae4

View file

@ -43,7 +43,11 @@ mod hrp_sm {
} }
impl States { impl States {
fn next_state(&self, read_symbol: char) -> Result<States, super::Bolt11ParseError> { fn next_state(&self, read_byte: u8) -> Result<States, super::Bolt11ParseError> {
let read_symbol = match char::from_u32(read_byte.into()) {
Some(symb) if symb.is_ascii() => symb,
_ => return Err(super::Bolt11ParseError::MalformedHRP),
};
match *self { match *self {
States::Start => { States::Start => {
if read_symbol == 'l' { if read_symbol == 'l' {
@ -119,7 +123,7 @@ mod hrp_sm {
*range = Some(new_range); *range = Some(new_range);
} }
fn step(&mut self, c: char) -> Result<(), super::Bolt11ParseError> { fn step(&mut self, c: u8) -> Result<(), super::Bolt11ParseError> {
let next_state = self.state.next_state(c)?; let next_state = self.state.next_state(c)?;
match next_state { match next_state {
States::ParseCurrencyPrefix => { States::ParseCurrencyPrefix => {
@ -158,7 +162,7 @@ mod hrp_sm {
pub fn parse_hrp(input: &str) -> Result<(&str, &str, &str), super::Bolt11ParseError> { pub fn parse_hrp(input: &str) -> Result<(&str, &str, &str), super::Bolt11ParseError> {
let mut sm = StateMachine::new(); let mut sm = StateMachine::new();
for c in input.chars() { for c in input.bytes() {
sm.step(c)?; sm.step(c)?;
} }