Use consistent byte/char offsets when parsing invoice HRPs

When parsing lightning-invoice HRPs we want to read them
char-by-char, tracking the offset at which each field is found. Prior
to this commit, this was done by first reading char-by-char and then
indexing using the byte offset, which works for ASCII strings (where
char and byte offsets coincide) but fails on multi-byte characters.

This commit fixes this issue by simply always walking byte-by-byte
and rejecting multi-byte characters, which don't belong in HRPs.
This commit is contained in:
Matt Corallo 2024-04-30 17:11:54 +00:00
parent 65ba75d500
commit 8db1226ae4

View file

@ -43,7 +43,11 @@ mod hrp_sm {
}
impl States {
fn next_state(&self, read_symbol: char) -> Result<States, super::Bolt11ParseError> {
fn next_state(&self, read_byte: u8) -> Result<States, super::Bolt11ParseError> {
let read_symbol = match char::from_u32(read_byte.into()) {
Some(symb) if symb.is_ascii() => symb,
_ => return Err(super::Bolt11ParseError::MalformedHRP),
};
match *self {
States::Start => {
if read_symbol == 'l' {
@ -119,7 +123,7 @@ mod hrp_sm {
*range = Some(new_range);
}
fn step(&mut self, c: char) -> Result<(), super::Bolt11ParseError> {
fn step(&mut self, c: u8) -> Result<(), super::Bolt11ParseError> {
let next_state = self.state.next_state(c)?;
match next_state {
States::ParseCurrencyPrefix => {
@ -158,7 +162,7 @@ mod hrp_sm {
pub fn parse_hrp(input: &str) -> Result<(&str, &str, &str), super::Bolt11ParseError> {
let mut sm = StateMachine::new();
for c in input.chars() {
for c in input.bytes() {
sm.step(c)?;
}