geoip script: add options to output AS numbers.

The --include-asn option includes AS numbers in the geoip mapping.

The --output-asn option makes the program generate a number-to-name
mapping file.

Additionally, the script now outputs "??" country-code entries for
networks that are listed but have no known country.
This commit is contained in:
Nick Mathewson 2021-04-14 10:28:44 -04:00
parent 91569c4dad
commit e71154428e
2 changed files with 115 additions and 29 deletions

View file

@ -3,7 +3,7 @@ use std::collections::HashMap;
use std::convert::TryInto; use std::convert::TryInto;
use std::iter::Peekable; use std::iter::Peekable;
use super::NetBlock; use super::{AsBlock, NetBlock};
pub struct BlockReader<I> pub struct BlockReader<I>
where where
@ -12,9 +12,10 @@ where
iter: Peekable<I>, iter: Peekable<I>,
} }
enum AnyBlock { pub enum AnyBlock {
NotNet,
NetBlock(NetBlock), NetBlock(NetBlock),
AsBlock(AsBlock),
OtherBlock,
} }
impl<I> BlockReader<I> impl<I> BlockReader<I>
@ -74,17 +75,31 @@ where
return None; return None;
} }
if let Some(name) = kv.remove("name") {
// This is an AS block.
let asn = kv.get("aut-num").unwrap(); // XXXX handle error better
assert!(asn.starts_with("AS"));
let asn = asn[2..].parse().unwrap();
return Some(Ok(AnyBlock::AsBlock(AsBlock { name, asn })));
}
let net = if let Some(net) = kv.get("net") { let net = if let Some(net) = kv.get("net") {
net.parse().unwrap() //XXXX handle the error better. net.parse().unwrap() //XXXX handle the error better.
} else { } else {
return Some(Ok(AnyBlock::NotNet)); return Some(Ok(AnyBlock::OtherBlock));
};
let asn = if let Some(asn) = kv.get("aut-num") {
asn.parse().ok()
} else {
None
}; };
let cc = if let Some(country) = kv.get("country") { let cc = if let Some(country) = kv.get("country") {
assert!(country.as_bytes().len() == 2); assert!(country.as_bytes().len() == 2);
country.as_bytes()[0..2].try_into().unwrap() country.as_bytes()[0..2].try_into().unwrap()
} else { } else {
return Some(Ok(AnyBlock::NotNet)); *b"??"
}; };
fn is_true(v: Option<&String>) -> bool { fn is_true(v: Option<&String>) -> bool {
@ -100,6 +115,7 @@ where
Some(Ok(AnyBlock::NetBlock(NetBlock { Some(Ok(AnyBlock::NetBlock(NetBlock {
net, net,
asn,
cc, cc,
is_anon_proxy, is_anon_proxy,
is_anycast, is_anycast,
@ -112,15 +128,11 @@ impl<I> Iterator for BlockReader<I>
where where
I: Iterator<Item = std::io::Result<String>>, I: Iterator<Item = std::io::Result<String>>,
{ {
type Item = NetBlock; type Item = AnyBlock;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
loop { match self.get_block() {
match self.get_block() { Some(Ok(b)) => Some(b),
None => return None, _ => None,
Some(Err(_)) => return None,
Some(Ok(AnyBlock::NotNet)) => continue,
Some(Ok(AnyBlock::NetBlock(n))) => return Some(n),
}
} }
} }
} }

View file

@ -9,7 +9,8 @@ use rangemap::RangeInclusiveMap;
use std::fs::File; use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write}; use std::io::{BufRead, BufReader, BufWriter, Write};
use std::net::{IpAddr, Ipv6Addr}; use std::net::{IpAddr, Ipv6Addr};
use std::path::{Path, PathBuf}; use std::num::NonZeroU32;
use std::path::PathBuf;
fn default_ipv4_path() -> PathBuf { fn default_ipv4_path() -> PathBuf {
"./geoip".into() "./geoip".into()
@ -32,6 +33,14 @@ struct Args {
/// where to find the dump file /// where to find the dump file
#[argh(option, short = 'i')] #[argh(option, short = 'i')]
input: PathBuf, input: PathBuf,
/// whether to include AS information in our output
#[argh(switch)]
include_asn: bool,
/// where to store the AS map.
#[argh(option)]
output_asn: Option<PathBuf>,
} }
/// Represents a network block from running `location dump`. /// Represents a network block from running `location dump`.
@ -39,11 +48,19 @@ struct Args {
pub struct NetBlock { pub struct NetBlock {
pub net: IpNetwork, pub net: IpNetwork,
pub cc: [u8; 2], pub cc: [u8; 2],
pub asn: Option<NonZeroU32>,
pub is_anon_proxy: bool, pub is_anon_proxy: bool,
pub is_anycast: bool, pub is_anycast: bool,
pub is_satellite: bool, pub is_satellite: bool,
} }
/// Represents an AS definition from running `location dump`.
///
/// The derived ordering compares `asn` first and `name` second, because
/// derived `Ord` follows field declaration order. Sorting a list of these
/// therefore yields entries in ascending AS-number order.
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct AsBlock {
/// The AS number, i.e. the block's `aut-num` value with its `AS` prefix removed.
pub asn: NonZeroU32,
/// The value of the block's `name` field.
pub name: String,
}
impl PartialEq for NetBlock { impl PartialEq for NetBlock {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
self.net == other.net self.net == other.net
@ -69,6 +86,40 @@ impl PartialOrd for NetBlock {
impl Eq for NetBlock {} impl Eq for NetBlock {}
/// The value stored for each address range in the IPv4 and IPv6 range maps:
/// a country code plus an optional AS number.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
struct NetDefn {
/// Two-byte country code; `??` when the network block listed no country.
cc: [u8; 2],
/// AS number for the range, or `None` when the block had none or when AS
/// numbers are not being included in the output.
asn: Option<NonZeroU32>,
}
impl NetBlock {
    /// Reduce this block to the definition that is stored in the range maps.
    ///
    /// The country code is always carried over. The AS number is carried over
    /// only when `include_asn` is true; otherwise it is replaced by `None`.
    // NOTE(review): presumably the ASN is dropped so that ranges which differ
    // only by ASN compare equal in the map -- confirm against rangemap's
    // coalescing behaviour.
    fn into_defn(self, include_asn: bool) -> NetDefn {
        let asn = if include_asn { self.asn } else { None };
        NetDefn { cc: self.cc, asn }
    }
}
impl NetDefn {
    /// Return the country code as a string slice.
    ///
    /// # Panics
    ///
    /// Panics if the two stored bytes are not valid UTF-8.
    fn cc(&self) -> &str {
        let decoded = std::str::from_utf8(&self.cc);
        decoded.unwrap()
    }

    /// Return the AS number as a plain integer, using 0 when there is none.
    fn asn(&self) -> u32 {
        self.asn.map_or(0, u32::from)
    }
}
const PROLOGUE: &str = "\ const PROLOGUE: &str = "\
# This file has been converted from the IPFire Location database # This file has been converted from the IPFire Location database
# using Tor's geoip-db-tool. For more information on the data, see # using Tor's geoip-db-tool. For more information on the data, see
@ -82,16 +133,26 @@ const PROLOGUE: &str = "\
/// ///
/// This code tries to be "efficient enough"; most of the logic is handled by /// This code tries to be "efficient enough"; most of the logic is handled by
/// using the rangemap crate. /// using the rangemap crate.
fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<()> { fn convert(args: Args) -> std::io::Result<()> {
let input = args.input.as_path();
let output_v4 = args.output_ipv4.as_path();
let output_v6 = args.output_ipv6.as_path();
let include_asn = args.include_asn;
let f = File::open(input)?; let f = File::open(input)?;
let f = BufReader::new(f); let f = BufReader::new(f);
let mut blocks = Vec::new(); let mut blocks = Vec::new();
let mut networks = Vec::new();
let mut reader = db::BlockReader::new(f.lines()); let mut reader = db::BlockReader::new(f.lines());
let hdr = reader.extract_header(); let hdr = reader.extract_header();
// Read blocks, and then sort them by specificity and address. // Read blocks, and then sort them by specificity and address.
for nb in reader { for nb in reader {
blocks.push(nb); match nb {
db::AnyBlock::AsBlock(a) => networks.push(a),
db::AnyBlock::NetBlock(n) => blocks.push(n),
_ => {}
}
} }
blocks.sort(); blocks.sort();
@ -104,8 +165,8 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
// //
// We use u32 and u128 as the index types for these RangeInclusiveMaps, // We use u32 and u128 as the index types for these RangeInclusiveMaps,
// so that we don't need to implement a step function for IpAddr. // so that we don't need to implement a step function for IpAddr.
let mut v4map: RangeInclusiveMap<u32, [u8; 2], _> = RangeInclusiveMap::new(); let mut v4map: RangeInclusiveMap<u32, NetDefn, _> = RangeInclusiveMap::new();
let mut v6map: RangeInclusiveMap<u128, [u8; 2], _> = RangeInclusiveMap::new(); let mut v6map: RangeInclusiveMap<u128, NetDefn, _> = RangeInclusiveMap::new();
let mut n = 0usize; let mut n = 0usize;
let num_blocks = blocks.len(); let num_blocks = blocks.len();
@ -118,10 +179,10 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
let end = nb.net.broadcast(); let end = nb.net.broadcast();
match (start, end) { match (start, end) {
(IpAddr::V4(a), IpAddr::V4(b)) => { (IpAddr::V4(a), IpAddr::V4(b)) => {
v4map.insert(a.into()..=b.into(), nb.cc); v4map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
} }
(IpAddr::V6(a), IpAddr::V6(b)) => { (IpAddr::V6(a), IpAddr::V6(b)) => {
v6map.insert(a.into()..=b.into(), nb.cc); v6map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
} }
(_, _) => panic!("network started and ended in different families!?"), (_, _) => panic!("network started and ended in different families!?"),
} }
@ -133,33 +194,46 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
v4.write_all(PROLOGUE.as_bytes())?; v4.write_all(PROLOGUE.as_bytes())?;
v4.write_all(hdr.as_bytes())?; v4.write_all(hdr.as_bytes())?;
for (r, cc) in v4map.iter() { for (r, defn) in v4map.iter() {
let a: u32 = *r.start(); let a: u32 = *r.start();
let b: u32 = *r.end(); let b: u32 = *r.end();
writeln!(&mut v4, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?; if include_asn {
writeln!(&mut v4, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
} else {
writeln!(&mut v4, "{},{},{}", a, b, defn.cc())?;
}
} }
v6.write_all(PROLOGUE.as_bytes())?; v6.write_all(PROLOGUE.as_bytes())?;
v6.write_all(hdr.as_bytes())?; v6.write_all(hdr.as_bytes())?;
for (r, cc) in v6map.iter() { for (r, defn) in v6map.iter() {
let a: Ipv6Addr = (*r.start()).into(); let a: Ipv6Addr = (*r.start()).into();
let b: Ipv6Addr = (*r.end()).into(); let b: Ipv6Addr = (*r.end()).into();
writeln!(&mut v6, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?; if include_asn {
writeln!(&mut v6, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
} else {
writeln!(&mut v6, "{},{},{}", a, b, defn.cc())?;
}
} }
// The documentation says you should always flush a BufWriter. // The documentation says you should always flush a BufWriter.
v4.flush()?; v4.flush()?;
v6.flush()?; v6.flush()?;
if let Some(output_asn) = args.output_asn {
networks.sort();
let mut asn = BufWriter::new(File::create(output_asn)?);
for net in networks {
writeln!(&mut asn, "{},{}", net.asn, net.name)?;
}
asn.flush()?;
}
Ok(()) Ok(())
} }
fn main() -> std::io::Result<()> { fn main() -> std::io::Result<()> {
let args: Args = argh::from_env(); let args: Args = argh::from_env();
convert( convert(args)
args.input.as_path(),
args.output_ipv4.as_path(),
args.output_ipv6.as_path(),
)
} }