mirror of
https://gitlab.torproject.org/tpo/core/tor.git
synced 2025-02-24 14:51:11 +01:00
geoip script: add options to output AS numbers.
The --include-asn option includes AS numbers in the geoip mapping. The --output-asn option makes the program generate a file that maps AS numbers to AS names. Additionally, the script now outputs "??" country-code entries for networks that are listed but have no known country.
This commit is contained in:
parent
91569c4dad
commit
e71154428e
2 changed files with 115 additions and 29 deletions
|
@ -3,7 +3,7 @@ use std::collections::HashMap;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
|
||||||
use super::NetBlock;
|
use super::{AsBlock, NetBlock};
|
||||||
|
|
||||||
pub struct BlockReader<I>
|
pub struct BlockReader<I>
|
||||||
where
|
where
|
||||||
|
@ -12,9 +12,10 @@ where
|
||||||
iter: Peekable<I>,
|
iter: Peekable<I>,
|
||||||
}
|
}
|
||||||
|
|
||||||
enum AnyBlock {
|
pub enum AnyBlock {
|
||||||
NotNet,
|
|
||||||
NetBlock(NetBlock),
|
NetBlock(NetBlock),
|
||||||
|
AsBlock(AsBlock),
|
||||||
|
OtherBlock,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I> BlockReader<I>
|
impl<I> BlockReader<I>
|
||||||
|
@ -74,17 +75,31 @@ where
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(name) = kv.remove("name") {
|
||||||
|
// This is an AS block.
|
||||||
|
let asn = kv.get("aut-num").unwrap(); // XXXX handle error better
|
||||||
|
assert!(asn.starts_with("AS"));
|
||||||
|
let asn = asn[2..].parse().unwrap();
|
||||||
|
return Some(Ok(AnyBlock::AsBlock(AsBlock { name, asn })));
|
||||||
|
}
|
||||||
|
|
||||||
let net = if let Some(net) = kv.get("net") {
|
let net = if let Some(net) = kv.get("net") {
|
||||||
net.parse().unwrap() //XXXX handle the error better.
|
net.parse().unwrap() //XXXX handle the error better.
|
||||||
} else {
|
} else {
|
||||||
return Some(Ok(AnyBlock::NotNet));
|
return Some(Ok(AnyBlock::OtherBlock));
|
||||||
|
};
|
||||||
|
|
||||||
|
let asn = if let Some(asn) = kv.get("aut-num") {
|
||||||
|
asn.parse().ok()
|
||||||
|
} else {
|
||||||
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
let cc = if let Some(country) = kv.get("country") {
|
let cc = if let Some(country) = kv.get("country") {
|
||||||
assert!(country.as_bytes().len() == 2);
|
assert!(country.as_bytes().len() == 2);
|
||||||
country.as_bytes()[0..2].try_into().unwrap()
|
country.as_bytes()[0..2].try_into().unwrap()
|
||||||
} else {
|
} else {
|
||||||
return Some(Ok(AnyBlock::NotNet));
|
*b"??"
|
||||||
};
|
};
|
||||||
|
|
||||||
fn is_true(v: Option<&String>) -> bool {
|
fn is_true(v: Option<&String>) -> bool {
|
||||||
|
@ -100,6 +115,7 @@ where
|
||||||
|
|
||||||
Some(Ok(AnyBlock::NetBlock(NetBlock {
|
Some(Ok(AnyBlock::NetBlock(NetBlock {
|
||||||
net,
|
net,
|
||||||
|
asn,
|
||||||
cc,
|
cc,
|
||||||
is_anon_proxy,
|
is_anon_proxy,
|
||||||
is_anycast,
|
is_anycast,
|
||||||
|
@ -112,15 +128,11 @@ impl<I> Iterator for BlockReader<I>
|
||||||
where
|
where
|
||||||
I: Iterator<Item = std::io::Result<String>>,
|
I: Iterator<Item = std::io::Result<String>>,
|
||||||
{
|
{
|
||||||
type Item = NetBlock;
|
type Item = AnyBlock;
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
loop {
|
match self.get_block() {
|
||||||
match self.get_block() {
|
Some(Ok(b)) => Some(b),
|
||||||
None => return None,
|
_ => None,
|
||||||
Some(Err(_)) => return None,
|
|
||||||
Some(Ok(AnyBlock::NotNet)) => continue,
|
|
||||||
Some(Ok(AnyBlock::NetBlock(n))) => return Some(n),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,7 +9,8 @@ use rangemap::RangeInclusiveMap;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufRead, BufReader, BufWriter, Write};
|
use std::io::{BufRead, BufReader, BufWriter, Write};
|
||||||
use std::net::{IpAddr, Ipv6Addr};
|
use std::net::{IpAddr, Ipv6Addr};
|
||||||
use std::path::{Path, PathBuf};
|
use std::num::NonZeroU32;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
fn default_ipv4_path() -> PathBuf {
|
fn default_ipv4_path() -> PathBuf {
|
||||||
"./geoip".into()
|
"./geoip".into()
|
||||||
|
@ -32,6 +33,14 @@ struct Args {
|
||||||
/// where to find the dump file
|
/// where to find the dump file
|
||||||
#[argh(option, short = 'i')]
|
#[argh(option, short = 'i')]
|
||||||
input: PathBuf,
|
input: PathBuf,
|
||||||
|
|
||||||
|
/// whether to include AS information in our output
|
||||||
|
#[argh(switch)]
|
||||||
|
include_asn: bool,
|
||||||
|
|
||||||
|
/// where to store the AS map.
|
||||||
|
#[argh(option)]
|
||||||
|
output_asn: Option<PathBuf>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Represents a network block from running `location dump`.
|
/// Represents a network block from running `location dump`.
|
||||||
|
@ -39,11 +48,19 @@ struct Args {
|
||||||
pub struct NetBlock {
|
pub struct NetBlock {
|
||||||
pub net: IpNetwork,
|
pub net: IpNetwork,
|
||||||
pub cc: [u8; 2],
|
pub cc: [u8; 2],
|
||||||
|
pub asn: Option<NonZeroU32>,
|
||||||
pub is_anon_proxy: bool,
|
pub is_anon_proxy: bool,
|
||||||
pub is_anycast: bool,
|
pub is_anycast: bool,
|
||||||
pub is_satellite: bool,
|
pub is_satellite: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An autonomous-system (AS) definition, as read from `location dump` output.
///
/// The derived ordering follows field declaration order: blocks compare by
/// `asn` first, and by `name` only when the numbers are equal.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct AsBlock {
    /// The AS number; it can never be zero.
    pub asn: NonZeroU32,
    /// The name recorded for this AS.
    pub name: String,
}
|
||||||
|
|
||||||
impl PartialEq for NetBlock {
|
impl PartialEq for NetBlock {
|
||||||
fn eq(&self, other: &Self) -> bool {
|
fn eq(&self, other: &Self) -> bool {
|
||||||
self.net == other.net
|
self.net == other.net
|
||||||
|
@ -69,6 +86,40 @@ impl PartialOrd for NetBlock {
|
||||||
|
|
||||||
impl Eq for NetBlock {}
|
impl Eq for NetBlock {}
|
||||||
|
|
||||||
|
/// The value attached to an address range: a country code plus an optional
/// AS number.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct NetDefn {
    /// Two-byte country code.
    cc: [u8; 2],
    /// AS number, or `None` when there is none to report.
    asn: Option<NonZeroU32>,
}
|
||||||
|
|
||||||
|
impl NetBlock {
|
||||||
|
fn into_defn(self, include_asn: bool) -> NetDefn {
|
||||||
|
if include_asn {
|
||||||
|
NetDefn {
|
||||||
|
cc: self.cc,
|
||||||
|
asn: self.asn,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
NetDefn {
|
||||||
|
cc: self.cc,
|
||||||
|
asn: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NetDefn {
|
||||||
|
fn cc(&self) -> &str {
|
||||||
|
std::str::from_utf8(&self.cc).unwrap()
|
||||||
|
}
|
||||||
|
fn asn(&self) -> u32 {
|
||||||
|
match self.asn {
|
||||||
|
Some(v) => v.into(),
|
||||||
|
None => 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const PROLOGUE: &str = "\
|
const PROLOGUE: &str = "\
|
||||||
# This file has been converted from the IPFire Location database
|
# This file has been converted from the IPFire Location database
|
||||||
# using Tor's geoip-db-tool. For more information on the data, see
|
# using Tor's geoip-db-tool. For more information on the data, see
|
||||||
|
@ -82,16 +133,26 @@ const PROLOGUE: &str = "\
|
||||||
///
|
///
|
||||||
/// This code tries to be "efficient enough"; most of the logic is handled by
|
/// This code tries to be "efficient enough"; most of the logic is handled by
|
||||||
/// using the rangemap crate.
|
/// using the rangemap crate.
|
||||||
fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<()> {
|
fn convert(args: Args) -> std::io::Result<()> {
|
||||||
|
let input = args.input.as_path();
|
||||||
|
let output_v4 = args.output_ipv4.as_path();
|
||||||
|
let output_v6 = args.output_ipv6.as_path();
|
||||||
|
let include_asn = args.include_asn;
|
||||||
|
|
||||||
let f = File::open(input)?;
|
let f = File::open(input)?;
|
||||||
let f = BufReader::new(f);
|
let f = BufReader::new(f);
|
||||||
let mut blocks = Vec::new();
|
let mut blocks = Vec::new();
|
||||||
|
let mut networks = Vec::new();
|
||||||
|
|
||||||
let mut reader = db::BlockReader::new(f.lines());
|
let mut reader = db::BlockReader::new(f.lines());
|
||||||
let hdr = reader.extract_header();
|
let hdr = reader.extract_header();
|
||||||
// Read blocks, and then sort them by specificity and address.
|
// Read blocks, and then sort them by specificity and address.
|
||||||
for nb in reader {
|
for nb in reader {
|
||||||
blocks.push(nb);
|
match nb {
|
||||||
|
db::AnyBlock::AsBlock(a) => networks.push(a),
|
||||||
|
db::AnyBlock::NetBlock(n) => blocks.push(n),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
blocks.sort();
|
blocks.sort();
|
||||||
|
|
||||||
|
@ -104,8 +165,8 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
|
||||||
//
|
//
|
||||||
// We use u32 and u128 as the index types for these RangeInclusiveMaps,
|
// We use u32 and u128 as the index types for these RangeInclusiveMaps,
|
||||||
// so that we don't need to implement a step function for IpAddr.
|
// so that we don't need to implement a step function for IpAddr.
|
||||||
let mut v4map: RangeInclusiveMap<u32, [u8; 2], _> = RangeInclusiveMap::new();
|
let mut v4map: RangeInclusiveMap<u32, NetDefn, _> = RangeInclusiveMap::new();
|
||||||
let mut v6map: RangeInclusiveMap<u128, [u8; 2], _> = RangeInclusiveMap::new();
|
let mut v6map: RangeInclusiveMap<u128, NetDefn, _> = RangeInclusiveMap::new();
|
||||||
|
|
||||||
let mut n = 0usize;
|
let mut n = 0usize;
|
||||||
let num_blocks = blocks.len();
|
let num_blocks = blocks.len();
|
||||||
|
@ -118,10 +179,10 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
|
||||||
let end = nb.net.broadcast();
|
let end = nb.net.broadcast();
|
||||||
match (start, end) {
|
match (start, end) {
|
||||||
(IpAddr::V4(a), IpAddr::V4(b)) => {
|
(IpAddr::V4(a), IpAddr::V4(b)) => {
|
||||||
v4map.insert(a.into()..=b.into(), nb.cc);
|
v4map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
|
||||||
}
|
}
|
||||||
(IpAddr::V6(a), IpAddr::V6(b)) => {
|
(IpAddr::V6(a), IpAddr::V6(b)) => {
|
||||||
v6map.insert(a.into()..=b.into(), nb.cc);
|
v6map.insert(a.into()..=b.into(), nb.into_defn(include_asn));
|
||||||
}
|
}
|
||||||
(_, _) => panic!("network started and ended in different families!?"),
|
(_, _) => panic!("network started and ended in different families!?"),
|
||||||
}
|
}
|
||||||
|
@ -133,33 +194,46 @@ fn convert(input: &Path, output_v4: &Path, output_v6: &Path) -> std::io::Result<
|
||||||
|
|
||||||
v4.write_all(PROLOGUE.as_bytes())?;
|
v4.write_all(PROLOGUE.as_bytes())?;
|
||||||
v4.write_all(hdr.as_bytes())?;
|
v4.write_all(hdr.as_bytes())?;
|
||||||
for (r, cc) in v4map.iter() {
|
for (r, defn) in v4map.iter() {
|
||||||
let a: u32 = *r.start();
|
let a: u32 = *r.start();
|
||||||
let b: u32 = *r.end();
|
let b: u32 = *r.end();
|
||||||
writeln!(&mut v4, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?;
|
if include_asn {
|
||||||
|
writeln!(&mut v4, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
|
||||||
|
} else {
|
||||||
|
writeln!(&mut v4, "{},{},{}", a, b, defn.cc())?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
v6.write_all(PROLOGUE.as_bytes())?;
|
v6.write_all(PROLOGUE.as_bytes())?;
|
||||||
v6.write_all(hdr.as_bytes())?;
|
v6.write_all(hdr.as_bytes())?;
|
||||||
for (r, cc) in v6map.iter() {
|
for (r, defn) in v6map.iter() {
|
||||||
let a: Ipv6Addr = (*r.start()).into();
|
let a: Ipv6Addr = (*r.start()).into();
|
||||||
let b: Ipv6Addr = (*r.end()).into();
|
let b: Ipv6Addr = (*r.end()).into();
|
||||||
writeln!(&mut v6, "{},{},{}", a, b, std::str::from_utf8(cc).unwrap())?;
|
if include_asn {
|
||||||
|
writeln!(&mut v6, "{},{},{},{}", a, b, defn.cc(), defn.asn())?;
|
||||||
|
} else {
|
||||||
|
writeln!(&mut v6, "{},{},{}", a, b, defn.cc())?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The documentation says you should always flush a BufWriter.
|
// The documentation says you should always flush a BufWriter.
|
||||||
v4.flush()?;
|
v4.flush()?;
|
||||||
v6.flush()?;
|
v6.flush()?;
|
||||||
|
|
||||||
|
if let Some(output_asn) = args.output_asn {
|
||||||
|
networks.sort();
|
||||||
|
let mut asn = BufWriter::new(File::create(output_asn)?);
|
||||||
|
for net in networks {
|
||||||
|
writeln!(&mut asn, "{},{}", net.asn, net.name)?;
|
||||||
|
}
|
||||||
|
asn.flush()?;
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> std::io::Result<()> {
|
fn main() -> std::io::Result<()> {
|
||||||
let args: Args = argh::from_env();
|
let args: Args = argh::from_env();
|
||||||
|
|
||||||
convert(
|
convert(args)
|
||||||
args.input.as_path(),
|
|
||||||
args.output_ipv4.as_path(),
|
|
||||||
args.output_ipv6.as_path(),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue