From 0fcae6c3a63746e403590f5e971124e68642987a Mon Sep 17 00:00:00 2001 From: Travis Brown Date: Tue, 9 Aug 2022 13:37:06 +0200 Subject: [PATCH] Start moving over CLI tools --- projects/Cargo.toml | 1 + projects/hst-tools/Cargo.toml | 17 +++ projects/hst-tools/src/bin/hst-tw-db.rs | 118 ++++++++++++++++++++ projects/hst-tools/src/bin/hst-tw-images.rs | 84 ++++++++++++++ 4 files changed, 220 insertions(+) create mode 100644 projects/hst-tools/Cargo.toml create mode 100644 projects/hst-tools/src/bin/hst-tw-db.rs create mode 100644 projects/hst-tools/src/bin/hst-tw-images.rs diff --git a/projects/Cargo.toml b/projects/Cargo.toml index 1e01432..cfcb7eb 100644 --- a/projects/Cargo.toml +++ b/projects/Cargo.toml @@ -3,6 +3,7 @@ members = [ "hst-cli", "hst-deactivations", + "hst-tools", "hst-tw-db", "hst-tw-images", "hst-tw-profiles", diff --git a/projects/hst-tools/Cargo.toml b/projects/hst-tools/Cargo.toml new file mode 100644 index 0000000..cb19828 --- /dev/null +++ b/projects/hst-tools/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "hst-tw-tools" +authors = ["Travis Brown "] +repository = "https://github.com/travisbrown/hassreden-tracker" +version = "0.1.0" +edition = "2021" + +[dependencies] +apache-avro = { version = "0.14", features = ["snappy"] } +hst-cli = { path = "../hst-cli" } +hst-tw-db = { path = "../hst-tw-db" } +hst-tw-images = { path = "../hst-tw-images" } +hst-tw-profiles = { path = "../hst-tw-profiles" } +reqwest = { version = "0.11", features = ["gzip", "json"] } +serde_json = { version = "1", features = ["preserve_order"] } +thiserror = "1" +tokio = { version = "1", features = ["macros", "rt-multi-thread"] } \ No newline at end of file diff --git a/projects/hst-tools/src/bin/hst-tw-db.rs b/projects/hst-tools/src/bin/hst-tw-db.rs new file mode 100644 index 0000000..417d5b4 --- /dev/null +++ b/projects/hst-tools/src/bin/hst-tw-db.rs @@ -0,0 +1,118 @@ +use hst_cli::prelude::*; +use hst_tw_db::{ + table::{ReadOnly, Table, Writeable}, + ProfileDb, +}; +use 
hst_tw_profiles::model::User; +use std::collections::HashSet; +use std::fs::File; + +fn main() -> Result<(), Error> { + let opts: Opts = Opts::parse(); + opts.verbose.init_logging()?; + + match opts.command { + Command::Import { input } => { + let db = ProfileDb::<Writeable>::open(opts.db, false)?; + + let file = File::open(input)?; + let reader = hst_tw_profiles::avro::reader(file)?; + + for value in reader { + let user = apache_avro::from_value::<User>(&value?)?; + db.update(&user)?; + } + } + Command::Lookup { id } => { + let db = ProfileDb::<ReadOnly>::open(opts.db, true)?; + let users = db.lookup(id)?; + + for (_, user) in users { + println!("{}", serde_json::to_value(user)?); + } + } + Command::Count => { + let db = ProfileDb::<ReadOnly>::open(opts.db, true)?; + let mut user_count = 0; + let mut screen_name_count = 0; + let mut verified = 0; + let mut protected = 0; + for result in db.iter() { + let (_, users) = result?; + let mut screen_names = HashSet::new(); + + user_count += 1; + + for (_, user) in &users { + screen_names.insert(user.screen_name.clone()); + } + + if let Some((_, user)) = users.last() { + if user.verified { + verified += 1; + } + if user.protected { + protected += 1; + } + } + + screen_name_count += screen_names.len(); + } + + println!("{} users, {} screen names", user_count, screen_name_count); + println!("{} verified, {} protected", verified, protected); + } + Command::Stats => { + let db = ProfileDb::<ReadOnly>::open(opts.db, true)?; + if let Some(count) = db.get_estimated_key_count()? 
{ + println!("Estimated number of keys: {}", count); + } + println!("{:?}", db.statistics()); + } + } + + Ok(()) +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("ProfileDb error")] + ProfileDb(#[from] hst_tw_db::Error), + #[error("Profile Avro error")] + ProfileAvro(#[from] hst_tw_profiles::avro::Error), + #[error("Avro decoding error")] + Avro(#[from] apache_avro::Error), + #[error("JSON encoding error")] + Json(#[from] serde_json::Error), + #[error("I/O error")] + Io(#[from] std::io::Error), + #[error("Log initialization error")] + LogInitialization(#[from] log::SetLoggerError), +} + +#[derive(Debug, Parser)] +#[clap(name = "hst-tw-db", version, author)] +struct Opts { + #[clap(flatten)] + verbose: Verbosity, + /// Database directory path + #[clap(long)] + db: String, + #[clap(subcommand)] + command: Command, +} + +#[derive(Debug, Parser)] +enum Command { + Import { + /// Avro input path + #[clap(short, long)] + input: String, + }, + Lookup { + /// Twitter user ID + id: u64, + }, + Count, + Stats, +} diff --git a/projects/hst-tools/src/bin/hst-tw-images.rs b/projects/hst-tools/src/bin/hst-tw-images.rs new file mode 100644 index 0000000..52263ab --- /dev/null +++ b/projects/hst-tools/src/bin/hst-tw-images.rs @@ -0,0 +1,84 @@ +use hst_cli::prelude::*; +use hst_tw_images::{Image, Store}; +use reqwest::Url; +use std::fs::File; +use std::io::Write; +use std::path::Path; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Twitter image error")] + TwitterImage(#[from] hst_tw_images::Error), + #[error("Twitter image store error")] + TwitterImageStore(#[from] hst_tw_images::store::Error), + #[error("HTTP client error")] + HttpClient(#[from] reqwest::Error), + #[error("I/O error")] + Io(#[from] std::io::Error), + #[error("Log initialization error")] + LogInitialization(#[from] log::SetLoggerError), +} + +async fn download_image<P: AsRef<Path>>( + client: &reqwest::Client, + image: &Image, + output: P, +) -> Result<(Url, bool), Error> { + let path = 
output.as_ref().join(image.path()); + + let url = image.url(); + + let response = client.get(url).send().await?; + let response_url = response.url().clone(); + let bytes = response.bytes().await?; + + if !bytes.is_empty() { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(&parent)?; + } + + let mut file = File::create(path)?; + file.write_all(&bytes)?; + + Ok((response_url, false)) + } else { + Ok((response_url, true)) + } +} + +#[tokio::main] +async fn main() -> Result<(), Error> { + let opts: Opts = Opts::parse(); + opts.verbose.init_logging()?; + + match opts.command { + Command::StoreUrls { base } => { + let store = Store::new(base); + + for entry in &store { + let (image, _) = entry?; + println!("{}", image); + } + } + Command::Scrape => todo!(), + } + + Ok(()) +} + +#[derive(Parser)] +#[clap(name = "hst-tw-images", about, version, author)] +struct Opts { + #[clap(flatten)] + verbose: Verbosity, + #[clap(subcommand)] + command: Command, +} + +#[derive(Debug, Parser)] +enum Command { + /// Download + Scrape, + /// Dump a list of URLs (arbitrarily ordered) from a store as text + StoreUrls { base: String }, +}