Skip to content

Commit

Permalink
Start moving over CLI tools
Browse files Browse the repository at this point in the history
  • Loading branch information
travisbrown committed Aug 9, 2022
1 parent b2aeeff commit 0fcae6c
Show file tree
Hide file tree
Showing 4 changed files with 220 additions and 0 deletions.
1 change: 1 addition & 0 deletions projects/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
members = [
"hst-cli",
"hst-deactivations",
"hst-tools",
"hst-tw-db",
"hst-tw-images",
"hst-tw-profiles",
Expand Down
17 changes: 17 additions & 0 deletions projects/hst-tools/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Manifest for the package that holds the `hst-tw-db` and `hst-tw-images`
# command-line binaries (one source file each under `src/bin/`).
[package]
name = "hst-tw-tools"
authors = ["Travis Brown <[email protected]>"]
repository = "https://github.com/travisbrown/hassreden-tracker"
version = "0.1.0"
edition = "2021"

[dependencies]
# Avro value decoding for the `hst-tw-db` import subcommand.
apache-avro = { version = "0.14", features = ["snappy"] }
# Shared CLI prelude, glob-imported by both binaries.
hst-cli = { path = "../hst-cli" }
# Profile database used by `hst-tw-db`.
hst-tw-db = { path = "../hst-tw-db" }
# Image and image-store types used by `hst-tw-images`.
hst-tw-images = { path = "../hst-tw-images" }
# `User` model and Avro reader used by `hst-tw-db`.
hst-tw-profiles = { path = "../hst-tw-profiles" }
# HTTP client used by `hst-tw-images`.
reqwest = { version = "0.11", features = ["gzip", "json"] }
# JSON output for the `hst-tw-db` lookup subcommand.
serde_json = { version = "1", features = ["preserve_order"] }
# Derives the `Error` enums in both binaries.
thiserror = "1"
# Async runtime behind `#[tokio::main]` in `hst-tw-images`.
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
118 changes: 118 additions & 0 deletions projects/hst-tools/src/bin/hst-tw-db.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
use hst_cli::prelude::*;
use hst_tw_db::{
table::{ReadOnly, Table, Writeable},
ProfileDb,
};
use hst_tw_profiles::model::User;
use std::collections::HashSet;
use std::fs::File;

fn main() -> Result<(), Error> {
let opts: Opts = Opts::parse();
opts.verbose.init_logging()?;

match opts.command {
Command::Import { input } => {
let db = ProfileDb::<Writeable>::open(opts.db, false)?;

let file = File::open(input)?;
let reader = hst_tw_profiles::avro::reader(file)?;

for value in reader {
let user = apache_avro::from_value::<User>(&value?)?;
db.update(&user)?;
}
}
Command::Lookup { id } => {
let db = ProfileDb::<ReadOnly>::open(opts.db, true)?;
let users = db.lookup(id)?;

for (_, user) in users {
println!("{}", serde_json::to_value(user)?);
}
}
Command::Count => {
let db = ProfileDb::<ReadOnly>::open(opts.db, true)?;
let mut user_count = 0;
let mut screen_name_count = 0;
let mut verified = 0;
let mut protected = 0;
for result in db.iter() {
let (_, users) = result?;
let mut screen_names = HashSet::new();

user_count += 1;

for (_, user) in &users {
screen_names.insert(user.screen_name.clone());
}

if let Some((_, user)) = users.last() {
if user.verified {
verified += 1;
}
if user.protected {
protected += 1;
}
}

screen_name_count += screen_names.len();
}

println!("{} users, {} screen names", user_count, screen_name_count);
println!("{} verified, {} protected", verified, protected);
}
Command::Stats => {
let db = ProfileDb::<ReadOnly>::open(opts.db, true)?;
if let Some(count) = db.get_estimated_key_count()? {
println!("Estimated number of keys: {}", count);
}
println!("{:?}", db.statistics());
}
}

Ok(())
}

#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("ProfileDb error")]
ProfileDb(#[from] hst_tw_db::Error),
#[error("Profile Avro error")]
ProfileAvro(#[from] hst_tw_profiles::avro::Error),
#[error("Avro decoding error")]
Avro(#[from] apache_avro::Error),
#[error("JSON encoding error")]
Json(#[from] serde_json::Error),
#[error("I/O error")]
Io(#[from] std::io::Error),
#[error("Log initialization error")]
LogInitialization(#[from] log::SetLoggerError),
}

// Command-line options for the `hst-tw-db` binary.
// Plain `//` comments are used here on purpose: clap derives help text from
// `///` doc comments, so adding those would change the program's output.
#[derive(Debug, Parser)]
#[clap(name = "hst-tw-db", version, author)]
struct Opts {
// Logging verbosity options, flattened into the top-level arguments;
// `main` calls `init_logging` on this value before doing anything else.
#[clap(flatten)]
verbose: Verbosity,
/// Database directory path
#[clap(long)]
db: String,
// The subcommand to run.
#[clap(subcommand)]
command: Command,
}

// Subcommands of `hst-tw-db`; each variant is handled by one arm of the
// `match` in `main`. Plain `//` comments are used so that the clap-generated
// help text stays exactly as it is.
#[derive(Debug, Parser)]
enum Command {
// Reads users from an Avro file and passes each one to the database's `update`.
Import {
/// Avro input path
#[clap(short, long)]
input: String,
},
// Prints every entry stored for one user ID as a line of JSON.
Lookup {
/// Twitter user ID
id: u64,
},
// Iterates over the whole database and prints user, screen name, verified,
// and protected totals.
Count,
// Prints the estimated key count (when one is available) and the database
// statistics.
Stats,
}
84 changes: 84 additions & 0 deletions projects/hst-tools/src/bin/hst-tw-images.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use hst_cli::prelude::*;
use hst_tw_images::{Image, Store};
use reqwest::Url;
use std::fs::File;
use std::io::Write;
use std::path::Path;

#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("Twitter image error")]
TwitterImage(#[from] hst_tw_images::Error),
#[error("Twitter image store error")]
TwitterImageStore(#[from] hst_tw_images::store::Error),
#[error("HTTP client error")]
HttpClient(#[from] reqwest::Error),
#[error("I/O error")]
Io(#[from] std::io::Error),
#[error("Log initialization error")]
LogInitialization(#[from] log::SetLoggerError),
}

/// Requests `image` with `client` and saves the response body under `output`.
///
/// The destination is `output` joined with `image.path()`; missing parent
/// directories are created before the file is written.
///
/// Returns the URL reported by the response together with a flag that is
/// `true` when the body was empty, in which case nothing is written to disk.
///
/// # Errors
///
/// Fails when the request or body download fails, or when the directory or
/// file cannot be created or written.
async fn download_image<P: AsRef<Path>>(
    client: &reqwest::Client,
    image: &Image,
    output: P,
) -> Result<(Url, bool), Error> {
    let target = output.as_ref().join(image.path());

    let response = client.get(image.url()).send().await?;
    let final_url = response.url().clone();
    let body = response.bytes().await?;

    // An empty body is reported to the caller instead of producing an empty file.
    if body.is_empty() {
        return Ok((final_url, true));
    }

    if let Some(dir) = target.parent() {
        std::fs::create_dir_all(dir)?;
    }

    File::create(target)?.write_all(&body)?;

    Ok((final_url, false))
}

#[tokio::main]
async fn main() -> Result<(), Error> {
let opts: Opts = Opts::parse();
opts.verbose.init_logging()?;

match opts.command {
Command::StoreUrls { base } => {
let store = Store::new(base);

for entry in &store {
let (image, _) = entry?;
println!("{}", image);
}
}
Command::Scrape => todo!(),
}

Ok(())
}

// Command-line options for the `hst-tw-images` binary.
// `Debug` is derived to match the options struct of the sibling `hst-tw-db`
// binary and this file's own `Command` enum.
// Plain `//` comments are used on purpose: clap derives help text from `///`
// doc comments, so adding those would change the program's output.
#[derive(Debug, Parser)]
#[clap(name = "hst-tw-images", about, version, author)]
struct Opts {
    // Logging verbosity options, flattened into the top-level arguments;
    // `main` calls `init_logging` on this value before doing anything else.
    #[clap(flatten)]
    verbose: Verbosity,
    // The subcommand to run.
    #[clap(subcommand)]
    command: Command,
}

// Subcommands of `hst-tw-images`. The `///` lines below are clap help text and
// are left untouched; extra notes use plain `//` comments.
#[derive(Debug, Parser)]
enum Command {
// NOTE: `main` currently handles this variant with `todo!()`, so selecting
// it panics at runtime.
/// Download
Scrape,
// `base` is passed to `Store::new`; every entry the store yields is printed
// on its own line.
/// Dump a list of URLs (arbitrarily ordered) from a store as text
StoreUrls { base: String },
}

0 comments on commit 0fcae6c

Please sign in to comment.