-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b2aeeff
commit 0fcae6c
Showing
4 changed files
with
220 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
[package] | ||
name = "hst-tw-tools" | ||
authors = ["Travis Brown <[email protected]>"] | ||
repository = "https://github.com/travisbrown/hassreden-tracker" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
[dependencies] | ||
apache-avro = { version = "0.14", features = ["snappy"] } | ||
hst-cli = { path = "../hst-cli" } | ||
hst-tw-db = { path = "../hst-tw-db" } | ||
hst-tw-images = { path = "../hst-tw-images" } | ||
hst-tw-profiles = { path = "../hst-tw-profiles" } | ||
reqwest = { version = "0.11", features = ["gzip", "json"] } | ||
serde_json = { version = "1", features = ["preserve_order"] } | ||
thiserror = "1" | ||
tokio = { version = "1", features = ["macros", "rt-multi-thread"] } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
use hst_cli::prelude::*; | ||
use hst_tw_db::{ | ||
table::{ReadOnly, Table, Writeable}, | ||
ProfileDb, | ||
}; | ||
use hst_tw_profiles::model::User; | ||
use std::collections::HashSet; | ||
use std::fs::File; | ||
|
||
fn main() -> Result<(), Error> { | ||
let opts: Opts = Opts::parse(); | ||
opts.verbose.init_logging()?; | ||
|
||
match opts.command { | ||
Command::Import { input } => { | ||
let db = ProfileDb::<Writeable>::open(opts.db, false)?; | ||
|
||
let file = File::open(input)?; | ||
let reader = hst_tw_profiles::avro::reader(file)?; | ||
|
||
for value in reader { | ||
let user = apache_avro::from_value::<User>(&value?)?; | ||
db.update(&user)?; | ||
} | ||
} | ||
Command::Lookup { id } => { | ||
let db = ProfileDb::<ReadOnly>::open(opts.db, true)?; | ||
let users = db.lookup(id)?; | ||
|
||
for (_, user) in users { | ||
println!("{}", serde_json::to_value(user)?); | ||
} | ||
} | ||
Command::Count => { | ||
let db = ProfileDb::<ReadOnly>::open(opts.db, true)?; | ||
let mut user_count = 0; | ||
let mut screen_name_count = 0; | ||
let mut verified = 0; | ||
let mut protected = 0; | ||
for result in db.iter() { | ||
let (_, users) = result?; | ||
let mut screen_names = HashSet::new(); | ||
|
||
user_count += 1; | ||
|
||
for (_, user) in &users { | ||
screen_names.insert(user.screen_name.clone()); | ||
} | ||
|
||
if let Some((_, user)) = users.last() { | ||
if user.verified { | ||
verified += 1; | ||
} | ||
if user.protected { | ||
protected += 1; | ||
} | ||
} | ||
|
||
screen_name_count += screen_names.len(); | ||
} | ||
|
||
println!("{} users, {} screen names", user_count, screen_name_count); | ||
println!("{} verified, {} protected", verified, protected); | ||
} | ||
Command::Stats => { | ||
let db = ProfileDb::<ReadOnly>::open(opts.db, true)?; | ||
if let Some(count) = db.get_estimated_key_count()? { | ||
println!("Estimated number of keys: {}", count); | ||
} | ||
println!("{:?}", db.statistics()); | ||
} | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
#[derive(thiserror::Error, Debug)] | ||
pub enum Error { | ||
#[error("ProfileDb error")] | ||
ProfileDb(#[from] hst_tw_db::Error), | ||
#[error("Profile Avro error")] | ||
ProfileAvro(#[from] hst_tw_profiles::avro::Error), | ||
#[error("Avro decoding error")] | ||
Avro(#[from] apache_avro::Error), | ||
#[error("JSON encoding error")] | ||
Json(#[from] serde_json::Error), | ||
#[error("I/O error")] | ||
Io(#[from] std::io::Error), | ||
#[error("Log initialization error")] | ||
LogInitialization(#[from] log::SetLoggerError), | ||
} | ||
|
||
#[derive(Debug, Parser)] | ||
#[clap(name = "hst-tw-db", version, author)] | ||
struct Opts { | ||
#[clap(flatten)] | ||
verbose: Verbosity, | ||
/// Database directory path | ||
#[clap(long)] | ||
db: String, | ||
#[clap(subcommand)] | ||
command: Command, | ||
} | ||
|
||
#[derive(Debug, Parser)] | ||
enum Command { | ||
Import { | ||
/// Avro input path | ||
#[clap(short, long)] | ||
input: String, | ||
}, | ||
Lookup { | ||
/// Twitter user ID | ||
id: u64, | ||
}, | ||
Count, | ||
Stats, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
use hst_cli::prelude::*; | ||
use hst_tw_images::{Image, Store}; | ||
use reqwest::Url; | ||
use std::fs::File; | ||
use std::io::Write; | ||
use std::path::Path; | ||
|
||
#[derive(thiserror::Error, Debug)] | ||
pub enum Error { | ||
#[error("Twitter image error")] | ||
TwitterImage(#[from] hst_tw_images::Error), | ||
#[error("Twitter image store error")] | ||
TwitterImageStore(#[from] hst_tw_images::store::Error), | ||
#[error("HTTP client error")] | ||
HttpClient(#[from] reqwest::Error), | ||
#[error("I/O error")] | ||
Io(#[from] std::io::Error), | ||
#[error("Log initialization error")] | ||
LogInitialization(#[from] log::SetLoggerError), | ||
} | ||
|
||
async fn download_image<P: AsRef<Path>>( | ||
client: &reqwest::Client, | ||
image: &Image, | ||
output: P, | ||
) -> Result<(Url, bool), Error> { | ||
let path = output.as_ref().join(image.path()); | ||
|
||
let url = image.url(); | ||
|
||
let response = client.get(url).send().await?; | ||
let response_url = response.url().clone(); | ||
let bytes = response.bytes().await?; | ||
|
||
if !bytes.is_empty() { | ||
if let Some(parent) = path.parent() { | ||
std::fs::create_dir_all(&parent)?; | ||
} | ||
|
||
let mut file = File::create(path)?; | ||
file.write_all(&bytes)?; | ||
|
||
Ok((response_url, false)) | ||
} else { | ||
Ok((response_url, true)) | ||
} | ||
} | ||
|
||
#[tokio::main] | ||
async fn main() -> Result<(), Error> { | ||
let opts: Opts = Opts::parse(); | ||
opts.verbose.init_logging()?; | ||
|
||
match opts.command { | ||
Command::StoreUrls { base } => { | ||
let store = Store::new(base); | ||
|
||
for entry in &store { | ||
let (image, _) = entry?; | ||
println!("{}", image); | ||
} | ||
} | ||
Command::Scrape => todo!(), | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
#[derive(Parser)] | ||
#[clap(name = "hst-tw-images", about, version, author)] | ||
struct Opts { | ||
#[clap(flatten)] | ||
verbose: Verbosity, | ||
#[clap(subcommand)] | ||
command: Command, | ||
} | ||
|
||
#[derive(Debug, Parser)] | ||
enum Command { | ||
/// Download | ||
Scrape, | ||
/// Dump a list of URLs (arbitrarily ordered) from a store as text | ||
StoreUrls { base: String }, | ||
} |