From b1863d07efbb2cc4672c0d6f6143738a122d6720 Mon Sep 17 00:00:00 2001 From: iximeow Date: Thu, 30 Nov 2017 03:07:05 -0800 Subject: remove unneeded params, adjust id display tweets are now recorded with the date they are recieved at dated id lookup is now supported tweet ids, where displayed, are displayed in a convenient form: if the tweet was recieved today, show the today id if the tweet was recieved on a different day, show the bare id many instances where display_info was passed for logging information, the parameter has become unnecessary, so it's gone now --- src/tw/mod.rs | 200 +++++++++++++++++++++++++++++++++++++++++--------------- src/tw/tweet.rs | 5 ++ 2 files changed, 153 insertions(+), 52 deletions(-) (limited to 'src/tw') diff --git a/src/tw/mod.rs b/src/tw/mod.rs index 629a6c7..737286d 100644 --- a/src/tw/mod.rs +++ b/src/tw/mod.rs @@ -131,17 +131,21 @@ pub struct TwitterCache { // twitter::num // twiter tweet id num struct IdConversions { // maps a day to the base id for tweets off that day. - id_per_date: HashMap, - id_to_tweet_id: HashMap + id_to_tweet_id: HashMap, // twitter id to id is satisfied by looking up the twitter id in tweeter.tweets and getting // .inner_id + // YYYYMMDD : day_id : inner_tweet_id + tweets_by_date: HashMap>, + // YYYYMMDD : inner_tweet_id : day_id + tweets_by_date_and_tweet_id: HashMap> } impl Default for IdConversions { fn default() -> Self { IdConversions { - id_per_date: HashMap::new(), - id_to_tweet_id: HashMap::new() + id_to_tweet_id: HashMap::new(), + tweets_by_date: HashMap::new(), + tweets_by_date_and_tweet_id: HashMap::new() } } } @@ -180,6 +184,7 @@ mod tests { fn tweet_id_parse_test() { assert_eq!(TweetId::parse("12345".to_string()), Ok(TweetId::Today(12345))); assert_eq!(TweetId::parse("20170403:12345".to_string()), Ok(TweetId::Dated("20170403".to_string(), 12345))); + assert_eq!(TweetId::parse("20170403:12345".to_string()), Ok(TweetId::Dated("20170403".to_string(), 12345))); assert_eq!(TweetId::parse(":12345".to_string()), Ok(TweetId::Bare(12345))); assert_eq!(TweetId::parse("twitter:12345".to_string()), Ok(TweetId::Twitter("12345".to_string()))); assert_eq!(TweetId::parse("twitter:asdf".to_string()), Ok(TweetId::Twitter("asdf".to_string()))); @@ -192,6 +197,57 @@ mod tests { assert_eq!(TweetId::parse(":a34".to_string()), Err("invalid digit found in string".to_owned())); assert_eq!(TweetId::parse("asdf:34".to_string()), Err("Unrecognized id string: asdf:34".to_owned())); } + + #[test] + fn test_tweet_retrieval() { + let today = Local::now(); + let yesterday = today - chrono::Duration::days(1); + let tweets = vec![ + Tweet { + id: "manual_tweet_1".to_owned(), + author_id: "author_1".to_owned(), + text: "this is a test".to_owned(), + created_at: "1234 not real lol".to_owned(), + recieved_at: yesterday, + urls: HashMap::new(), + quoted_tweet_id: None, + rt_tweet: None, + reply_to_tweet: None, + internal_id: 0 + }, + Tweet { + id: "manual_tweet_2".to_owned(), + author_id: "author_1".to_owned(), + text: "this is a test".to_owned(), + created_at: "1234 not real lol".to_owned(), + recieved_at: today, + urls: HashMap::new(), + quoted_tweet_id: None, + rt_tweet: None, + reply_to_tweet: None, + internal_id: 0 + } + ]; + + let mut tweeter = TwitterCache::new(); + + for tweet in &tweets { + tweeter.number_and_insert_tweet(tweet.to_owned()); + } + + assert_eq!( + tweeter.retrieve_tweet(&TweetId::Twitter("manual_tweet_1".to_owned())).map(|x| x.id.to_owned()), + Some(tweets[0].clone()).map(|x| x.id) + ); + assert_eq!( + tweeter.retrieve_tweet(&TweetId::Today(0)).map(|x| x.id.to_owned()), + Some(tweets[1].clone()).map(|x| x.id) + ); + assert_eq!( + tweeter.retrieve_tweet(&TweetId::Dated(format!("{:04}{:02}{:02}", yesterday.year(), yesterday.month(), yesterday.day()), 0)).map(|x| x.id.to_owned()), + Some(tweets[0].clone()).map(|x| x.id) + ); + } } impl TweetId { @@ -230,18 +286,55 @@ impl IdConversions { // // except in the TweetId::Twitter case we TweetId -> Option -> Option ... to -> // Option in the future? - fn to_inner_id(&self, tweeter: &TwitterCache, twid: TweetId, display_info: &mut DisplayInfo) -> Option { + fn to_twitter_id(&self, twid: TweetId) -> Option { match twid { TweetId::Today(num) => { - let first_for_today: u64 = 0; - Some(first_for_today + num) + let now = Local::now(); + let now_date_str = format!("{:04}{:02}{:02}", now.year(), now.month(), now.day()); + let tweet_id = self.tweets_by_date.get(&now_date_str).and_then(|x| x.get(&num)); + tweet_id.and_then(|x| self.id_to_tweet_id.get(x)).map(|x| x.to_owned()) }, TweetId::Dated(date, num) => { - let first_for_date: u64 = 0; - Some(first_for_date + num) + let tweet_id = self.tweets_by_date.get(&date).and_then(|x| x.get(&num)); + tweet_id.and_then(|x| self.id_to_tweet_id.get(x)).map(|x| x.to_owned()) }, - TweetId::Bare(num) => Some(num), - twid @ TweetId::Twitter(_) => tweeter.retrieve_tweet(&twid, display_info).map(|x| x.internal_id) + TweetId::Bare(num) => self.id_to_tweet_id.get(&num).map(|x| x.to_owned()), + TweetId::Twitter(id) => Some(id) + } + } + + fn to_display_id(&self, twid: &TweetId, tweeter: &TwitterCache) -> TweetId { + match twid { + id @ &TweetId::Today(_) => id.to_owned(), + id @ &TweetId::Dated(_, _) => { + tweeter.retrieve_tweet(id).map(|x| TweetId::Bare(x.internal_id)).unwrap_or(id.to_owned()) + }, + id @ &TweetId::Bare(_) => { + tweeter.retrieve_tweet(id).and_then(|tweet| { + let now = Local::now(); + let tweet_date = tweet.recieved_at.with_timezone(&now.timezone()); + if now.year() == tweet_date.year() && now.month() == tweet_date.month() && now.day() == tweet_date.day() { + let date_string = format!("{:04}{:02}{:02}", tweet.recieved_at.year(), tweet.recieved_at.month(), tweet.recieved_at.day()); + let today_id = self.tweets_by_date_and_tweet_id.get(&date_string).and_then(|m| m.get(&tweet.internal_id)); + today_id.map(|x| TweetId::Today(*x)) + } else { + None + } + }).unwrap_or(id.to_owned()) + }, + id @ &TweetId::Twitter(_) => { + tweeter.retrieve_tweet(id).and_then(|tweet| { + let now = Local::now(); + let tweet_date = tweet.recieved_at.with_timezone(&now.timezone()); + if now.year() == tweet_date.year() && now.month() == tweet_date.month() && now.day() == tweet_date.day() { + let date_string = format!("{:04}{:02}{:02}", tweet.recieved_at.year(), tweet.recieved_at.month(), tweet.recieved_at.day()); + let today_id = self.tweets_by_date_and_tweet_id.get(&date_string).and_then(|m| m.get(&tweet.internal_id)); + today_id.map(|x| TweetId::Today(*x)) + } else { + None + } + }).unwrap_or(id.to_owned()) + } } } } @@ -500,10 +593,44 @@ impl TwitterCache { if tw.internal_id == 0 { tw.internal_id = (self.tweets.len() as u64) + 1; self.id_conversions.id_to_tweet_id.insert(tw.internal_id, tw.id.to_owned()); + let local_recv_time = tw.recieved_at.with_timezone(&Local::now().timezone()); + let tweet_date = format!("{:04}{:02}{:02}", local_recv_time.year(), local_recv_time.month(), local_recv_time.day()); + if !self.id_conversions.tweets_by_date.contains_key(&tweet_date) { + self.id_conversions.tweets_by_date.insert(tweet_date.clone(), HashMap::new()); + } + if !self.id_conversions.tweets_by_date_and_tweet_id.contains_key(&tweet_date) { + self.id_conversions.tweets_by_date_and_tweet_id.insert(tweet_date.clone(), HashMap::new()); + } + + let date_map: &mut HashMap = self.id_conversions.tweets_by_date.get_mut(&tweet_date).unwrap(); + let next_idx = date_map.len() as u64; + date_map.insert(next_idx, tw.internal_id); + + let date_map: &mut HashMap = self.id_conversions.tweets_by_date_and_tweet_id.get_mut(&tweet_date).unwrap(); + let next_idx = date_map.len() as u64; + date_map.insert(tw.internal_id, next_idx); + self.tweets.insert(tw.id.to_owned(), tw); } } } + pub fn display_id_for_tweet(&self, tweet: &Tweet) -> TweetId { + let now = Local::now(); + let tweet_date = tweet.recieved_at.with_timezone(&now.timezone()); + let bare_id = TweetId::Bare(tweet.internal_id); + let maybe_dated_id = if now.year() == tweet_date.year() && now.month() == tweet_date.month() && now.day() == tweet_date.day() { + let date_string = format!("{:04}{:02}{:02}", tweet.recieved_at.year(), tweet.recieved_at.month(), tweet.recieved_at.day()); + let today_id = self.id_conversions.tweets_by_date_and_tweet_id.get(&date_string).and_then(|m| m.get(&tweet.internal_id)); + today_id.map(|x| TweetId::Today(*x)) + } else { + None + }; + + maybe_dated_id.unwrap_or(bare_id) + } + pub fn display_id_for_tweet_id(&self, twid: &TweetId) -> TweetId { + self.id_conversions.to_display_id(twid, self) + } pub fn load_cache(display_info: &mut DisplayInfo) -> TwitterCache { if Path::new(TwitterCache::PROFILE_CACHE).is_file() { let mut buf = vec![]; @@ -641,54 +768,23 @@ impl TwitterCache { /* nothing else to care about now, i think? */ } } - pub fn retrieve_tweet(&self, tweet_id: &TweetId, display_info: &mut DisplayInfo) -> Option<&Tweet> { - match tweet_id { - &TweetId::Bare(ref id) => { - let maybe_tweet_id = self.id_conversions.id_to_tweet_id.get(id); - match maybe_tweet_id { - Some(id) => self.tweets.get(id), - None => None - } - }, - &TweetId::Today(ref id) => { - let inner_id = self.id_conversions.id_to_tweet_id.get(id); - display_info.status("Retrieving tweets with dated IDs is not yet supported.".to_string()); - None - }, - &TweetId::Dated(ref date, ref id) => { - display_info.status("Retrieving tweets with dated IDs is not yet supported.".to_string()); - None - }, - &TweetId::Twitter(ref id) => self.tweets.get(id) - } + pub fn retrieve_tweet(&self, tweet_id: &TweetId) -> Option<&Tweet> { + let maybe_tweet_id = self.id_conversions.to_twitter_id(tweet_id.to_owned()); + maybe_tweet_id.and_then(|id| self.tweets.get(&id)) } pub fn retrieve_user(&self, user_id: &String) -> Option<&User> { self.users.get(user_id) } pub fn fetch_tweet(&mut self, tweet_id: &TweetId, mut queryer: &mut ::Queryer, display_info: &mut DisplayInfo) -> Option<&Tweet> { - match tweet_id { - &TweetId::Bare(ref id) => { - // we can do nothing but just try to get it - self.retrieve_tweet(tweet_id, display_info) - } - &TweetId::Today(ref id) => { - // we can do nothing but just try to get it - self.retrieve_tweet(tweet_id, display_info) - }, - &TweetId::Dated(ref date, ref id) => { - // we can do nothing but just try to get it - self.retrieve_tweet(tweet_id, display_info) - }, - &TweetId::Twitter(ref id) => { - if !self.tweets.contains_key(id) { - match self.look_up_tweet(id, &mut queryer) { - Ok(json) => self.cache_api_tweet(json), - Err(e) => display_info.status(format!("Unable to retrieve tweet {}:\n{}", id, e)) - }; - } - self.retrieve_tweet(tweet_id, display_info) + if let &TweetId::Twitter(ref id) = tweet_id { + if !self.tweets.contains_key(id) { + match self.look_up_tweet(id, &mut queryer) { + Ok(json) => self.cache_api_tweet(json), + Err(e) => display_info.status(format!("Unable to retrieve tweet {}:\n{}", id, e)) + }; } } + self.retrieve_tweet(tweet_id) } pub fn fetch_user(&mut self, user_id: &String, mut queryer: &mut ::Queryer, display_info: &mut DisplayInfo) -> Option<&User> { if !self.users.contains_key(user_id) { diff --git a/src/tw/tweet.rs b/src/tw/tweet.rs index 38b838d..2ca32c3 100644 --- a/src/tw/tweet.rs +++ b/src/tw/tweet.rs @@ -1,5 +1,7 @@ extern crate serde_json; +use chrono::prelude::*; + use std::collections::HashMap; use tw::user::User; @@ -10,6 +12,8 @@ pub struct Tweet { pub author_id: String, pub text: String, pub created_at: String, // lol + #[serde(default = "Utc::now")] + pub recieved_at: DateTime, #[serde(skip_serializing_if="HashMap::is_empty")] #[serde(default = "HashMap::default")] pub urls: HashMap, @@ -84,6 +88,7 @@ impl Tweet { author_id: author_id.to_owned(), text: text, created_at: created_at.to_owned(), + recieved_at: Utc::now(), urls: url_map, quoted_tweet_id: json_map.get("quoted_status_id_str") .and_then(|x| x.as_str()) -- cgit v1.1