use crate::{ Result, db::Channel, AdapterClient, }; use reqwest::Url; use chrono::{DateTime, Utc}; use std::hash::{Hash, Hasher}; pub struct FetchedRSSItem { guid: String, title: String, description: String, content: String, } impl FetchedRSSItem { pub fn guid(&self) -> &str { &self.guid } pub fn title(&self) -> &str { &self.title } pub fn description(&self) -> &str { &self.description } pub fn content(&self) -> &str { &self.content } fn parse(item: rss::Item) -> Self { FetchedRSSItem { guid: Self::get_or_create_guid(&item), title: item.title().unwrap_or("").to_string(), description: item.description().unwrap_or("").to_string(), content: item.content().unwrap_or("").to_string(), } } fn get_or_create_guid(item: &rss::Item) -> String { if let Some(guid) = item.guid() { return guid.value().to_string(); } let mut hasher = std::collections::hash_map::DefaultHasher::new(); item.link().unwrap_or("").hash(&mut hasher); item.title().unwrap_or("").hash(&mut hasher); item.description().unwrap_or("").hash(&mut hasher); format!("gen-{:x}", hasher.finish()) } } pub struct FetchedRSSChannel { title: String, link: Url, description: String, items: Vec, fetched_at: DateTime, } impl FetchedRSSChannel { pub fn title(&self) -> &str { &self.title } pub fn link(&self) -> &Url { &self.link } pub fn description(&self) -> &str { &self.description } pub fn items(&self) -> &[FetchedRSSItem] { &self.items } pub fn fetched_at(&self) -> &DateTime { &self.fetched_at } pub async fn fetch_channel( client: &AdapterClient, channel: Channel ) -> Result> { if channel.should_skip_fetch() { return Ok(None); } let bytestream = client.0.get(channel.link().clone()) .send().await? .bytes().await?; let rss_channel = rss::Channel::read_from(&bytestream[..])?; let now = Utc::now(); Ok(Some(FetchedRSSChannel::parse(rss_channel, now)?)) } fn parse(rss: rss::Channel, fetched_at: DateTime) -> Result { Ok(FetchedRSSChannel { title: rss.title, link: Url::parse(&rss.link)?, description: rss.description, items: rss.items.into_iter().map(FetchedRSSItem::parse).collect(), fetched_at: fetched_at, }) } } #[cfg(test)] mod tests { use super::*; use crate::test_utils::{ ITEM_TITLE, ITEM_GUID, ITEM_GUID2, ITEM_DESC, ITEM_CONT, CHANNEL_TITLE, CHANNEL_DESC, FEED1 }; use chrono::TimeZone; fn create_guid(value: String) -> rss::Guid { rss::Guid { value, permalink: false } } fn create_item(guid: rss::Guid) -> rss::Item { rss::ItemBuilder::default() .title(ITEM_TITLE.to_string()) .guid(guid) .description(ITEM_DESC.to_string()) .content(ITEM_CONT.to_string()) .build() } fn create_channel(items: Vec) -> rss::Channel { rss::ChannelBuilder::default() .title(CHANNEL_TITLE.to_string()) .description(CHANNEL_DESC.to_string()) .link(FEED1.to_string()) .items(items) .build() } #[test] fn parse_item() { let rss_guid = create_guid(ITEM_GUID.to_string()); let rss_item = create_item(rss_guid); let item = FetchedRSSItem::parse(rss_item); assert_eq!(item.guid, ITEM_GUID); assert_eq!(item.title, ITEM_TITLE); assert_eq!(item.description, ITEM_DESC); assert_eq!(item.content, ITEM_CONT); } #[test] fn parse_feed() { let rss_guid = create_guid(ITEM_GUID.to_string()); let rss_guid2 = create_guid(ITEM_GUID2.to_string()); let rss_item = create_item(rss_guid); let rss_item2 = create_item(rss_guid2); let rss_channel = create_channel([rss_item, rss_item2].to_vec()); let date: DateTime = Utc.with_ymd_and_hms(2020,1,1,0,0,0).unwrap(); let channel = FetchedRSSChannel::parse(rss_channel, date).unwrap(); assert_eq!(channel.title, CHANNEL_TITLE); assert_eq!(channel.link.as_str(), FEED1); assert_eq!(channel.description, CHANNEL_DESC); assert_eq!(channel.fetched_at, date); assert_eq!(channel.items.len(), 2); assert!(channel.items.iter().any(|i| i.guid() == ITEM_GUID)); assert!(channel.items.iter().any(|i| i.guid() == ITEM_GUID2)); } }