Creates a `fetch` module for fetching remote RSS content. This implementation is bare-bones and not great, since it doesn't pass any compression or timestamp information with the request.
151 lines
4.1 KiB
Rust
151 lines
4.1 KiB
Rust
use crate::{
|
|
Result,
|
|
db::Channel,
|
|
AdapterClient,
|
|
};
|
|
use reqwest::Url;
|
|
use chrono::{DateTime, Utc};
|
|
use std::hash::{Hash, Hasher};
|
|
|
|
/// A single feed entry, reduced to the fields this module keeps.
///
/// Values are produced by `FetchedRSSItem::parse`; text fields that the feed
/// left out are stored as empty strings rather than as `Option`s.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FetchedRSSItem {
    /// Identifier of the entry: the GUID supplied by the feed, or a generated
    /// `gen-<hex>` value when the feed did not supply one.
    guid: String,
    /// Title of the entry (empty when absent).
    title: String,
    /// Description of the entry (empty when absent).
    description: String,
    /// Content of the entry (empty when absent).
    content: String,
}
|
|
impl FetchedRSSItem {
|
|
pub fn guid(&self) -> &str { &self.guid }
|
|
pub fn title(&self) -> &str { &self.title }
|
|
pub fn description(&self) -> &str { &self.description }
|
|
pub fn content(&self) -> &str { &self.content }
|
|
|
|
fn parse(item: rss::Item) -> Self {
|
|
FetchedRSSItem {
|
|
guid: Self::get_or_create_guid(&item),
|
|
title: item.title().unwrap_or("").to_string(),
|
|
description: item.description().unwrap_or("").to_string(),
|
|
content: item.content().unwrap_or("").to_string(),
|
|
}
|
|
}
|
|
|
|
fn get_or_create_guid(item: &rss::Item) -> String {
|
|
if let Some(guid) = item.guid() {
|
|
return guid.value().to_string();
|
|
}
|
|
|
|
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
|
item.link().unwrap_or("").hash(&mut hasher);
|
|
item.title().unwrap_or("").hash(&mut hasher);
|
|
item.description().unwrap_or("").hash(&mut hasher);
|
|
|
|
format!("gen-{:x}", hasher.finish())
|
|
}
|
|
}
|
|
pub struct FetchedRSSChannel {
|
|
title: String,
|
|
link: Url,
|
|
description: String,
|
|
|
|
items: Vec<FetchedRSSItem>,
|
|
|
|
fetched_at: DateTime<Utc>,
|
|
}
|
|
impl FetchedRSSChannel {
|
|
pub fn title(&self) -> &str { &self.title }
|
|
pub fn link(&self) -> &Url { &self.link }
|
|
pub fn description(&self) -> &str { &self.description }
|
|
pub fn items(&self) -> &[FetchedRSSItem] { &self.items }
|
|
pub fn fetched_at(&self) -> &DateTime<Utc> { &self.fetched_at }
|
|
|
|
pub async fn fetch_channel(
|
|
client: &AdapterClient, channel: Channel
|
|
) -> Result<Option<Self>> {
|
|
let bytestream = client.0.get(channel.link().clone())
|
|
.send().await?
|
|
.bytes().await?;
|
|
|
|
let rss_channel = rss::Channel::read_from(&bytestream[..])?;
|
|
|
|
let now = Utc::now();
|
|
|
|
Ok(Some(FetchedRSSChannel::parse(rss_channel, now)?))
|
|
}
|
|
|
|
fn parse(rss: rss::Channel, fetched_at: DateTime<Utc>) -> Result<Self> {
|
|
Ok(FetchedRSSChannel {
|
|
title: rss.title,
|
|
link: Url::parse(&rss.link)?,
|
|
description: rss.description,
|
|
|
|
items: rss.items.into_iter().map(FetchedRSSItem::parse).collect(),
|
|
|
|
fetched_at: fetched_at,
|
|
})
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::{
        ITEM_TITLE, ITEM_GUID, ITEM_GUID2, ITEM_DESC, ITEM_CONT,
        CHANNEL_TITLE, CHANNEL_DESC, FEED1,
        get_datetime
    };

    /// Wraps `value` in a non-permalink `rss::Guid`.
    fn create_guid(value: String) -> rss::Guid {
        rss::Guid { value, permalink: false }
    }

    /// Builds an item carrying `guid` and the shared fixture title,
    /// description, and content.
    fn create_item(guid: rss::Guid) -> rss::Item {
        rss::ItemBuilder::default()
            .title(ITEM_TITLE.to_string())
            .guid(guid)
            .description(ITEM_DESC.to_string())
            .content(ITEM_CONT.to_string())
            .build()
    }

    /// Builds a channel with the shared fixture metadata and the given items.
    fn create_channel(items: Vec<rss::Item>) -> rss::Channel {
        rss::ChannelBuilder::default()
            .title(CHANNEL_TITLE.to_string())
            .description(CHANNEL_DESC.to_string())
            .link(FEED1.to_string())
            .items(items)
            .build()
    }

    #[test]
    fn parse_item() {
        let rss_guid = create_guid(ITEM_GUID.to_string());
        let rss_item = create_item(rss_guid);
        let item = FetchedRSSItem::parse(rss_item);

        assert_eq!(item.guid, ITEM_GUID);
        assert_eq!(item.title, ITEM_TITLE);
        assert_eq!(item.description, ITEM_DESC);
        assert_eq!(item.content, ITEM_CONT);
    }

    #[test]
    fn parse_item_without_guid_generates_one() {
        let rss_item = rss::ItemBuilder::default()
            .title(ITEM_TITLE.to_string())
            .description(ITEM_DESC.to_string())
            .build();

        let first = FetchedRSSItem::parse(rss_item.clone());
        let second = FetchedRSSItem::parse(rss_item);

        // The fallback id is prefixed and is the same for identical input.
        assert!(first.guid().starts_with("gen-"));
        assert_eq!(first.guid(), second.guid());
    }

    #[test]
    fn parse_feed() {
        let rss_guid = create_guid(ITEM_GUID.to_string());
        let rss_guid2 = create_guid(ITEM_GUID2.to_string());
        let rss_item = create_item(rss_guid);
        let rss_item2 = create_item(rss_guid2);

        // `vec!` moves the items in; the array + `to_vec()` form cloned them.
        let rss_channel = create_channel(vec![rss_item, rss_item2]);

        let date: DateTime<Utc> = get_datetime();

        let channel = FetchedRSSChannel::parse(rss_channel, date).unwrap();

        assert_eq!(channel.title, CHANNEL_TITLE);
        assert_eq!(channel.link.as_str(), FEED1);
        assert_eq!(channel.description, CHANNEL_DESC);
        assert_eq!(channel.fetched_at, date);
        assert_eq!(channel.items.len(), 2);
        assert!(channel.items.iter().any(|i| i.guid() == ITEM_GUID));
        assert!(channel.items.iter().any(|i| i.guid() == ITEM_GUID2));
    }
}
|