// Koucha/koucha/src/fetch.rs — 156 lines, 4.2 KiB, Rust

use crate::{
Result,
db::Channel,
AdapterClient,
};
use reqwest::Url;
use chrono::{DateTime, Utc};
use std::hash::{Hash, Hasher};
/// A single RSS item reduced to the fields this module keeps.
///
/// Instances are produced by `FetchedRSSItem::parse`; a title, description or
/// content that is absent in the feed is stored as an empty string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FetchedRSSItem {
    /// Identifier of the item: the GUID supplied by the feed, or a generated
    /// `gen-…` value when the feed supplies none.
    guid: String,
    /// Item title, empty when the feed has none.
    title: String,
    /// Item description, empty when the feed has none.
    description: String,
    /// Item content, empty when the feed has none.
    content: String,
}
impl FetchedRSSItem {
    /// Returns the item's identifier (feed-supplied or generated).
    pub fn guid(&self) -> &str { &self.guid }

    /// Returns the item's title; empty when the feed had none.
    pub fn title(&self) -> &str { &self.title }

    /// Returns the item's description; empty when the feed had none.
    pub fn description(&self) -> &str { &self.description }

    /// Returns the item's content; empty when the feed had none.
    pub fn content(&self) -> &str { &self.content }

    /// Converts a parsed `rss::Item` into a `FetchedRSSItem`.
    ///
    /// A missing title, description or content becomes an empty string; the
    /// GUID is resolved by `get_or_create_guid`.
    fn parse(item: rss::Item) -> Self {
        FetchedRSSItem {
            guid: Self::get_or_create_guid(&item),
            title: item.title().unwrap_or_default().to_string(),
            description: item.description().unwrap_or_default().to_string(),
            content: item.content().unwrap_or_default().to_string(),
        }
    }

    /// Returns the GUID supplied by the feed, or derives one from the item's
    /// link, title and description when the feed supplies none.
    ///
    /// Generated identifiers have the form `gen-<hex>`.
    fn get_or_create_guid(item: &rss::Item) -> String {
        // A blank `<guid>` cannot tell items apart (every such item would end
        // up with the same empty identifier), so treat it like a missing one.
        let supplied = item
            .guid()
            .map(|guid| guid.value())
            .filter(|value| !value.trim().is_empty());
        if let Some(value) = supplied {
            return value.to_string();
        }

        // NOTE(review): the standard library does not guarantee
        // `DefaultHasher`'s algorithm across Rust releases, so generated GUIDs
        // may change after a toolchain upgrade — revisit if they are persisted.
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        item.link().unwrap_or_default().hash(&mut hasher);
        item.title().unwrap_or_default().hash(&mut hasher);
        item.description().unwrap_or_default().hash(&mut hasher);
        format!("gen-{:x}", hasher.finish())
    }
}
/// An RSS channel as downloaded and parsed at one point in time.
pub struct FetchedRSSChannel {
    /// Channel title as given by the feed.
    title: String,
    /// Channel link from the feed, parsed into a `Url`.
    link: Url,
    /// Channel description as given by the feed.
    description: String,
    /// Items of the channel, in the order the feed lists them.
    items: Vec<FetchedRSSItem>,
    /// Timestamp attached to this snapshot when it was parsed.
    fetched_at: DateTime<Utc>,
}
impl FetchedRSSChannel {
pub fn title(&self) -> &str { &self.title }
pub fn link(&self) -> &Url { &self.link }
pub fn description(&self) -> &str { &self.description }
pub fn items(&self) -> &[FetchedRSSItem] { &self.items }
pub fn fetched_at(&self) -> &DateTime<Utc> { &self.fetched_at }
pub async fn fetch_channel(
client: &AdapterClient, channel: Channel
) -> Result<Option<Self>> {
if channel.should_skip_fetch() {
return Ok(None);
}
let bytestream = client.0.get(channel.link().clone())
.send().await?
.bytes().await?;
let rss_channel = rss::Channel::read_from(&bytestream[..])?;
2026-01-26 15:00:52 -08:00
let now = Utc::now();
Ok(Some(FetchedRSSChannel::parse(rss_channel, now)?))
}
2026-01-26 15:00:52 -08:00
fn parse(rss: rss::Channel, fetched_at: DateTime<Utc>) -> Result<Self> {
Ok(FetchedRSSChannel {
title: rss.title,
link: Url::parse(&rss.link)?,
description: rss.description,
items: rss.items.into_iter().map(FetchedRSSItem::parse).collect(),
2026-01-26 15:00:52 -08:00
fetched_at: fetched_at,
})
}
}
// 2026-01-26 15:00:52 -08:00
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::{
        ITEM_TITLE, ITEM_GUID, ITEM_GUID2, ITEM_DESC, ITEM_CONT,
        CHANNEL_TITLE, CHANNEL_DESC, FEED1,
        get_datetime
    };

    /// Wraps `value` in a non-permalink `rss::Guid`.
    fn create_guid(value: String) -> rss::Guid {
        rss::Guid { value, permalink: false }
    }

    /// Builds an item carrying the shared test title, description and content
    /// together with the given GUID.
    fn create_item(guid: rss::Guid) -> rss::Item {
        rss::ItemBuilder::default()
            .title(ITEM_TITLE.to_string())
            .guid(guid)
            .description(ITEM_DESC.to_string())
            .content(ITEM_CONT.to_string())
            .build()
    }

    /// Builds a channel with the shared test title, description and link that
    /// contains `items`.
    fn create_channel(items: Vec<rss::Item>) -> rss::Channel {
        rss::ChannelBuilder::default()
            .title(CHANNEL_TITLE.to_string())
            .description(CHANNEL_DESC.to_string())
            .link(FEED1.to_string())
            .items(items)
            .build()
    }

    #[test]
    fn parse_item() {
        let rss_guid = create_guid(ITEM_GUID.to_string());
        let rss_item = create_item(rss_guid);

        let item = FetchedRSSItem::parse(rss_item);

        assert_eq!(item.guid, ITEM_GUID);
        assert_eq!(item.title, ITEM_TITLE);
        assert_eq!(item.description, ITEM_DESC);
        assert_eq!(item.content, ITEM_CONT);
    }

    #[test]
    fn parse_item_without_guid_generates_one() {
        let rss_item = rss::ItemBuilder::default()
            .title(ITEM_TITLE.to_string())
            .description(ITEM_DESC.to_string())
            .content(ITEM_CONT.to_string())
            .build();

        let first = FetchedRSSItem::parse(rss_item.clone());
        let second = FetchedRSSItem::parse(rss_item);

        // The fallback identifier is prefixed and repeatable for equal input.
        assert!(first.guid().starts_with("gen-"));
        assert_eq!(first.guid(), second.guid());
    }

    #[test]
    fn parse_feed() {
        let rss_item = create_item(create_guid(ITEM_GUID.to_string()));
        let rss_item2 = create_item(create_guid(ITEM_GUID2.to_string()));
        let rss_channel = create_channel(vec![rss_item, rss_item2]);
        let date: DateTime<Utc> = get_datetime();

        let channel = FetchedRSSChannel::parse(rss_channel, date).unwrap();

        assert_eq!(channel.title, CHANNEL_TITLE);
        assert_eq!(channel.link.as_str(), FEED1);
        assert_eq!(channel.description, CHANNEL_DESC);
        assert_eq!(channel.fetched_at, date);
        assert_eq!(channel.items.len(), 2);
        assert!(channel.items.iter().any(|i| i.guid() == ITEM_GUID));
        assert!(channel.items.iter().any(|i| i.guid() == ITEM_GUID2));
    }
}