From 4467690ff19088f2c3b215623a3d34c2a2d6ec7a Mon Sep 17 00:00:00 2001 From: Julia Lange Date: Fri, 6 Feb 2026 13:18:26 -0800 Subject: [PATCH] fetch, create fetch mod and AdapterClient Creates a fetch mod for fetching remote rss content. This implementation is barebones and not great since it doesn't pass any compression or timestamp information. --- koucha/Cargo.lock | 129 +++++++++++++++++++++++++++++++++++++ koucha/Cargo.toml | 1 + koucha/src/fetch.rs | 151 ++++++++++++++++++++++++++++++++++++++++++++ koucha/src/lib.rs | 8 ++- 4 files changed, 288 insertions(+), 1 deletion(-) create mode 100644 koucha/src/fetch.rs diff --git a/koucha/Cargo.lock b/koucha/Cargo.lock index 4849805..c9fd8ab 100644 --- a/koucha/Cargo.lock +++ b/koucha/Cargo.lock @@ -26,6 +26,19 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atom_syndication" +version = "0.12.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2f68d23e2cb4fd958c705b91a6b4c80ceeaf27a9e11651272a8389d5ce1a4a3" +dependencies = [ + "chrono", + "derive_builder", + "diligent-date-parser", + "never", + "quick-xml", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -260,6 +273,41 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "der" version = "0.7.10" @@ -271,6 +319,37 @@ dependencies = [ "zeroize", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn", +] + [[package]] name = "digest" version = "0.10.7" @@ -283,6 +362,15 @@ dependencies = [ "subtle", ] +[[package]] +name = "diligent-date-parser" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ede7d79366f419921e2e2f67889c12125726692a313bffb474bd5f37a581e9" +dependencies = [ + "chrono", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -806,6 +894,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -907,6 +1001,7 @@ version = "0.1.0" dependencies = [ "chrono", "reqwest", + "rss", "sqlx", "tokio", ] @@ -1014,6 +1109,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "never" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c96aba5aa877601bb3f6dd6a63a969e1f82e60646e81e71b14496995e9853c91" + [[package]] name = "num-bigint-dig" version = "0.8.6" @@ -1182,6 +1283,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "encoding_rs", + "memchr", +] + [[package]] name = "quinn" version = "0.11.9" @@ -1402,6 +1513,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rss" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2107738f003660f0a91f56fd3e3bd3ab5d918b2ddaf1e1ec2136fb1c46f71bf" +dependencies = [ + "atom_syndication", + "derive_builder", + "never", + "quick-xml", +] + [[package]] name = "rustc-hash" version = "2.1.1" @@ -1894,6 +2017,12 @@ dependencies = [ "unicode-properties", ] +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "subtle" version = "2.6.1" diff --git a/koucha/Cargo.toml b/koucha/Cargo.toml index 7a97cef..1106901 100644 --- a/koucha/Cargo.toml +++ b/koucha/Cargo.toml @@ -5,6 +5,7 @@ edition = "2024" [dependencies] reqwest = "0.13.1" +rss = "2.0.12" tokio = { version = "1.49.0", features = ["full"] } sqlx = { version = "0.8.6", features = [ "runtime-tokio", "sqlite" ] } chrono = "0.4.43" diff --git a/koucha/src/fetch.rs b/koucha/src/fetch.rs new file mode 100644 index 0000000..53ba2b9 --- /dev/null +++ b/koucha/src/fetch.rs @@ -0,0 +1,151 @@ +use crate::{ + Result, + db::Channel, + AdapterClient, +}; +use reqwest::Url; +use chrono::{DateTime, Utc}; +use std::hash::{Hash, Hasher}; + +pub struct FetchedRSSItem { + guid: String, + title: String, + description: String, + content: String, +} +impl FetchedRSSItem { + pub fn guid(&self) -> &str { &self.guid } + pub fn title(&self) -> &str { &self.title } + pub fn description(&self) -> &str { &self.description } + pub fn content(&self) -> &str { &self.content } + + fn parse(item: rss::Item) -> Self { + FetchedRSSItem { + guid: Self::get_or_create_guid(&item), + title: item.title().unwrap_or("").to_string(), + description: item.description().unwrap_or("").to_string(), + content: item.content().unwrap_or("").to_string(), + } + } + + fn get_or_create_guid(item: &rss::Item) -> String { + if let Some(guid) = item.guid() { + return guid.value().to_string(); + } + + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + item.link().unwrap_or("").hash(&mut hasher); + item.title().unwrap_or("").hash(&mut hasher); + item.description().unwrap_or("").hash(&mut hasher); + + format!("gen-{:x}", hasher.finish()) + } +} +pub struct FetchedRSSChannel { + title: String, + link: Url, + description: String, + + items: Vec, + + fetched_at: DateTime, +} +impl FetchedRSSChannel { + pub fn title(&self) -> &str { &self.title } + pub fn link(&self) -> &Url { &self.link } + pub fn description(&self) -> &str { &self.description } + pub fn items(&self) -> &[FetchedRSSItem] { &self.items } + pub fn fetched_at(&self) -> &DateTime { &self.fetched_at } + + pub async fn fetch_channel( + client: &AdapterClient, channel: Channel + ) -> Result> { + let bytestream = client.0.get(channel.link().clone()) + .send().await? + .bytes().await?; + + let rss_channel = rss::Channel::read_from(&bytestream[..])?; + + let now = Utc::now(); + + Ok(Some(FetchedRSSChannel::parse(rss_channel, now)?)) + } + + fn parse(rss: rss::Channel, fetched_at: DateTime) -> Result { + Ok(FetchedRSSChannel { + title: rss.title, + link: Url::parse(&rss.link)?, + description: rss.description, + + items: rss.items.into_iter().map(FetchedRSSItem::parse).collect(), + + fetched_at: fetched_at, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_utils::{ + ITEM_TITLE, ITEM_GUID, ITEM_GUID2, ITEM_DESC, ITEM_CONT, + CHANNEL_TITLE, CHANNEL_DESC, FEED1, + get_datetime + }; + + fn create_guid(value: String) -> rss::Guid { + rss::Guid { value, permalink: false } + } + + fn create_item(guid: rss::Guid) -> rss::Item { + rss::ItemBuilder::default() + .title(ITEM_TITLE.to_string()) + .guid(guid) + .description(ITEM_DESC.to_string()) + .content(ITEM_CONT.to_string()) + .build() + } + + fn create_channel(items: Vec) -> rss::Channel { + rss::ChannelBuilder::default() + .title(CHANNEL_TITLE.to_string()) + .description(CHANNEL_DESC.to_string()) + .link(FEED1.to_string()) + .items(items) + .build() + } + + #[test] + fn parse_item() { + let rss_guid = create_guid(ITEM_GUID.to_string()); + let rss_item = create_item(rss_guid); + let item = FetchedRSSItem::parse(rss_item); + + assert_eq!(item.guid, ITEM_GUID); + assert_eq!(item.title, ITEM_TITLE); + assert_eq!(item.description, ITEM_DESC); + assert_eq!(item.content, ITEM_CONT); + } + + #[test] + fn parse_feed() { + let rss_guid = create_guid(ITEM_GUID.to_string()); + let rss_guid2 = create_guid(ITEM_GUID2.to_string()); + let rss_item = create_item(rss_guid); + let rss_item2 = create_item(rss_guid2); + + let rss_channel = create_channel([rss_item, rss_item2].to_vec()); + + let date: DateTime = get_datetime(); + + let channel = FetchedRSSChannel::parse(rss_channel, date).unwrap(); + + assert_eq!(channel.title, CHANNEL_TITLE); + assert_eq!(channel.link.as_str(), FEED1); + assert_eq!(channel.description, CHANNEL_DESC); + assert_eq!(channel.fetched_at, date); + assert_eq!(channel.items.len(), 2); + assert!(channel.items.iter().any(|i| i.guid() == ITEM_GUID)); + assert!(channel.items.iter().any(|i| i.guid() == ITEM_GUID2)); + } +} diff --git a/koucha/src/lib.rs b/koucha/src/lib.rs index 5982237..a20a9dc 100644 --- a/koucha/src/lib.rs +++ b/koucha/src/lib.rs @@ -3,12 +3,15 @@ use std::error::Error; type Result = std::result::Result>; pub mod db; +pub mod fetch; pub mod score; #[cfg(test)] pub mod test_utils; pub struct AdapterPool(sqlx::SqlitePool); +pub struct AdapterClient(reqwest::Client); + pub struct AdapterBuilder { database_url: String, } @@ -29,15 +32,18 @@ impl AdapterBuilder { let db = sqlx::sqlite::SqlitePoolOptions::new() .connect(&self.database_url).await?; sqlx::migrate!().run(&db).await?; + let client = reqwest::Client::new(); - Ok(Adapter { db: AdapterPool(db) }) + Ok(Adapter { db: AdapterPool(db), client: AdapterClient(client) }) } } pub struct Adapter { db: AdapterPool, + client: AdapterClient, } impl Adapter { pub fn get_pool(&self) -> &AdapterPool { &self.db } + pub fn get_client(&self) -> &AdapterClient { &self.client } }