fetch, create fetch mod and AdapterClient

Creates a `fetch` module for fetching remote RSS content.

This implementation is bare-bones and far from ideal, since it doesn't yet pass
any compression or timestamp information.
This commit is contained in:
Julia Lange 2026-02-06 13:18:26 -08:00
parent 55c3e967bc
commit 4467690ff1
Signed by: Julia
SSH key fingerprint: SHA256:5DJcfxa5/fKCYn57dcabJa2vN2e6eT0pBerYi5SUbto
4 changed files with 288 additions and 1 deletions

129
koucha/Cargo.lock generated
View file

@ -26,6 +26,19 @@ dependencies = [
"num-traits",
]
[[package]]
name = "atom_syndication"
version = "0.12.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2f68d23e2cb4fd958c705b91a6b4c80ceeaf27a9e11651272a8389d5ce1a4a3"
dependencies = [
"chrono",
"derive_builder",
"diligent-date-parser",
"never",
"quick-xml",
]
[[package]]
name = "atomic-waker"
version = "1.1.2"
@ -260,6 +273,41 @@ dependencies = [
"typenum",
]
[[package]]
name = "darling"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core",
"quote",
"syn",
]
[[package]]
name = "der"
version = "0.7.10"
@ -271,6 +319,37 @@ dependencies = [
"zeroize",
]
[[package]]
name = "derive_builder"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
dependencies = [
"derive_builder_macro",
]
[[package]]
name = "derive_builder_core"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "derive_builder_macro"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
"derive_builder_core",
"syn",
]
[[package]]
name = "digest"
version = "0.10.7"
@ -283,6 +362,15 @@ dependencies = [
"subtle",
]
[[package]]
name = "diligent-date-parser"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8ede7d79366f419921e2e2f67889c12125726692a313bffb474bd5f37a581e9"
dependencies = [
"chrono",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
@ -806,6 +894,12 @@ dependencies = [
"zerovec",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "1.1.0"
@ -907,6 +1001,7 @@ version = "0.1.0"
dependencies = [
"chrono",
"reqwest",
"rss",
"sqlx",
"tokio",
]
@ -1014,6 +1109,12 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "never"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c96aba5aa877601bb3f6dd6a63a969e1f82e60646e81e71b14496995e9853c91"
[[package]]
name = "num-bigint-dig"
version = "0.8.6"
@ -1182,6 +1283,16 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "quick-xml"
version = "0.37.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb"
dependencies = [
"encoding_rs",
"memchr",
]
[[package]]
name = "quinn"
version = "0.11.9"
@ -1402,6 +1513,18 @@ dependencies = [
"zeroize",
]
[[package]]
name = "rss"
version = "2.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2107738f003660f0a91f56fd3e3bd3ab5d918b2ddaf1e1ec2136fb1c46f71bf"
dependencies = [
"atom_syndication",
"derive_builder",
"never",
"quick-xml",
]
[[package]]
name = "rustc-hash"
version = "2.1.1"
@ -1894,6 +2017,12 @@ dependencies = [
"unicode-properties",
]
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "subtle"
version = "2.6.1"

View file

@ -5,6 +5,7 @@ edition = "2024"
[dependencies]
reqwest = "0.13.1"
rss = "2.0.12"
tokio = { version = "1.49.0", features = ["full"] }
sqlx = { version = "0.8.6", features = [ "runtime-tokio", "sqlite" ] }
chrono = "0.4.43"

151
koucha/src/fetch.rs Normal file
View file

@ -0,0 +1,151 @@
use crate::{
Result,
db::Channel,
AdapterClient,
};
use reqwest::Url;
use chrono::{DateTime, Utc};
use std::hash::{Hash, Hasher};
/// A single feed entry, reduced to the text fields this module keeps.
///
/// Values are produced by `FetchedRSSItem::parse` from an `rss::Item`; text
/// fields that are absent in the source item are stored as empty strings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FetchedRSSItem {
    /// Identifier of the item: the feed-supplied GUID when there is one,
    /// otherwise a generated `gen-…` value.
    guid: String,
    /// Item title, or `""` when the feed omits it.
    title: String,
    /// Item description, or `""` when the feed omits it.
    description: String,
    /// Item content, or `""` when the feed omits it.
    content: String,
}
impl FetchedRSSItem {
    /// Identifier of the item (feed-supplied, or generated with a `gen-` prefix).
    pub fn guid(&self) -> &str { &self.guid }
    /// Item title; empty when the feed did not provide one.
    pub fn title(&self) -> &str { &self.title }
    /// Item description; empty when the feed did not provide one.
    pub fn description(&self) -> &str { &self.description }
    /// Item content; empty when the feed did not provide one.
    pub fn content(&self) -> &str { &self.content }

    /// Converts a parsed `rss::Item` into a `FetchedRSSItem`.
    ///
    /// Missing title, description and content become empty strings; the GUID
    /// is taken from the item or generated by `get_or_create_guid`.
    fn parse(item: rss::Item) -> Self {
        FetchedRSSItem {
            guid: Self::get_or_create_guid(&item),
            title: item.title().unwrap_or("").to_string(),
            description: item.description().unwrap_or("").to_string(),
            content: item.content().unwrap_or("").to_string(),
        }
    }

    /// Returns the item's own GUID, or derives one from its link, title and
    /// description when the feed supplies none.
    ///
    /// A GUID that is empty or only whitespace is treated as missing;
    /// otherwise every item of a feed that emits blank `<guid>` elements
    /// would end up with the same identifier.
    ///
    /// Generated identifiers have the form `gen-<hex>`, where `<hex>` is the
    /// 64-bit FNV-1a hash of the three fields. The hash is written out here
    /// instead of using `std::collections::hash_map::DefaultHasher` because
    /// the standard library does not specify that hasher's algorithm, so its
    /// output may change between Rust releases and is unsuitable for ids that
    /// must stay the same from one run to the next.
    fn get_or_create_guid(item: &rss::Item) -> String {
        const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
        const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

        if let Some(guid) = item.guid().filter(|guid| !guid.value().trim().is_empty()) {
            return guid.value().to_string();
        }

        let mut hash = FNV_OFFSET_BASIS;
        for field in [item.link(), item.title(), item.description()] {
            // 0xff never occurs in UTF-8, so it unambiguously terminates each
            // field: ("ab", "c") and ("a", "bc") hash differently.
            for byte in field.unwrap_or("").bytes().chain(std::iter::once(0xff)) {
                hash ^= u64::from(byte);
                hash = hash.wrapping_mul(FNV_PRIME);
            }
        }
        format!("gen-{hash:x}")
    }
}
/// A feed as it looked at one fetch: channel metadata, its items, and the
/// timestamp associated with the fetch.
pub struct FetchedRSSChannel {
/// Channel title, copied from the parsed `rss::Channel`.
title: String,
/// Channel link, parsed into a `Url` from the feed's link string.
link: Url,
/// Channel description, copied from the parsed `rss::Channel`.
description: String,
/// Items of the channel, each converted with `FetchedRSSItem::parse`.
items: Vec<FetchedRSSItem>,
/// Timestamp handed to `parse`; `fetch_channel` passes `Utc::now()` taken
/// after the response body has been parsed.
fetched_at: DateTime<Utc>,
}
impl FetchedRSSChannel {
    /// Channel title as given by the feed.
    pub fn title(&self) -> &str { &self.title }
    /// Channel link as given by the feed, parsed into a `Url`.
    pub fn link(&self) -> &Url { &self.link }
    /// Channel description as given by the feed.
    pub fn description(&self) -> &str { &self.description }
    /// Items contained in the fetched feed.
    pub fn items(&self) -> &[FetchedRSSItem] { &self.items }
    /// Timestamp recorded for this fetch.
    pub fn fetched_at(&self) -> &DateTime<Utc> { &self.fetched_at }

    /// Downloads the feed at `channel.link()` and parses it.
    ///
    /// On success the result is currently always `Some`; the `Option` is part
    /// of the signature but no code path here produces `None`.
    ///
    /// # Errors
    ///
    /// Returns an error when the request fails, when the server answers with
    /// a 4xx/5xx status, when the body is not a readable RSS document, or when
    /// the channel's link is not a valid URL.
    pub async fn fetch_channel(
        client: &AdapterClient, channel: Channel
    ) -> Result<Option<Self>> {
        let body = client.0.get(channel.link().clone())
            .send().await?
            // `send` succeeds for any HTTP status; without this check an error
            // page would be handed to the RSS parser and surface as a
            // misleading parse failure instead of the HTTP error.
            .error_for_status()?
            .bytes().await?;
        let rss_channel = rss::Channel::read_from(&body[..])?;
        let fetched_at = Utc::now();
        Self::parse(rss_channel, fetched_at).map(Some)
    }

    /// Converts a parsed `rss::Channel` into a `FetchedRSSChannel` stamped
    /// with `fetched_at`.
    ///
    /// # Errors
    ///
    /// Returns an error when the channel's link cannot be parsed as a URL.
    fn parse(rss: rss::Channel, fetched_at: DateTime<Utc>) -> Result<Self> {
        Ok(FetchedRSSChannel {
            title: rss.title,
            link: Url::parse(&rss.link)?,
            description: rss.description,
            items: rss.items.into_iter().map(FetchedRSSItem::parse).collect(),
            fetched_at,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::{
        CHANNEL_DESC, CHANNEL_TITLE, FEED1,
        ITEM_CONT, ITEM_DESC, ITEM_GUID, ITEM_GUID2, ITEM_TITLE,
        get_datetime,
    };

    /// Builds an `rss::Item` carrying the shared fixture title, description
    /// and content, with a non-permalink GUID of `guid_value`.
    fn item_with_guid(guid_value: String) -> rss::Item {
        let guid = rss::Guid { value: guid_value, permalink: false };
        rss::ItemBuilder::default()
            .title(ITEM_TITLE.to_string())
            .guid(guid)
            .description(ITEM_DESC.to_string())
            .content(ITEM_CONT.to_string())
            .build()
    }

    /// Builds an `rss::Channel` from the shared channel fixtures and `items`.
    fn channel_with_items(items: Vec<rss::Item>) -> rss::Channel {
        rss::ChannelBuilder::default()
            .title(CHANNEL_TITLE.to_string())
            .description(CHANNEL_DESC.to_string())
            .link(FEED1.to_string())
            .items(items)
            .build()
    }

    /// Every field of a parsed item mirrors the source item.
    #[test]
    fn parse_item() {
        let parsed = FetchedRSSItem::parse(item_with_guid(ITEM_GUID.to_string()));

        assert_eq!(parsed.guid, ITEM_GUID);
        assert_eq!(parsed.title, ITEM_TITLE);
        assert_eq!(parsed.description, ITEM_DESC);
        assert_eq!(parsed.content, ITEM_CONT);
    }

    /// Channel metadata, timestamp and both items survive parsing.
    #[test]
    fn parse_feed() {
        let source = channel_with_items(vec![
            item_with_guid(ITEM_GUID.to_string()),
            item_with_guid(ITEM_GUID2.to_string()),
        ]);
        let stamp: DateTime<Utc> = get_datetime();

        let parsed = FetchedRSSChannel::parse(source, stamp).unwrap();

        assert_eq!(parsed.title, CHANNEL_TITLE);
        assert_eq!(parsed.link.as_str(), FEED1);
        assert_eq!(parsed.description, CHANNEL_DESC);
        assert_eq!(parsed.fetched_at, stamp);
        assert_eq!(parsed.items.len(), 2);
        for expected in [ITEM_GUID, ITEM_GUID2] {
            assert!(parsed.items.iter().any(|item| item.guid() == expected));
        }
    }
}

View file

@ -3,12 +3,15 @@ use std::error::Error;
/// Crate-local result alias whose error type is a boxed `dyn Error`.
type Result<T> = std::result::Result<T, Box<dyn Error>>;
pub mod db;
pub mod fetch;
pub mod score;
#[cfg(test)]
pub mod test_utils;
/// Newtype around `sqlx::SqlitePool`; the wrapped pool is a private field.
pub struct AdapterPool(sqlx::SqlitePool);
/// Newtype around `reqwest::Client`; the wrapped client is a private field.
pub struct AdapterClient(reqwest::Client);
/// Holds the settings needed to construct an `Adapter`.
pub struct AdapterBuilder {
/// URL given to the SQLite pool options when connecting to the database.
database_url: String,
}
@ -29,15 +32,18 @@ impl AdapterBuilder {
let db = sqlx::sqlite::SqlitePoolOptions::new()
.connect(&self.database_url).await?;
sqlx::migrate!().run(&db).await?;
let client = reqwest::Client::new();
Ok(Adapter { db: AdapterPool(db) })
Ok(Adapter { db: AdapterPool(db), client: AdapterClient(client) })
}
}
/// Bundles the database pool wrapper and the HTTP client wrapper.
pub struct Adapter {
/// Wrapper around the SQLite connection pool.
db: AdapterPool,
/// Wrapper around the `reqwest` HTTP client.
client: AdapterClient,
}
impl Adapter {
pub fn get_pool(&self) -> &AdapterPool { &self.db }
pub fn get_client(&self) -> &AdapterClient { &self.client }
}