From 7bb4cf4230e77736180ae6afcbb0b13120536594 Mon Sep 17 00:00:00 2001 From: Julia Lange Date: Thu, 22 Jan 2026 10:39:38 -0800 Subject: [PATCH] Got the structure; got lots of tests; item still needs work --- koucha/src/channel.rs | 326 +++++++++++++++++++++++++++--------- koucha/src/channel/fetch.rs | 67 ++++++++ koucha/src/feed.rs | 12 +- koucha/src/item.rs | 19 ++- koucha/src/user.rs | 2 +- 5 files changed, 338 insertions(+), 88 deletions(-) create mode 100644 koucha/src/channel/fetch.rs diff --git a/koucha/src/channel.rs b/koucha/src/channel.rs index 6d130ff..b95b0b8 100644 --- a/koucha/src/channel.rs +++ b/koucha/src/channel.rs @@ -1,69 +1,18 @@ use reqwest::{Url, Client}; use sqlx::SqlitePool; use chrono::{DateTime, Utc}; -use crate::{Result}; -use std::hash::{Hash, Hasher}; +use crate::{ + Result, + Item, + channel::fetch::FetchedRSSChannel, + item::UnparsedItem, +}; -pub struct ChannelId(pub i64); -impl From for ChannelId { fn from(id: i64) -> Self { ChannelId(id) } } +#[derive(Copy, Clone)] +pub struct ChannelId(i64); impl From for i64 { fn from(id: ChannelId) -> Self { id.0 } } - -struct FetchedRSSItem { - guid: String, - title: String, - description: String, - content: String, -} -impl FetchedRSSItem { - fn parse(item: rss::Item) -> Self { - FetchedRSSItem { - guid: Self::get_or_create_guid(&item), - title: item.title().unwrap_or("").to_string(), - description: item.description().unwrap_or("").to_string(), - content: item.content().unwrap_or("").to_string(), - } - } - - fn get_or_create_guid(item: &rss::Item) -> String { - if let Some(guid) = item.guid() { - return guid.value().to_string(); - } - - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - item.link().unwrap_or("").hash(&mut hasher); - item.title().unwrap_or("").hash(&mut hasher); - item.description().unwrap_or("").hash(&mut hasher); - - format!("gen-{:x}", hasher.finish()) - } -} -pub struct FetchedRSSChannel { - title: String, - link: Url, - description: String, - - items: Vec, - - fetched_at: DateTime, -} -impl FetchedRSSChannel { - pub fn fetched_at(&self) -> &DateTime { - &self.fetched_at - } - - fn parse(rss: rss::Channel) -> Result { - Ok(FetchedRSSChannel { - title: rss.title, - link: Url::parse(&rss.link)?, - description: rss.description, - - items: rss.items.into_iter().map(FetchedRSSItem::parse).collect(), - - fetched_at: Utc::now(), - }) - } -} +pub mod fetch; pub struct UnparsedChannel { pub id: i64, @@ -88,38 +37,77 @@ impl UnparsedChannel { } pub struct Channel { - pub id: ChannelId, - pub title: String, - pub link: Url, - pub description: Option, - pub last_fetched: Option>, + id: ChannelId, + title: String, + link: Url, + description: Option, + last_fetched: Option>, } impl Channel { - + pub fn id(&self) -> ChannelId { self.id } + pub fn title(&self) -> &str { &self.title } + pub fn link(&self) -> &Url { &self.link } + pub fn description(&self) -> Option<&str> { self.description.as_deref() } pub async fn get_all(pool: &SqlitePool) -> Result> { let channels: Result> = sqlx::query_as!( UnparsedChannel, "SELECT id, title, link, description, last_fetched FROM channels" - ) - .fetch_all(pool).await?.into_iter() - .map(UnparsedChannel::parse).collect(); + ).fetch_all(pool).await?.into_iter().map(UnparsedChannel::parse).collect(); channels } + pub async fn get(pool: &SqlitePool, id: ChannelId) -> Result { + let channel: Result = sqlx::query_as!( + UnparsedChannel, + "SELECT id, title, link, description, last_fetched + FROM channels + WHERE id = ?", + id.0 + ).fetch_one(pool).await?.parse(); + + channel + } + + pub async fn create( + pool: &SqlitePool, link: Url + ) -> Result { + let link_str = link.as_str(); + + if let Ok(existing_channel) = sqlx::query_as!( + UnparsedChannel, + "SELECT id as `id!`, title, link, description, last_fetched + FROM channels + WHERE link = ?", + link_str + ).fetch_one(pool).await { + return existing_channel.parse(); + } + + let new_channel = sqlx::query_as!( + UnparsedChannel, + "INSERT INTO channels (title, link) + VALUES (?, ?) + RETURNING id, title, link, description, last_fetched", + link_str, link_str + ).fetch_one(pool).await?.parse(); + + new_channel + } + // TODO implement fetch skipping fn should_skip_fetch(&self) -> bool { false } // TODO implement conditional fetching pub async fn fetch_rss( - client: &Client, channel: &Channel + &self, client: &Client ) -> Result> { - if channel.should_skip_fetch() { + if self.should_skip_fetch() { return Ok(None); } - let bytestream = client.get(channel.link.clone()) + let bytestream = client.get(self.link.clone()) .send().await? .bytes().await?; @@ -129,33 +117,39 @@ impl Channel { } pub async fn update_metadata( - pool: &SqlitePool, id: ChannelId, fetched: FetchedRSSChannel + &self, pool: &SqlitePool, fetched: FetchedRSSChannel ) -> Result<()> { - let link = fetched.link.as_str(); - let fetched_at = fetched.fetched_at.to_rfc2822(); + let title = fetched.title(); + let description = fetched.description(); + let link = fetched.link().as_str(); + let fetched_at = fetched.fetched_at().to_rfc2822(); sqlx::query!( "UPDATE channels SET title = ?, link = ?, description = ?, last_fetched = ? WHERE id = ?", - fetched.title, link, fetched.description, fetched_at, - id.0 + title, link, description, fetched_at, + self.id.0 ).execute(pool).await?; Ok(()) } pub async fn update_items( - pool: &SqlitePool, id: ChannelId, fetched: FetchedRSSChannel + &self, pool: &SqlitePool, fetched: FetchedRSSChannel ) -> Result<()> { - let fetched_at = fetched.fetched_at.to_rfc2822(); + let fetched_at = fetched.fetched_at().to_rfc2822(); - for item in fetched.items { + for item in fetched.items() { + let guid = item.guid(); + let title = item.title(); + let description = item.description(); + let content = item.content(); sqlx::query!( "INSERT OR IGNORE INTO items (channel_id, guid, fetched_at, title, description, content) VALUES (?, ?, ?, ?, ?, ?)", - id.0, item.guid, fetched_at, item.title, item.description, item.content + self.id.0, guid, fetched_at, title, description, content ) .execute(pool) .await?; @@ -163,4 +157,174 @@ impl Channel { Ok(()) } + + pub async fn get_items(&self, pool: &SqlitePool) -> Result> { + let items: Result> = sqlx::query_as!( + UnparsedItem, + "SELECT id as `id!` FROM items WHERE channel_id = ?", + self.id.0 + ).fetch_all(pool).await?.into_iter().map(UnparsedItem::parse).collect(); + + items + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rss::{ + Guid as RSSGuid, + Item as RSSItem, + ItemBuilder as RSSItemBuilder, + Channel as RSSChannel, + ChannelBuilder as RSSChannelBuilder, + }; + use sqlx::SqlitePool; + + const ITEM_TITLE: &str = "My Item"; + const ITEM_GUID1: &str = "https://mycontent.com/blog/1"; + const ITEM_GUID2: &str = "something-else!"; + const ITEM_DESC: &str = "A test item"; + const ITEM_CONT: &str = "some rss content baby"; + const CHAN_TITLE: &str = "My Feed"; + const CHAN_DESC: &str = "A test feed"; + const FEED1: &str = "https://example.com/feed"; + const FEED2: &str = "https://example2.com/feed"; + + async fn setup_test_db() -> SqlitePool { + let pool = SqlitePool::connect("sqlite::memory:").await.unwrap(); + sqlx::migrate!().run(&pool).await.unwrap(); + pool + } + + #[tokio::test] + async fn create_channel() { + let pool = setup_test_db().await; + let url_feed = Url::parse(FEED1).unwrap(); + + let channel = Channel::create(&pool, url_feed).await.unwrap(); + + assert!(channel.id().0 > 0); + assert_eq!(channel.link().as_str(), FEED1); + assert!(channel.title().len() > 0); + } + + #[tokio::test] + async fn create_duplicate_returns_existing() { + let pool = setup_test_db().await; + let url_feed = Url::parse(FEED1).unwrap(); + + let channel1 = Channel::create(&pool, url_feed.clone()).await.unwrap(); + let channel2 = Channel::create(&pool, url_feed).await.unwrap(); + + assert_eq!( + i64::from(channel1.id()), + i64::from(channel2.id()) + ); + } + + #[tokio::test] + async fn get_all_channels() { + let pool = setup_test_db().await; + let url_feed1 = Url::parse(FEED1).unwrap(); + let url_feed2 = Url::parse(FEED2).unwrap(); + + Channel::create(&pool, url_feed1).await.unwrap(); + Channel::create(&pool, url_feed2).await.unwrap(); + + let channels = Channel::get_all(&pool).await.unwrap(); + + assert_eq!(channels.len(), 2); + } + + #[tokio::test] + async fn update_metadata() { + let pool = setup_test_db().await; + let url_feed = Url::parse(FEED1).unwrap(); + + let channel = Channel::create(&pool, url_feed).await.unwrap(); + + let fake_rss: RSSChannel = RSSChannelBuilder::default() + .title(CHAN_TITLE) + .link(FEED2) + .description(CHAN_DESC) + .build(); + + let fetched = FetchedRSSChannel::parse(fake_rss).unwrap(); + + channel.update_metadata(&pool, fetched).await.unwrap(); + + let updated = Channel::get(&pool, channel.id()).await.unwrap(); + assert_eq!(updated.title(), CHAN_TITLE); + assert_eq!(updated.link().as_str(), FEED2); + assert_eq!(updated.description(), Some(CHAN_DESC)); + } + + #[tokio::test] + async fn update_items() { + let pool = setup_test_db().await; + let url_feed = Url::parse(FEED1).unwrap(); + + let channel = Channel::create(&pool, url_feed).await.unwrap(); + + let item1: RSSItem = RSSItemBuilder::default() + .title(ITEM_TITLE.to_string()) + .description(ITEM_DESC.to_string()) + .content(ITEM_CONT.to_string()) + .guid(RSSGuid { value: ITEM_GUID1.to_string(), permalink: false }) + .build(); + let item2: RSSItem = RSSItemBuilder::default() + .title(ITEM_TITLE.to_string()) + .description(ITEM_DESC.to_string()) + .content(ITEM_CONT.to_string()) + .guid(RSSGuid { value: ITEM_GUID2.to_string(), permalink: false }) + .build(); + + let fake_rss: RSSChannel = RSSChannelBuilder::default() + .title(CHAN_TITLE) + .link(FEED2) + .description(CHAN_DESC) + .item(item1) + .item(item2) + .build(); + + let fetched = FetchedRSSChannel::parse(fake_rss).unwrap(); + + channel.update_items(&pool, fetched).await.unwrap(); + + let items = channel.get_items(&pool).await.unwrap(); + assert_eq!(items.len(), 2); + } + + #[tokio::test] + async fn update_items_ignores_duplicates() { + let pool = setup_test_db().await; + let url_feed = Url::parse(FEED1).unwrap(); + + let channel = Channel::create(&pool, url_feed).await.unwrap(); + + let item1: RSSItem = RSSItemBuilder::default() + .title(ITEM_TITLE.to_string()) + .description(ITEM_DESC.to_string()) + .content(ITEM_CONT.to_string()) + .guid(RSSGuid { value: ITEM_GUID1.to_string(), permalink: false }) + .build(); + + let fake_rss: RSSChannel = RSSChannelBuilder::default() + .title(CHAN_TITLE) + .link(FEED2) + .description(CHAN_DESC) + .item(item1) + .build(); + + let fetched = FetchedRSSChannel::parse(fake_rss.clone()).unwrap(); + + channel.update_items(&pool, fetched).await.unwrap(); + let fetched = FetchedRSSChannel::parse(fake_rss).unwrap(); + + channel.update_items(&pool, fetched).await.unwrap(); + + let items = channel.get_items(&pool).await.unwrap(); + assert_eq!(items.len(), 1); + } } diff --git a/koucha/src/channel/fetch.rs b/koucha/src/channel/fetch.rs new file mode 100644 index 0000000..b660cfe --- /dev/null +++ b/koucha/src/channel/fetch.rs @@ -0,0 +1,67 @@ +use crate::Result; +use reqwest::Url; +use chrono::{DateTime, Utc}; +use std::hash::{Hash, Hasher}; + +pub struct FetchedRSSItem { + guid: String, + title: String, + description: String, + content: String, +} +impl FetchedRSSItem { + pub fn guid(&self) -> &str { &self.guid } + pub fn title(&self) -> &str { &self.title } + pub fn description(&self) -> &str { &self.description } + pub fn content(&self) -> &str { &self.content } + + fn parse(item: rss::Item) -> Self { + FetchedRSSItem { + guid: Self::get_or_create_guid(&item), + title: item.title().unwrap_or("").to_string(), + description: item.description().unwrap_or("").to_string(), + content: item.content().unwrap_or("").to_string(), + } + } + + fn get_or_create_guid(item: &rss::Item) -> String { + if let Some(guid) = item.guid() { + return guid.value().to_string(); + } + + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + item.link().unwrap_or("").hash(&mut hasher); + item.title().unwrap_or("").hash(&mut hasher); + item.description().unwrap_or("").hash(&mut hasher); + + format!("gen-{:x}", hasher.finish()) + } +} +pub struct FetchedRSSChannel { + title: String, + link: Url, + description: String, + + items: Vec, + + fetched_at: DateTime, +} +impl FetchedRSSChannel { + pub fn title(&self) -> &str { &self.title } + pub fn link(&self) -> &Url { &self.link } + pub fn description(&self) -> &str { &self.description } + pub fn items(&self) -> &[FetchedRSSItem] { &self.items } + pub fn fetched_at(&self) -> &DateTime { &self.fetched_at } + + pub fn parse(rss: rss::Channel) -> Result { + Ok(FetchedRSSChannel { + title: rss.title, + link: Url::parse(&rss.link)?, + description: rss.description, + + items: rss.items.into_iter().map(FetchedRSSItem::parse).collect(), + + fetched_at: Utc::now(), + }) + } +} diff --git a/koucha/src/feed.rs b/koucha/src/feed.rs index 56097fb..8e33b92 100644 --- a/koucha/src/feed.rs +++ b/koucha/src/feed.rs @@ -1,6 +1,7 @@ use crate::{ Result, Item, + item::UnparsedItem, Channel, channel::{ UnparsedChannel, @@ -77,10 +78,11 @@ impl Feed { pub async fn add_channel( &self, pool: &SqlitePool, channel_id: ChannelId ) -> Result<()> { + let int_channel_id = i64::from(channel_id); sqlx::query!( "INSERT INTO feed_channels (feed_id, channel_id) VALUES (?, ?)", - self.id.0, channel_id.0 + self.id.0, int_channel_id ).execute(pool).await?; Ok(()) @@ -89,16 +91,16 @@ impl Feed { pub async fn get_items( &self, pool: &SqlitePool, limit: u8, offset: u32 ) -> Result> { - let items = sqlx::query_as!( - Item, + let items: Result> = sqlx::query_as!( + UnparsedItem, "SELECT item_id as id FROM feed_items WHERE feed_id = ? AND archived = FALSE ORDER BY score DESC LIMIT ? OFFSET ?", self.id.0, limit, offset - ).fetch_all(pool).await?; + ).fetch_all(pool).await?.into_iter().map(UnparsedItem::parse).collect(); - Ok(items) + items } pub async fn get_channels( diff --git a/koucha/src/item.rs b/koucha/src/item.rs index 0d2a7ab..4d6af9f 100644 --- a/koucha/src/item.rs +++ b/koucha/src/item.rs @@ -1,3 +1,20 @@ -pub struct Item { +use crate::Result; + +#[derive(Copy, Clone)] +pub struct ItemId(i64); +impl From for i64 { fn from(id: ItemId) -> Self { id.0 } } + +pub struct UnparsedItem { pub id: i64, } +impl UnparsedItem { + pub fn parse(self) -> Result { + Ok(Item { + id: ItemId(self.id), + }) + } +} + +pub struct Item { + id: ItemId, +} diff --git a/koucha/src/user.rs b/koucha/src/user.rs index f78de6e..fc4e6ad 100644 --- a/koucha/src/user.rs +++ b/koucha/src/user.rs @@ -51,7 +51,7 @@ impl User { pub async fn create(pool: &SqlitePool, name: &str) -> Result { let result = sqlx::query!( - "INSERT INTO users (name) + "INSERT INTO users (name) VALUES (?) RETURNING id, name", name