Added the channel structure and a suite of tests; the item module still needs work

This commit is contained in:
Julia Lange 2026-01-22 10:39:38 -08:00
parent d7123fb153
commit 7bb4cf4230
Signed by: Julia
SSH key fingerprint: SHA256:5DJcfxa5/fKCYn57dcabJa2vN2e6eT0pBerYi5SUbto
5 changed files with 338 additions and 88 deletions

View file

@@ -1,69 +1,18 @@
use reqwest::{Url, Client}; use reqwest::{Url, Client};
use sqlx::SqlitePool; use sqlx::SqlitePool;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use crate::{Result}; use crate::{
use std::hash::{Hash, Hasher}; Result,
Item,
channel::fetch::FetchedRSSChannel,
item::UnparsedItem,
};
pub struct ChannelId(pub i64); #[derive(Copy, Clone)]
impl From<i64> for ChannelId { fn from(id: i64) -> Self { ChannelId(id) } } pub struct ChannelId(i64);
impl From<ChannelId> for i64 { fn from(id: ChannelId) -> Self { id.0 } } impl From<ChannelId> for i64 { fn from(id: ChannelId) -> Self { id.0 } }
pub mod fetch;
// An RSS <item> parsed from a fetched feed, normalized to owned strings.
// (This pre-move copy is superseded by channel::fetch::FetchedRSSItem.)
struct FetchedRSSItem {
// Stable identifier: the feed's <guid> when present, otherwise a generated hash.
guid: String,
title: String,
description: String,
content: String,
}
impl FetchedRSSItem {
// Converts an `rss::Item` into our owned representation; absent fields become "".
fn parse(item: rss::Item) -> Self {
FetchedRSSItem {
guid: Self::get_or_create_guid(&item),
title: item.title().unwrap_or("").to_string(),
description: item.description().unwrap_or("").to_string(),
content: item.content().unwrap_or("").to_string(),
}
}
// Returns the item's <guid> value, or derives one by hashing link + title +
// description when the feed omits a guid.
// NOTE(review): DefaultHasher's algorithm is not guaranteed stable across Rust
// releases, so generated guids may change between builds — confirm this is
// acceptable for the INSERT OR IGNORE dedup in the items table.
fn get_or_create_guid(item: &rss::Item) -> String {
if let Some(guid) = item.guid() {
return guid.value().to_string();
}
let mut hasher = std::collections::hash_map::DefaultHasher::new();
item.link().unwrap_or("").hash(&mut hasher);
item.title().unwrap_or("").hash(&mut hasher);
item.description().unwrap_or("").hash(&mut hasher);
format!("gen-{:x}", hasher.finish())
}
}
// An RSS channel freshly fetched and parsed, with a timestamp of when we
// fetched it. (This pre-move copy is superseded by channel::fetch.)
pub struct FetchedRSSChannel {
title: String,
link: Url,
description: String,
items: Vec<FetchedRSSItem>,
// When this snapshot was taken (set at parse time, not HTTP-request time).
fetched_at: DateTime<Utc>,
}
impl FetchedRSSChannel {
// Timestamp recorded when this channel snapshot was parsed.
pub fn fetched_at(&self) -> &DateTime<Utc> {
&self.fetched_at
}
// Converts a parsed `rss::Channel` into our representation.
// Fails only if the channel's <link> is not a valid URL.
fn parse(rss: rss::Channel) -> Result<Self> {
Ok(FetchedRSSChannel {
title: rss.title,
link: Url::parse(&rss.link)?,
description: rss.description,
items: rss.items.into_iter().map(FetchedRSSItem::parse).collect(),
fetched_at: Utc::now(),
})
}
}
pub struct UnparsedChannel { pub struct UnparsedChannel {
pub id: i64, pub id: i64,
@@ -88,38 +37,77 @@ impl UnparsedChannel {
} }
pub struct Channel { pub struct Channel {
pub id: ChannelId, id: ChannelId,
pub title: String, title: String,
pub link: Url, link: Url,
pub description: Option<String>, description: Option<String>,
pub last_fetched: Option<DateTime<Utc>>, last_fetched: Option<DateTime<Utc>>,
} }
impl Channel { impl Channel {
pub fn id(&self) -> ChannelId { self.id }
pub fn title(&self) -> &str { &self.title }
pub fn link(&self) -> &Url { &self.link }
pub fn description(&self) -> Option<&str> { self.description.as_deref() }
pub async fn get_all(pool: &SqlitePool) -> Result<Vec<Self>> { pub async fn get_all(pool: &SqlitePool) -> Result<Vec<Self>> {
let channels: Result<Vec<Channel>> = sqlx::query_as!( let channels: Result<Vec<Channel>> = sqlx::query_as!(
UnparsedChannel, UnparsedChannel,
"SELECT id, title, link, description, last_fetched FROM channels" "SELECT id, title, link, description, last_fetched FROM channels"
) ).fetch_all(pool).await?.into_iter().map(UnparsedChannel::parse).collect();
.fetch_all(pool).await?.into_iter()
.map(UnparsedChannel::parse).collect();
channels channels
} }
// Loads a single channel row by id.
// Errors if no row matches (`fetch_one` fails on zero rows) or if the stored
// link/timestamp fails parsing in `UnparsedChannel::parse`.
pub async fn get(pool: &SqlitePool, id: ChannelId) -> Result<Self> {
let channel: Result<Self> = sqlx::query_as!(
UnparsedChannel,
"SELECT id, title, link, description, last_fetched
FROM channels
WHERE id = ?",
id.0
).fetch_one(pool).await?.parse();
channel
}
// Returns the channel registered for `link`, creating a placeholder row
// (title = link) if none exists yet; real metadata is filled in later by
// `update_metadata`. Errors propagate from the database and from row parsing.
pub async fn create(
pool: &SqlitePool, link: Url
) -> Result<Self> {
let link_str = link.as_str();
// Use fetch_optional so "no such row" is distinguished from genuine DB
// errors: the previous `if let Ok(..) = fetch_one(..).await` treated *any*
// failure (e.g. a lost connection) as "not found" and fell through to the
// INSERT, masking the real error.
if let Some(existing_channel) = sqlx::query_as!(
UnparsedChannel,
"SELECT id as `id!`, title, link, description, last_fetched
FROM channels
WHERE link = ?",
link_str
).fetch_optional(pool).await? {
return existing_channel.parse();
}
// No existing row: insert one, using the link as a provisional title.
let new_channel = sqlx::query_as!(
UnparsedChannel,
"INSERT INTO channels (title, link)
VALUES (?, ?)
RETURNING id, title, link, description, last_fetched",
link_str, link_str
).fetch_one(pool).await?.parse();
new_channel
}
// TODO implement fetch skipping // TODO implement fetch skipping
fn should_skip_fetch(&self) -> bool { false } fn should_skip_fetch(&self) -> bool { false }
// TODO implement conditional fetching // TODO implement conditional fetching
pub async fn fetch_rss( pub async fn fetch_rss(
client: &Client, channel: &Channel &self, client: &Client
) -> Result<Option<FetchedRSSChannel>> { ) -> Result<Option<FetchedRSSChannel>> {
if channel.should_skip_fetch() { if self.should_skip_fetch() {
return Ok(None); return Ok(None);
} }
let bytestream = client.get(channel.link.clone()) let bytestream = client.get(self.link.clone())
.send().await? .send().await?
.bytes().await?; .bytes().await?;
@@ -129,33 +117,39 @@ impl Channel {
} }
pub async fn update_metadata( pub async fn update_metadata(
pool: &SqlitePool, id: ChannelId, fetched: FetchedRSSChannel &self, pool: &SqlitePool, fetched: FetchedRSSChannel
) -> Result<()> { ) -> Result<()> {
let link = fetched.link.as_str(); let title = fetched.title();
let fetched_at = fetched.fetched_at.to_rfc2822(); let description = fetched.description();
let link = fetched.link().as_str();
let fetched_at = fetched.fetched_at().to_rfc2822();
sqlx::query!( sqlx::query!(
"UPDATE channels "UPDATE channels
SET title = ?, link = ?, description = ?, SET title = ?, link = ?, description = ?,
last_fetched = ? last_fetched = ?
WHERE id = ?", WHERE id = ?",
fetched.title, link, fetched.description, fetched_at, title, link, description, fetched_at,
id.0 self.id.0
).execute(pool).await?; ).execute(pool).await?;
Ok(()) Ok(())
} }
pub async fn update_items( pub async fn update_items(
pool: &SqlitePool, id: ChannelId, fetched: FetchedRSSChannel &self, pool: &SqlitePool, fetched: FetchedRSSChannel
) -> Result<()> { ) -> Result<()> {
let fetched_at = fetched.fetched_at.to_rfc2822(); let fetched_at = fetched.fetched_at().to_rfc2822();
for item in fetched.items { for item in fetched.items() {
let guid = item.guid();
let title = item.title();
let description = item.description();
let content = item.content();
sqlx::query!( sqlx::query!(
"INSERT OR IGNORE INTO items "INSERT OR IGNORE INTO items
(channel_id, guid, fetched_at, title, description, content) (channel_id, guid, fetched_at, title, description, content)
VALUES (?, ?, ?, ?, ?, ?)", VALUES (?, ?, ?, ?, ?, ?)",
id.0, item.guid, fetched_at, item.title, item.description, item.content self.id.0, guid, fetched_at, title, description, content
) )
.execute(pool) .execute(pool)
.await?; .await?;
@@ -163,4 +157,174 @@ impl Channel {
Ok(()) Ok(())
} }
// Returns every stored item belonging to this channel.
// Each raw row is run through `UnparsedItem::parse`; the first parse failure
// aborts the whole collection (collect into Result short-circuits).
pub async fn get_items(&self, pool: &SqlitePool) -> Result<Vec<Item>> {
let items: Result<Vec<Item>> = sqlx::query_as!(
UnparsedItem,
"SELECT id as `id!` FROM items WHERE channel_id = ?",
self.id.0
).fetch_all(pool).await?.into_iter().map(UnparsedItem::parse).collect();
items
}
}
#[cfg(test)]
mod tests {
use super::*;
use rss::{
Guid as RSSGuid,
Item as RSSItem,
ItemBuilder as RSSItemBuilder,
Channel as RSSChannel,
ChannelBuilder as RSSChannelBuilder,
};
use sqlx::SqlitePool;
// Fixture values shared by the tests below.
const ITEM_TITLE: &str = "My Item";
const ITEM_GUID1: &str = "https://mycontent.com/blog/1";
const ITEM_GUID2: &str = "something-else!";
const ITEM_DESC: &str = "A test item";
const ITEM_CONT: &str = "some rss content baby";
const CHAN_TITLE: &str = "My Feed";
const CHAN_DESC: &str = "A test feed";
const FEED1: &str = "https://example.com/feed";
const FEED2: &str = "https://example2.com/feed";
// Creates a fresh in-memory SQLite database and applies all migrations,
// so each test starts from an empty, fully-migrated schema.
async fn setup_test_db() -> SqlitePool {
let pool = SqlitePool::connect("sqlite::memory:").await.unwrap();
sqlx::migrate!().run(&pool).await.unwrap();
pool
}
// A newly created channel gets a positive row id, keeps its link, and has a
// non-empty (placeholder) title.
#[tokio::test]
async fn create_channel() {
let pool = setup_test_db().await;
let url_feed = Url::parse(FEED1).unwrap();
let channel = Channel::create(&pool, url_feed).await.unwrap();
assert!(channel.id().0 > 0);
assert_eq!(channel.link().as_str(), FEED1);
assert!(channel.title().len() > 0);
}
// Creating a channel with an already-registered link returns the existing row
// instead of inserting a duplicate.
#[tokio::test]
async fn create_duplicate_returns_existing() {
let pool = setup_test_db().await;
let url_feed = Url::parse(FEED1).unwrap();
let channel1 = Channel::create(&pool, url_feed.clone()).await.unwrap();
let channel2 = Channel::create(&pool, url_feed).await.unwrap();
// Compare via i64 since ChannelId itself doesn't implement PartialEq.
assert_eq!(
i64::from(channel1.id()),
i64::from(channel2.id())
);
}
// get_all returns every channel row that was created.
#[tokio::test]
async fn get_all_channels() {
let pool = setup_test_db().await;
let url_feed1 = Url::parse(FEED1).unwrap();
let url_feed2 = Url::parse(FEED2).unwrap();
Channel::create(&pool, url_feed1).await.unwrap();
Channel::create(&pool, url_feed2).await.unwrap();
let channels = Channel::get_all(&pool).await.unwrap();
assert_eq!(channels.len(), 2);
}
// update_metadata overwrites title, link and description from a fetched feed.
#[tokio::test]
async fn update_metadata() {
let pool = setup_test_db().await;
let url_feed = Url::parse(FEED1).unwrap();
let channel = Channel::create(&pool, url_feed).await.unwrap();
// Build a minimal in-memory RSS channel rather than fetching over HTTP.
let fake_rss: RSSChannel = RSSChannelBuilder::default()
.title(CHAN_TITLE)
.link(FEED2)
.description(CHAN_DESC)
.build();
let fetched = FetchedRSSChannel::parse(fake_rss).unwrap();
channel.update_metadata(&pool, fetched).await.unwrap();
// Re-read from the database to confirm the row itself was updated.
let updated = Channel::get(&pool, channel.id()).await.unwrap();
assert_eq!(updated.title(), CHAN_TITLE);
assert_eq!(updated.link().as_str(), FEED2);
assert_eq!(updated.description(), Some(CHAN_DESC));
}
// update_items stores one row per fetched item (distinct guids -> two rows).
#[tokio::test]
async fn update_items() {
let pool = setup_test_db().await;
let url_feed = Url::parse(FEED1).unwrap();
let channel = Channel::create(&pool, url_feed).await.unwrap();
// Two items identical except for their guids.
let item1: RSSItem = RSSItemBuilder::default()
.title(ITEM_TITLE.to_string())
.description(ITEM_DESC.to_string())
.content(ITEM_CONT.to_string())
.guid(RSSGuid { value: ITEM_GUID1.to_string(), permalink: false })
.build();
let item2: RSSItem = RSSItemBuilder::default()
.title(ITEM_TITLE.to_string())
.description(ITEM_DESC.to_string())
.content(ITEM_CONT.to_string())
.guid(RSSGuid { value: ITEM_GUID2.to_string(), permalink: false })
.build();
let fake_rss: RSSChannel = RSSChannelBuilder::default()
.title(CHAN_TITLE)
.link(FEED2)
.description(CHAN_DESC)
.item(item1)
.item(item2)
.build();
let fetched = FetchedRSSChannel::parse(fake_rss).unwrap();
channel.update_items(&pool, fetched).await.unwrap();
let items = channel.get_items(&pool).await.unwrap();
assert_eq!(items.len(), 2);
}
// Inserting the same item twice (same guid) leaves a single row, thanks to
// the INSERT OR IGNORE in update_items.
#[tokio::test]
async fn update_items_ignores_duplicates() {
let pool = setup_test_db().await;
let url_feed = Url::parse(FEED1).unwrap();
let channel = Channel::create(&pool, url_feed).await.unwrap();
let item1: RSSItem = RSSItemBuilder::default()
.title(ITEM_TITLE.to_string())
.description(ITEM_DESC.to_string())
.content(ITEM_CONT.to_string())
.guid(RSSGuid { value: ITEM_GUID1.to_string(), permalink: false })
.build();
let fake_rss: RSSChannel = RSSChannelBuilder::default()
.title(CHAN_TITLE)
.link(FEED2)
.description(CHAN_DESC)
.item(item1)
.build();
// Apply the same fetched snapshot twice.
let fetched = FetchedRSSChannel::parse(fake_rss.clone()).unwrap();
channel.update_items(&pool, fetched).await.unwrap();
let fetched = FetchedRSSChannel::parse(fake_rss).unwrap();
channel.update_items(&pool, fetched).await.unwrap();
let items = channel.get_items(&pool).await.unwrap();
assert_eq!(items.len(), 1);
}
} }

View file

@@ -0,0 +1,67 @@
use crate::Result;
use reqwest::Url;
use chrono::{DateTime, Utc};
use std::hash::{Hash, Hasher};
/// An RSS <item> parsed from a fetched feed, normalized to owned strings.
pub struct FetchedRSSItem {
// Stable identifier: the feed's <guid> when present, otherwise a generated hash.
guid: String,
title: String,
description: String,
content: String,
}
impl FetchedRSSItem {
pub fn guid(&self) -> &str { &self.guid }
pub fn title(&self) -> &str { &self.title }
pub fn description(&self) -> &str { &self.description }
pub fn content(&self) -> &str { &self.content }
// Converts an `rss::Item` into our owned representation; absent fields become "".
fn parse(item: rss::Item) -> Self {
FetchedRSSItem {
guid: Self::get_or_create_guid(&item),
title: item.title().unwrap_or("").to_string(),
description: item.description().unwrap_or("").to_string(),
content: item.content().unwrap_or("").to_string(),
}
}
// Returns the item's <guid> value, or derives one by hashing link + title +
// description when the feed omits a guid.
// NOTE(review): DefaultHasher's algorithm is not guaranteed stable across Rust
// releases, so generated guids may change between builds — confirm this is
// acceptable for guid-based dedup of stored items.
fn get_or_create_guid(item: &rss::Item) -> String {
if let Some(guid) = item.guid() {
return guid.value().to_string();
}
let mut hasher = std::collections::hash_map::DefaultHasher::new();
item.link().unwrap_or("").hash(&mut hasher);
item.title().unwrap_or("").hash(&mut hasher);
item.description().unwrap_or("").hash(&mut hasher);
format!("gen-{:x}", hasher.finish())
}
}
/// An RSS channel freshly fetched and parsed, timestamped at parse time.
pub struct FetchedRSSChannel {
title: String,
link: Url,
description: String,
items: Vec<FetchedRSSItem>,
// When this snapshot was taken (set in `parse`, not at HTTP-request time).
fetched_at: DateTime<Utc>,
}
impl FetchedRSSChannel {
pub fn title(&self) -> &str { &self.title }
pub fn link(&self) -> &Url { &self.link }
pub fn description(&self) -> &str { &self.description }
pub fn items(&self) -> &[FetchedRSSItem] { &self.items }
pub fn fetched_at(&self) -> &DateTime<Utc> { &self.fetched_at }
/// Converts a parsed `rss::Channel` into our representation.
/// Fails only if the channel's <link> is not a valid URL.
pub fn parse(rss: rss::Channel) -> Result<Self> {
Ok(FetchedRSSChannel {
title: rss.title,
link: Url::parse(&rss.link)?,
description: rss.description,
items: rss.items.into_iter().map(FetchedRSSItem::parse).collect(),
fetched_at: Utc::now(),
})
}
}

View file

@@ -1,6 +1,7 @@
use crate::{ use crate::{
Result, Result,
Item, Item,
item::UnparsedItem,
Channel, Channel,
channel::{ channel::{
UnparsedChannel, UnparsedChannel,
@@ -77,10 +78,11 @@ impl Feed {
pub async fn add_channel( pub async fn add_channel(
&self, pool: &SqlitePool, channel_id: ChannelId &self, pool: &SqlitePool, channel_id: ChannelId
) -> Result<()> { ) -> Result<()> {
let int_channel_id = i64::from(channel_id);
sqlx::query!( sqlx::query!(
"INSERT INTO feed_channels (feed_id, channel_id) "INSERT INTO feed_channels (feed_id, channel_id)
VALUES (?, ?)", VALUES (?, ?)",
self.id.0, channel_id.0 self.id.0, int_channel_id
).execute(pool).await?; ).execute(pool).await?;
Ok(()) Ok(())
@@ -89,16 +91,16 @@ impl Feed {
pub async fn get_items( pub async fn get_items(
&self, pool: &SqlitePool, limit: u8, offset: u32 &self, pool: &SqlitePool, limit: u8, offset: u32
) -> Result<Vec<Item>> { ) -> Result<Vec<Item>> {
let items = sqlx::query_as!( let items: Result<Vec<Item>> = sqlx::query_as!(
Item, UnparsedItem,
"SELECT item_id as id FROM feed_items "SELECT item_id as id FROM feed_items
WHERE feed_id = ? AND archived = FALSE WHERE feed_id = ? AND archived = FALSE
ORDER BY score DESC ORDER BY score DESC
LIMIT ? OFFSET ?", LIMIT ? OFFSET ?",
self.id.0, limit, offset self.id.0, limit, offset
).fetch_all(pool).await?; ).fetch_all(pool).await?.into_iter().map(UnparsedItem::parse).collect();
Ok(items) items
} }
pub async fn get_channels( pub async fn get_channels(

View file

@@ -1,3 +1,20 @@
pub struct Item { use crate::Result;
// Strongly-typed row id for the items table; the inner i64 is private so ids
// can only be produced by parsing a database row.
#[derive(Copy, Clone)]
pub struct ItemId(i64);
impl From<ItemId> for i64 { fn from(id: ItemId) -> Self { id.0 } }
pub struct UnparsedItem {
pub id: i64, pub id: i64,
} }
impl UnparsedItem {
/// Converts a raw database row into a validated `Item`, wrapping the raw
/// i64 row id in the strongly-typed `ItemId`.
pub fn parse(self) -> Result<Item> {
let id = ItemId(self.id);
Ok(Item { id })
}
}
// A feed item identified only by its row id; its content fields live in the
// database and are not loaded here.
pub struct Item {
id: ItemId,
}