fetch, create fetch mod and AdapterClient
Creates a `fetch` module for fetching remote RSS content. This implementation is bare-bones and not great, since it doesn't pass any compression or timestamp information.
This commit is contained in:
parent
070c55a95b
commit
fcb03ead1f
4 changed files with 288 additions and 1 deletions
151
koucha/src/fetch.rs
Normal file
151
koucha/src/fetch.rs
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
use crate::{
|
||||
Result,
|
||||
db::Channel,
|
||||
AdapterClient,
|
||||
};
|
||||
use reqwest::Url;
|
||||
use chrono::{DateTime, Utc};
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
/// A single item taken from a fetched RSS channel.
///
/// All fields are owned strings; optional RSS fields that were absent in the
/// feed are stored as empty strings by `FetchedRSSItem::parse`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FetchedRSSItem {
    /// The item's GUID as given by the feed, or a generated `gen-…`
    /// identifier when the feed did not provide one.
    guid: String,
    /// The item's title (empty when the feed has none).
    title: String,
    /// The item's description (empty when the feed has none).
    description: String,
    /// The item's content (empty when the feed has none).
    content: String,
}
|
||||
impl FetchedRSSItem {
|
||||
pub fn guid(&self) -> &str { &self.guid }
|
||||
pub fn title(&self) -> &str { &self.title }
|
||||
pub fn description(&self) -> &str { &self.description }
|
||||
pub fn content(&self) -> &str { &self.content }
|
||||
|
||||
fn parse(item: rss::Item) -> Self {
|
||||
FetchedRSSItem {
|
||||
guid: Self::get_or_create_guid(&item),
|
||||
title: item.title().unwrap_or("").to_string(),
|
||||
description: item.description().unwrap_or("").to_string(),
|
||||
content: item.content().unwrap_or("").to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_or_create_guid(item: &rss::Item) -> String {
|
||||
if let Some(guid) = item.guid() {
|
||||
return guid.value().to_string();
|
||||
}
|
||||
|
||||
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
||||
item.link().unwrap_or("").hash(&mut hasher);
|
||||
item.title().unwrap_or("").hash(&mut hasher);
|
||||
item.description().unwrap_or("").hash(&mut hasher);
|
||||
|
||||
format!("gen-{:x}", hasher.finish())
|
||||
}
|
||||
}
|
||||
pub struct FetchedRSSChannel {
|
||||
title: String,
|
||||
link: Url,
|
||||
description: String,
|
||||
|
||||
items: Vec<FetchedRSSItem>,
|
||||
|
||||
fetched_at: DateTime<Utc>,
|
||||
}
|
||||
impl FetchedRSSChannel {
|
||||
pub fn title(&self) -> &str { &self.title }
|
||||
pub fn link(&self) -> &Url { &self.link }
|
||||
pub fn description(&self) -> &str { &self.description }
|
||||
pub fn items(&self) -> &[FetchedRSSItem] { &self.items }
|
||||
pub fn fetched_at(&self) -> &DateTime<Utc> { &self.fetched_at }
|
||||
|
||||
pub async fn fetch_channel(
|
||||
client: &AdapterClient, channel: Channel
|
||||
) -> Result<Option<Self>> {
|
||||
let bytestream = client.0.get(channel.link().clone())
|
||||
.send().await?
|
||||
.bytes().await?;
|
||||
|
||||
let rss_channel = rss::Channel::read_from(&bytestream[..])?;
|
||||
|
||||
let now = Utc::now();
|
||||
|
||||
Ok(Some(FetchedRSSChannel::parse(rss_channel, now)?))
|
||||
}
|
||||
|
||||
fn parse(rss: rss::Channel, fetched_at: DateTime<Utc>) -> Result<Self> {
|
||||
Ok(FetchedRSSChannel {
|
||||
title: rss.title,
|
||||
link: Url::parse(&rss.link)?,
|
||||
description: rss.description,
|
||||
|
||||
items: rss.items.into_iter().map(FetchedRSSItem::parse).collect(),
|
||||
|
||||
fetched_at: fetched_at,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::{
        ITEM_TITLE, ITEM_GUID, ITEM_GUID2, ITEM_DESC, ITEM_CONT,
        CHANNEL_TITLE, CHANNEL_DESC, FEED1,
        get_datetime
    };

    /// Builds an `rss::Item` with the fixture title, description and content
    /// and a non-permalink GUID whose value is `guid_value`.
    fn item_with_guid(guid_value: String) -> rss::Item {
        let guid = rss::Guid { value: guid_value, permalink: false };

        rss::ItemBuilder::default()
            .title(ITEM_TITLE.to_string())
            .guid(guid)
            .description(ITEM_DESC.to_string())
            .content(ITEM_CONT.to_string())
            .build()
    }

    /// Builds an `rss::Channel` with the fixture title, description and link
    /// that contains `items`.
    fn channel_with_items(items: Vec<rss::Item>) -> rss::Channel {
        rss::ChannelBuilder::default()
            .title(CHANNEL_TITLE.to_string())
            .description(CHANNEL_DESC.to_string())
            .link(FEED1.to_string())
            .items(items)
            .build()
    }

    /// Every field of an item carrying a GUID is copied over unchanged.
    #[test]
    fn parse_item() {
        let parsed = FetchedRSSItem::parse(item_with_guid(ITEM_GUID.to_string()));

        assert_eq!(parsed.guid, ITEM_GUID);
        assert_eq!(parsed.title, ITEM_TITLE);
        assert_eq!(parsed.description, ITEM_DESC);
        assert_eq!(parsed.content, ITEM_CONT);
    }

    /// Channel metadata, the supplied timestamp and both items survive parsing.
    #[test]
    fn parse_feed() {
        let source = channel_with_items(vec![
            item_with_guid(ITEM_GUID.to_string()),
            item_with_guid(ITEM_GUID2.to_string()),
        ]);
        let stamp: DateTime<Utc> = get_datetime();

        let parsed = FetchedRSSChannel::parse(source, stamp).unwrap();

        assert_eq!(parsed.title, CHANNEL_TITLE);
        assert_eq!(parsed.link.as_str(), FEED1);
        assert_eq!(parsed.description, CHANNEL_DESC);
        assert_eq!(parsed.fetched_at, stamp);
        assert_eq!(parsed.items.len(), 2);
        assert!(parsed.items.iter().any(|entry| entry.guid() == ITEM_GUID));
        assert!(parsed.items.iter().any(|entry| entry.guid() == ITEM_GUID2));
    }
}
|
||||
|
|
@ -3,12 +3,15 @@ use std::error::Error;
|
|||
/// Crate-wide result alias whose error type is a boxed `dyn Error`.
type Result<T> = std::result::Result<T, Box<dyn Error>>;
|
||||
|
||||
pub mod db;
|
||||
pub mod fetch;
|
||||
pub mod score;
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test_utils;
|
||||
|
||||
/// Newtype wrapper around a `sqlx::SqlitePool`.
pub struct AdapterPool(sqlx::SqlitePool);
|
||||
/// Newtype wrapper around a `reqwest::Client`.
pub struct AdapterClient(reqwest::Client);
|
||||
|
||||
/// Builder that holds the settings needed to construct an `Adapter`.
pub struct AdapterBuilder {
    /// URL of the SQLite database the adapter connects to.
    database_url: String,
}
|
||||
|
|
@ -29,15 +32,18 @@ impl AdapterBuilder {
|
|||
let db = sqlx::sqlite::SqlitePoolOptions::new()
|
||||
.connect(&self.database_url).await?;
|
||||
sqlx::migrate!().run(&db).await?;
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
Ok(Adapter { db: AdapterPool(db) })
|
||||
Ok(Adapter { db: AdapterPool(db), client: AdapterClient(client) })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Adapter {
|
||||
db: AdapterPool,
|
||||
client: AdapterClient,
|
||||
}
|
||||
|
||||
impl Adapter {
|
||||
pub fn get_pool(&self) -> &AdapterPool { &self.db }
|
||||
pub fn get_client(&self) -> &AdapterClient { &self.client }
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue