Global, mono-binary to libraries and binaries

This separates the previous mono-binary setup into separate libraries
and binaries. Specifically, it splits the old single api/ingestor binary
into an Atproto and a DB library, as well as an api and an ingestor binary.

Atproto Lib
Was mostly untouched. The original URI implementation was changed to use
FromStr, otherwise only imports were changed.

DB Lib
Is mostly unused, so there wasn't much that needed to be changed. Some
new files were added so that future work on it can hit the ground
running.

Api Binary
Is almost entirely the same. Imports were changed and the ingestor code
of main was removed.

Ingestor Binary
Was almost entirely refactored. An interface to make ingestors was
added, and it was modularized. The only shared code is in
Ingestor.start() and collections.rs's macros, but that is mostly
boilerplate.
This commit is contained in:
Julia Lange 2025-05-22 15:22:43 -07:00
parent 45acaaa601
commit eb28549a0f
Signed by: Julia
SSH key fingerprint: SHA256:5DJcfxa5/fKCYn57dcabJa2vN2e6eT0pBerYi5SUbto
31 changed files with 582 additions and 636 deletions

12
atproto/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "atproto"
version = "0.1.0"
edition = "2024"
[dependencies]
atrium-api = { version = "0.25.3", default-features = false }
lazy-regex = "3.4.1"
serde.workspace = true
serde_json.workspace = true
tracing-subscriber.workspace = true
tracing.workspace = true

View file

@ -0,0 +1,3 @@
// @generated - This file is generated by esquema-codegen (forked from atrium-codegen). DO NOT EDIT.
pub mod record;
pub mod my;

View file

@ -0,0 +1,3 @@
// @generated - This file is generated by esquema-codegen (forked from atrium-codegen). DO NOT EDIT.
//!Definitions for the `my` namespace.
pub mod spoor;

View file

@ -0,0 +1,4 @@
// @generated - This file is generated by esquema-codegen (forked from atrium-codegen). DO NOT EDIT.
//!Definitions for the `my.spoor` namespace.
pub mod content;
pub mod log;

View file

@ -0,0 +1,17 @@
// @generated - This file is generated by esquema-codegen (forked from atrium-codegen). DO NOT EDIT.
//!Definitions for the `my.spoor.content` namespace.
pub mod external;
pub mod media;
pub mod title;
///Unit struct identifying the `my.spoor.content.external` record collection.
///
///Its `Collection` impl pairs the collection NSID with the record type from the `external` module.
#[derive(Debug)]
pub struct External;
impl atrium_api::types::Collection for External {
///NSID string of this collection.
const NSID: &'static str = "my.spoor.content.external";
///Record type associated with this collection.
type Record = external::Record;
}
///Unit struct identifying the `my.spoor.content.media` record collection.
///
///Its `Collection` impl pairs the collection NSID with the record type from the `media` module.
#[derive(Debug)]
pub struct Media;
impl atrium_api::types::Collection for Media {
///NSID string of this collection.
const NSID: &'static str = "my.spoor.content.media";
///Record type associated with this collection.
type Record = media::Record;
}

View file

@ -0,0 +1,36 @@
// @generated - This file is generated by esquema-codegen (forked from atrium-codegen). DO NOT EDIT.
//!Definitions for the `my.spoor.content.external` namespace.
use atrium_api::types::TryFromUnknown;
///Data of a record in the `my.spoor.content.external` namespace.
///
///Field names are serialized and deserialized in camelCase.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct RecordData {
///User defined overrides for the returned content
///
///Left out of the serialized output when `None`.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub overrides: core::option::Option<atrium_api::types::Union<RecordOverridesRefs>>,
///All the data needed to query the content from the source
pub queryable: atrium_api::types::Union<RecordQueryableRefs>,
///Optional list of titles; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub titles: core::option::Option<Vec<crate::lexicons::my::spoor::content::title::Main>>,
}
///[`RecordData`] wrapped in `atrium_api::types::Object`.
pub type Record = atrium_api::types::Object<RecordData>;
impl From<atrium_api::types::Unknown> for RecordData {
fn from(value: atrium_api::types::Unknown) -> Self {
Self::try_from_unknown(value).unwrap()
}
}
///Data for the `my.spoor.content.external#tvdbv4` object (see [`RecordQueryableRefs::Tvdbv4`]).
///
///Field names are serialized and deserialized in camelCase.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Tvdbv4Data {
///Numeric identifier. NOTE(review): presumably the TVDB v4 entry id — confirm against the lexicon.
pub id: i64,
///Type string; written as a raw identifier because `type` is a Rust keyword.
pub r#type: String,
}
///[`Tvdbv4Data`] wrapped in `atrium_api::types::Object`.
pub type Tvdbv4 = atrium_api::types::Object<Tvdbv4Data>;
///Union members accepted by [`RecordData::overrides`], tagged by a `$type` field.
///
///The enum currently declares no variants.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(tag = "$type")]
pub enum RecordOverridesRefs {}
///Union members accepted by [`RecordData::queryable`], tagged by a `$type` field.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(tag = "$type")]
pub enum RecordQueryableRefs {
///Selected when `$type` is `my.spoor.content.external#tvdbv4`.
#[serde(rename = "my.spoor.content.external#tvdbv4")]
Tvdbv4(Box<Tvdbv4>),
}

View file

@ -0,0 +1,44 @@
// @generated - This file is generated by esquema-codegen (forked from atrium-codegen). DO NOT EDIT.
//!Definitions for the `my.spoor.content.media` namespace.
use atrium_api::types::TryFromUnknown;
///Data of a record in the `my.spoor.content.media` namespace.
///
///Field names are serialized and deserialized in camelCase.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct RecordData {
///Optional duration information; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub duration_data: core::option::Option<
atrium_api::types::Union<RecordDurationDataRefs>,
>,
///Client-declared timestamp for when this activity was last updated
pub last_updated: atrium_api::types::string::Datetime,
///Optional blob reference for a poster image; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub poster_image: core::option::Option<atrium_api::types::BlobRef>,
///Titles of the media; this field is always present.
pub titles: Vec<crate::lexicons::my::spoor::content::title::Main>,
}
///[`RecordData`] wrapped in `atrium_api::types::Object`.
pub type Record = atrium_api::types::Object<RecordData>;
impl From<atrium_api::types::Unknown> for RecordData {
fn from(value: atrium_api::types::Unknown) -> Self {
Self::try_from_unknown(value).unwrap()
}
}
///Data for the `my.spoor.content.media#book` object (see [`RecordDurationDataRefs`]).
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct BookData {
///Optional chapter count; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub chapters: core::option::Option<usize>,
}
///[`BookData`] wrapped in `atrium_api::types::Object`.
pub type Book = atrium_api::types::Object<BookData>;
///Data for the `my.spoor.content.media#television` object (see [`RecordDurationDataRefs`]).
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct TelevisionData {
///Optional episode count; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub episodes: core::option::Option<usize>,
}
///[`TelevisionData`] wrapped in `atrium_api::types::Object`.
pub type Television = atrium_api::types::Object<TelevisionData>;
///Union members accepted by [`RecordData::duration_data`], tagged by a `$type` field.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(tag = "$type")]
pub enum RecordDurationDataRefs {
///Selected when `$type` is `my.spoor.content.media#television`.
#[serde(rename = "my.spoor.content.media#television")]
MySpoorContentMediaTelevision(Box<crate::lexicons::my::spoor::content::media::Television>),
///Selected when `$type` is `my.spoor.content.media#book`.
#[serde(rename = "my.spoor.content.media#book")]
MySpoorContentMediaBook(Box<crate::lexicons::my::spoor::content::media::Book>),
}

View file

@ -0,0 +1,19 @@
// @generated - This file is generated by esquema-codegen (forked from atrium-codegen). DO NOT EDIT.
//!Definitions for the `my.spoor.content.title` namespace.
///Data of the main `my.spoor.content.title` object: a single title entry.
///
///Every field is optional and is left out of the serialized output when `None`.
///Field names are serialized and deserialized in camelCase.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct MainData {
///Language tag of the title.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub lang: core::option::Option<atrium_api::types::string::Language>,
///NSID naming the kind of translation.
///NOTE(review): presumably one of the `TITLE_*` constants in this module — confirm against the lexicon.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub translation_type: core::option::Option<atrium_api::types::string::Nsid>,
///The title text itself.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub value: core::option::Option<String>,
}
///[`MainData`] wrapped in `atrium_api::types::Object`.
pub type Main = atrium_api::types::Object<MainData>;
///A phonetic transcription of the native title such that when read it will sound like the native title.
///
///Value: `my.spoor.content.title#titleHomophonic`.
pub const TITLE_HOMOPHONIC: &str = "my.spoor.content.title#titleHomophonic";
///The title in its native script
///
///Value: `my.spoor.content.title#titleNative`.
pub const TITLE_NATIVE: &str = "my.spoor.content.title#titleNative";
///A translation of the title
///
///Value: `my.spoor.content.title#titleTranslation`.
pub const TITLE_TRANSLATION: &str = "my.spoor.content.title#titleTranslation";

View file

@ -0,0 +1,16 @@
// @generated - This file is generated by esquema-codegen (forked from atrium-codegen). DO NOT EDIT.
//!Definitions for the `my.spoor.log` namespace.
pub mod activity;
pub mod session;
///Unit struct identifying the `my.spoor.log.activity` record collection.
///
///Its `Collection` impl pairs the collection NSID with the record type from the `activity` module.
#[derive(Debug)]
pub struct Activity;
impl atrium_api::types::Collection for Activity {
///NSID string of this collection.
const NSID: &'static str = "my.spoor.log.activity";
///Record type associated with this collection.
type Record = activity::Record;
}
///Unit struct identifying the `my.spoor.log.session` record collection.
///
///Its `Collection` impl pairs the collection NSID with the record type from the `session` module.
#[derive(Debug)]
pub struct Session;
impl atrium_api::types::Collection for Session {
///NSID string of this collection.
const NSID: &'static str = "my.spoor.log.session";
///Record type associated with this collection.
type Record = session::Record;
}

View file

@ -0,0 +1,33 @@
// @generated - This file is generated by esquema-codegen (forked from atrium-codegen). DO NOT EDIT.
//!Definitions for the `my.spoor.log.activity` namespace.
use atrium_api::types::TryFromUnknown;
///Data of a record in the `my.spoor.log.activity` namespace.
///
///Field names are serialized and deserialized in camelCase.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct RecordData {
///Client-declared timestamp for when this activity was created
pub created_at: atrium_api::types::string::Datetime,
///User-declared timestamp for when they performed the activity. Null implies unknown time.
///
///Left out of the serialized output when `None`.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub performed_at: core::option::Option<atrium_api::types::string::Datetime>,
///Progress value for this activity; one of the [`RecordProgressRefs`] union members.
pub progress: atrium_api::types::Union<RecordProgressRefs>,
///Strong reference to another record.
///NOTE(review): presumably the `my.spoor.log.session` record this activity belongs to — confirm.
pub session: atrium_api::com::atproto::repo::strong_ref::Main,
}
///[`RecordData`] wrapped in `atrium_api::types::Object`.
pub type Record = atrium_api::types::Object<RecordData>;
impl From<atrium_api::types::Unknown> for RecordData {
fn from(value: atrium_api::types::Unknown) -> Self {
Self::try_from_unknown(value).unwrap()
}
}
///The index of the content consumed. Content must be indexable
///
///Data for the `my.spoor.log.activity#progressIndex` object (see [`RecordProgressRefs::ProgressIndex`]).
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct ProgressIndexData {
///The index value, stored as a signed 64-bit integer.
pub index: i64,
}
///[`ProgressIndexData`] wrapped in `atrium_api::types::Object`.
pub type ProgressIndex = atrium_api::types::Object<ProgressIndexData>;
///Union members accepted by [`RecordData::progress`], tagged by a `$type` field.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(tag = "$type")]
pub enum RecordProgressRefs {
///Selected when `$type` is `my.spoor.log.activity#progressIndex`.
#[serde(rename = "my.spoor.log.activity#progressIndex")]
ProgressIndex(Box<ProgressIndex>),
}

View file

@ -0,0 +1,20 @@
// @generated - This file is generated by esquema-codegen (forked from atrium-codegen). DO NOT EDIT.
//!Definitions for the `my.spoor.log.session` namespace.
use atrium_api::types::TryFromUnknown;
///Data of a record in the `my.spoor.log.session` namespace.
///
///Field names are serialized and deserialized in camelCase.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct RecordData {
///Strong reference to another record.
///NOTE(review): presumably the content record (`my.spoor.content.*`) this session tracks — confirm.
pub content: atrium_api::com::atproto::repo::strong_ref::Main,
///Client-declared timestamp for when this activity was created
pub created_at: atrium_api::types::string::Datetime,
///Optional free-form label; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub label: core::option::Option<String>,
///Optional list of DIDs of other participants; left out of the serialized output when `None`.
#[serde(skip_serializing_if = "core::option::Option::is_none")]
pub other_participants: core::option::Option<Vec<atrium_api::types::string::Did>>,
}
///[`RecordData`] wrapped in `atrium_api::types::Object`.
pub type Record = atrium_api::types::Object<RecordData>;
impl From<atrium_api::types::Unknown> for RecordData {
fn from(value: atrium_api::types::Unknown) -> Self {
Self::try_from_unknown(value).unwrap()
}
}

View file

@ -0,0 +1,65 @@
// @generated - This file is generated by esquema-codegen (forked from atrium-codegen). DO NOT EDIT.
//!A collection of known record types.
///Every record type known to this crate, gathered into one enum.
///
///The variant is selected by a `$type` field whose value is the record's NSID,
///and each variant boxes the corresponding record type.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(tag = "$type")]
pub enum KnownRecord {
///Selected when `$type` is `my.spoor.content.external`.
#[serde(rename = "my.spoor.content.external")]
LexiconsMySpoorContentExternal(
Box<crate::lexicons::my::spoor::content::external::Record>,
),
///Selected when `$type` is `my.spoor.content.media`.
#[serde(rename = "my.spoor.content.media")]
LexiconsMySpoorContentMedia(Box<crate::lexicons::my::spoor::content::media::Record>),
///Selected when `$type` is `my.spoor.log.activity`.
#[serde(rename = "my.spoor.log.activity")]
LexiconsMySpoorLogActivity(Box<crate::lexicons::my::spoor::log::activity::Record>),
///Selected when `$type` is `my.spoor.log.session`.
#[serde(rename = "my.spoor.log.session")]
LexiconsMySpoorLogSession(Box<crate::lexicons::my::spoor::log::session::Record>),
}
///Boxes a `my.spoor.content.external` record into its [`KnownRecord`] variant.
impl From<crate::lexicons::my::spoor::content::external::Record> for KnownRecord {
    fn from(external: crate::lexicons::my::spoor::content::external::Record) -> Self {
        let boxed = Box::new(external);
        Self::LexiconsMySpoorContentExternal(boxed)
    }
}
///Converts `my.spoor.content.external` record data into a record, then boxes it
///into its [`KnownRecord`] variant.
impl From<crate::lexicons::my::spoor::content::external::RecordData> for KnownRecord {
    fn from(data: crate::lexicons::my::spoor::content::external::RecordData) -> Self {
        let record: crate::lexicons::my::spoor::content::external::Record = data.into();
        Self::LexiconsMySpoorContentExternal(Box::new(record))
    }
}
///Boxes a `my.spoor.content.media` record into its [`KnownRecord`] variant.
impl From<crate::lexicons::my::spoor::content::media::Record> for KnownRecord {
    fn from(media: crate::lexicons::my::spoor::content::media::Record) -> Self {
        let boxed = Box::new(media);
        Self::LexiconsMySpoorContentMedia(boxed)
    }
}
///Converts `my.spoor.content.media` record data into a record, then boxes it
///into its [`KnownRecord`] variant.
impl From<crate::lexicons::my::spoor::content::media::RecordData> for KnownRecord {
    fn from(data: crate::lexicons::my::spoor::content::media::RecordData) -> Self {
        let record: crate::lexicons::my::spoor::content::media::Record = data.into();
        Self::LexiconsMySpoorContentMedia(Box::new(record))
    }
}
///Boxes a `my.spoor.log.activity` record into its [`KnownRecord`] variant.
impl From<crate::lexicons::my::spoor::log::activity::Record> for KnownRecord {
    fn from(activity: crate::lexicons::my::spoor::log::activity::Record) -> Self {
        let boxed = Box::new(activity);
        Self::LexiconsMySpoorLogActivity(boxed)
    }
}
///Converts `my.spoor.log.activity` record data into a record, then boxes it
///into its [`KnownRecord`] variant.
impl From<crate::lexicons::my::spoor::log::activity::RecordData> for KnownRecord {
    fn from(data: crate::lexicons::my::spoor::log::activity::RecordData) -> Self {
        let record: crate::lexicons::my::spoor::log::activity::Record = data.into();
        Self::LexiconsMySpoorLogActivity(Box::new(record))
    }
}
///Boxes a `my.spoor.log.session` record into its [`KnownRecord`] variant.
impl From<crate::lexicons::my::spoor::log::session::Record> for KnownRecord {
    fn from(session: crate::lexicons::my::spoor::log::session::Record) -> Self {
        let boxed = Box::new(session);
        Self::LexiconsMySpoorLogSession(boxed)
    }
}
///Converts `my.spoor.log.session` record data into a record, then boxes it
///into its [`KnownRecord`] variant.
impl From<crate::lexicons::my::spoor::log::session::RecordData> for KnownRecord {
    fn from(data: crate::lexicons::my::spoor::log::session::RecordData) -> Self {
        let record: crate::lexicons::my::spoor::log::session::Record = data.into();
        Self::LexiconsMySpoorLogSession(Box::new(record))
    }
}
///Converts a [`KnownRecord`] into an untyped `Unknown` value.
///
///# Panics
///Panics when `try_into_unknown` reports an error, because the result is unwrapped.
impl Into<atrium_api::types::Unknown> for KnownRecord {
    fn into(self) -> atrium_api::types::Unknown {
        let converted = atrium_api::types::TryIntoUnknown::try_into_unknown(&self);
        converted.unwrap()
    }
}

60
atproto/src/lib.rs Normal file
View file

@ -0,0 +1,60 @@
use lazy_regex::regex_captures;
use core::str::FromStr;
pub use atrium_api::types::{
Collection,
string::{
Nsid,
RecordKey,
AtIdentifier as Authority,
}
};
pub mod lexicons;
/// A validated AT URI (`at://authority[/collection[/rkey]]`).
///
/// Values are obtained through the `FromStr` implementation, which checks the
/// individual components before storing the full string. Only the complete
/// URI text is kept.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Uri {
    /// The complete URI text as matched by the parser.
    whole: String,
    // These fields could be useful in the future,
    // so I'm leaving the code for them.
    // authority: Authority,
    // collection: Option<Nsid>,
    // rkey: Option<RecordKey>,
}
impl FromStr for Uri {
    type Err = &'static str;

    /// Parses an AT URI of the form `at://authority`, `at://authority/collection`
    /// or `at://authority/collection/rkey`.
    ///
    /// The scheme is matched case-insensitively. The authority, collection and
    /// record key are each validated by their respective types, but only the
    /// full URI text is stored.
    ///
    /// # Errors
    ///
    /// * `"Uri too long"` when the input exceeds 8000 bytes.
    /// * `"Invalid Uri"` when the input does not have the overall AT URI shape.
    /// * The error string produced by `Authority`, `Nsid` or `RecordKey` when
    ///   the matching component fails its own validation.
    fn from_str(uri: &str) -> Result<Self, Self::Err> {
        // `len()` counts bytes, so this bounds the encoded size of the input.
        if uri.len() > 8000 {
            return Err("Uri too long");
        }

        // The pattern is in Rust `regex` syntax: no `/.../i` delimiters (a
        // leading `/` would be a literal character in front of `^`, making a
        // match impossible); case-insensitivity is requested with `(?i)`.
        // The record key group is nested inside the collection group so that
        // `at://authority/collection` is accepted without a record key.
        let Some((whole, unchecked_authority, unchecked_collection, unchecked_rkey)) =
            regex_captures!(
                r"(?i)^at://([\w\.\-_~:]+)(?:/([\w\.\-_~:]+)(?:/([\w\.\-_~:]+))?)?$",
                uri,
            )
        else {
            return Err("Invalid Uri");
        };

        // This parsing is required, but the values don't need to be used yet.
        // Optional groups that did not take part in the match come back as "",
        // which is why emptiness is used to detect an absent component.
        let _authority = Authority::from_str(unchecked_authority)?;
        let _collection = if unchecked_collection.is_empty() {
            None
        } else {
            Some(Nsid::new(unchecked_collection.to_string())?)
        };
        let _rkey = if unchecked_rkey.is_empty() {
            None
        } else {
            Some(RecordKey::new(unchecked_rkey.to_string())?)
        };

        // Ok(Uri{ whole: whole.to_string(), authority, collection, rkey })
        Ok(Uri { whole: whole.to_string() })
    }
}
impl Uri {
    /// Borrows the complete URI text held by this value.
    pub fn as_str(&self) -> &str {
        let text: &str = &self.whole;
        text
    }
}