fix: refactor backfill and add support for search

This commit is contained in:
Timo Kösters 2023-02-21 16:38:50 +01:00
parent 17a6431f5f
commit eae0989c40
No known key found for this signature in database
GPG key ID: 0B25E636FBA7E4CB
5 changed files with 157 additions and 204 deletions

View file

@ -81,6 +81,14 @@ pub async fn search_events_route(
let results: Vec<_> = results let results: Vec<_> = results
.iter() .iter()
.filter_map(|result| {
services()
.rooms
.timeline
.get_pdu_from_id(result)
.ok()?
.map(|pdu| pdu.to_room_event())
})
.map(|result| { .map(|result| {
Ok::<_, Error>(SearchResult { Ok::<_, Error>(SearchResult {
context: EventContextResult { context: EventContextResult {
@ -91,11 +99,7 @@ pub async fn search_events_route(
start: None, start: None,
}, },
rank: None, rank: None,
result: services() result: Some(result),
.rooms
.timeline
.get_pdu_from_id(result)?
.map(|pdu| pdu.to_room_event()),
}) })
}) })
.filter_map(|r| r.ok()) .filter_map(|r| r.ok())

View file

@ -1,5 +1,3 @@
use std::mem::size_of;
use ruma::RoomId; use ruma::RoomId;
use crate::{database::KeyValueDatabase, service, services, utils, Result}; use crate::{database::KeyValueDatabase, service, services, utils, Result};
@ -15,7 +13,7 @@ impl service::rooms::search::Data for KeyValueDatabase {
let mut key = shortroomid.to_be_bytes().to_vec(); let mut key = shortroomid.to_be_bytes().to_vec();
key.extend_from_slice(word.as_bytes()); key.extend_from_slice(word.as_bytes());
key.push(0xff); key.push(0xff);
key.extend_from_slice(pdu_id); key.extend_from_slice(pdu_id); // TODO: currently we save the room id a second time here
(key, Vec::new()) (key, Vec::new())
}); });
@ -34,7 +32,6 @@ impl service::rooms::search::Data for KeyValueDatabase {
.expect("room exists") .expect("room exists")
.to_be_bytes() .to_be_bytes()
.to_vec(); .to_vec();
let prefix_clone = prefix.clone();
let words: Vec<_> = search_string let words: Vec<_> = search_string
.split_terminator(|c: char| !c.is_alphanumeric()) .split_terminator(|c: char| !c.is_alphanumeric())
@ -46,6 +43,7 @@ impl service::rooms::search::Data for KeyValueDatabase {
let mut prefix2 = prefix.clone(); let mut prefix2 = prefix.clone();
prefix2.extend_from_slice(word.as_bytes()); prefix2.extend_from_slice(word.as_bytes());
prefix2.push(0xff); prefix2.push(0xff);
let prefix3 = prefix2.clone();
let mut last_possible_id = prefix2.clone(); let mut last_possible_id = prefix2.clone();
last_possible_id.extend_from_slice(&u64::MAX.to_be_bytes()); last_possible_id.extend_from_slice(&u64::MAX.to_be_bytes());
@ -53,7 +51,7 @@ impl service::rooms::search::Data for KeyValueDatabase {
self.tokenids self.tokenids
.iter_from(&last_possible_id, true) // Newest pdus first .iter_from(&last_possible_id, true) // Newest pdus first
.take_while(move |(k, _)| k.starts_with(&prefix2)) .take_while(move |(k, _)| k.starts_with(&prefix2))
.map(|(key, _)| key[key.len() - size_of::<u64>()..].to_vec()) .map(move |(key, _)| key[prefix3.len()..].to_vec())
}); });
let common_elements = match utils::common_elements(iterators, |a, b| { let common_elements = match utils::common_elements(iterators, |a, b| {
@ -64,12 +62,6 @@ impl service::rooms::search::Data for KeyValueDatabase {
None => return Ok(None), None => return Ok(None),
}; };
let mapped = common_elements.map(move |id| { Ok(Some((Box::new(common_elements), words)))
let mut pduid = prefix_clone.clone();
pduid.extend_from_slice(&id);
pduid
});
Ok(Some((Box::new(mapped), words)))
} }
} }

View file

@ -42,19 +42,8 @@ impl service::rooms::timeline::Data for KeyValueDatabase {
Ok(self Ok(self
.eventid_pduid .eventid_pduid
.get(event_id.as_bytes())? .get(event_id.as_bytes())?
.map(|pdu_id| Ok::<_, Error>(PduCount::Normal(pdu_count(&pdu_id)?))) .map(|pdu_id| pdu_count(&pdu_id))
.transpose()? .transpose()?)
.map_or_else(
|| {
Ok::<_, Error>(
self.eventid_backfillpduid
.get(event_id.as_bytes())?
.map(|pdu_id| Ok::<_, Error>(PduCount::Backfilled(pdu_count(&pdu_id)?)))
.transpose()?,
)
},
|x| Ok(Some(x)),
)?)
} }
/// Returns the json of a pdu. /// Returns the json of a pdu.
@ -83,21 +72,6 @@ impl service::rooms::timeline::Data for KeyValueDatabase {
.ok_or_else(|| Error::bad_database("Invalid pduid in eventid_pduid.")) .ok_or_else(|| Error::bad_database("Invalid pduid in eventid_pduid."))
}) })
.transpose()? .transpose()?
.map_or_else(
|| {
Ok::<_, Error>(
self.eventid_backfillpduid
.get(event_id.as_bytes())?
.map(|pduid| {
self.pduid_backfillpdu.get(&pduid)?.ok_or_else(|| {
Error::bad_database("Invalid pduid in eventid_pduid.")
})
})
.transpose()?,
)
},
|x| Ok(Some(x)),
)?
.map(|pdu| { .map(|pdu| {
serde_json::from_slice(&pdu).map_err(|_| Error::bad_database("Invalid PDU in db.")) serde_json::from_slice(&pdu).map_err(|_| Error::bad_database("Invalid PDU in db."))
}) })
@ -106,10 +80,7 @@ impl service::rooms::timeline::Data for KeyValueDatabase {
/// Returns the pdu's id. /// Returns the pdu's id.
fn get_pdu_id(&self, event_id: &EventId) -> Result<Option<Vec<u8>>> { fn get_pdu_id(&self, event_id: &EventId) -> Result<Option<Vec<u8>>> {
Ok(self.eventid_pduid.get(event_id.as_bytes())?.map_or_else( Ok(self.eventid_pduid.get(event_id.as_bytes())?)
|| self.eventid_backfillpduid.get(event_id.as_bytes()),
|x| Ok(Some(x)),
)?)
} }
/// Returns the pdu. /// Returns the pdu.
@ -124,21 +95,6 @@ impl service::rooms::timeline::Data for KeyValueDatabase {
.ok_or_else(|| Error::bad_database("Invalid pduid in eventid_pduid.")) .ok_or_else(|| Error::bad_database("Invalid pduid in eventid_pduid."))
}) })
.transpose()? .transpose()?
.map_or_else(
|| {
Ok::<_, Error>(
self.eventid_backfillpduid
.get(event_id.as_bytes())?
.map(|pduid| {
self.pduid_backfillpdu.get(&pduid)?.ok_or_else(|| {
Error::bad_database("Invalid pduid in eventid_pduid.")
})
})
.transpose()?,
)
},
|x| Ok(Some(x)),
)?
.map(|pdu| { .map(|pdu| {
serde_json::from_slice(&pdu).map_err(|_| Error::bad_database("Invalid PDU in db.")) serde_json::from_slice(&pdu).map_err(|_| Error::bad_database("Invalid PDU in db."))
}) })
@ -183,28 +139,22 @@ impl service::rooms::timeline::Data for KeyValueDatabase {
/// ///
/// This does __NOT__ check the outliers `Tree`. /// This does __NOT__ check the outliers `Tree`.
fn get_pdu_from_id(&self, pdu_id: &[u8]) -> Result<Option<PduEvent>> { fn get_pdu_from_id(&self, pdu_id: &[u8]) -> Result<Option<PduEvent>> {
self.pduid_pdu self.pduid_pdu.get(pdu_id)?.map_or(Ok(None), |pdu| {
.get(pdu_id)? Ok(Some(
.map_or_else(|| self.pduid_backfillpdu.get(pdu_id), |x| Ok(Some(x)))? serde_json::from_slice(&pdu)
.map_or(Ok(None), |pdu| { .map_err(|_| Error::bad_database("Invalid PDU in db."))?,
Ok(Some( ))
serde_json::from_slice(&pdu) })
.map_err(|_| Error::bad_database("Invalid PDU in db."))?,
))
})
} }
/// Returns the pdu as a `BTreeMap<String, CanonicalJsonValue>`. /// Returns the pdu as a `BTreeMap<String, CanonicalJsonValue>`.
fn get_pdu_json_from_id(&self, pdu_id: &[u8]) -> Result<Option<CanonicalJsonObject>> { fn get_pdu_json_from_id(&self, pdu_id: &[u8]) -> Result<Option<CanonicalJsonObject>> {
self.pduid_pdu self.pduid_pdu.get(pdu_id)?.map_or(Ok(None), |pdu| {
.get(pdu_id)? Ok(Some(
.map_or_else(|| self.pduid_backfillpdu.get(pdu_id), |x| Ok(Some(x)))? serde_json::from_slice(&pdu)
.map_or(Ok(None), |pdu| { .map_err(|_| Error::bad_database("Invalid PDU in db."))?,
Ok(Some( ))
serde_json::from_slice(&pdu) })
.map_err(|_| Error::bad_database("Invalid PDU in db."))?,
))
})
} }
fn append_pdu( fn append_pdu(
@ -236,13 +186,12 @@ impl service::rooms::timeline::Data for KeyValueDatabase {
event_id: &EventId, event_id: &EventId,
json: &CanonicalJsonObject, json: &CanonicalJsonObject,
) -> Result<()> { ) -> Result<()> {
self.pduid_backfillpdu.insert( self.pduid_pdu.insert(
pdu_id, pdu_id,
&serde_json::to_vec(json).expect("CanonicalJsonObject is always a valid"), &serde_json::to_vec(json).expect("CanonicalJsonObject is always a valid"),
)?; )?;
self.eventid_backfillpduid self.eventid_pduid.insert(event_id.as_bytes(), pdu_id)?;
.insert(event_id.as_bytes(), pdu_id)?;
self.eventid_outlierpdu.remove(event_id.as_bytes())?; self.eventid_outlierpdu.remove(event_id.as_bytes())?;
Ok(()) Ok(())
@ -272,64 +221,24 @@ impl service::rooms::timeline::Data for KeyValueDatabase {
room_id: &RoomId, room_id: &RoomId,
until: PduCount, until: PduCount,
) -> Result<Box<dyn Iterator<Item = Result<(PduCount, PduEvent)>> + 'a>> { ) -> Result<Box<dyn Iterator<Item = Result<(PduCount, PduEvent)>> + 'a>> {
// Create the first part of the full pdu id let (prefix, current) = count_to_id(&room_id, until, 1, true)?;
let prefix = services()
.rooms
.short
.get_shortroomid(room_id)?
.expect("room exists")
.to_be_bytes()
.to_vec();
let mut current_backfill = prefix.clone();
// +1 so we don't send the base event
let backfill_count = match until {
PduCount::Backfilled(x) => x + 1,
PduCount::Normal(_) => 0,
};
current_backfill.extend_from_slice(&backfill_count.to_be_bytes());
let user_id = user_id.to_owned(); let user_id = user_id.to_owned();
let user_id2 = user_id.to_owned();
let prefix2 = prefix.clone();
let backfill_iter = self Ok(Box::new(
.pduid_backfillpdu self.pduid_pdu
.iter_from(&current_backfill, false) .iter_from(&current, true)
.take_while(move |(k, _)| k.starts_with(&prefix)) .take_while(move |(k, _)| k.starts_with(&prefix))
.map(move |(pdu_id, v)| { .map(move |(pdu_id, v)| {
let mut pdu = serde_json::from_slice::<PduEvent>(&v) let mut pdu = serde_json::from_slice::<PduEvent>(&v)
.map_err(|_| Error::bad_database("PDU in db is invalid."))?; .map_err(|_| Error::bad_database("PDU in db is invalid."))?;
if pdu.sender != user_id { if pdu.sender != user_id {
pdu.remove_transaction_id()?; pdu.remove_transaction_id()?;
} }
let count = PduCount::Backfilled(pdu_count(&pdu_id)?); let count = pdu_count(&pdu_id)?;
Ok((count, pdu)) Ok((count, pdu))
}); }),
))
match until {
PduCount::Backfilled(_) => Ok(Box::new(backfill_iter)),
PduCount::Normal(x) => {
let mut current_normal = prefix2.clone();
// -1 so we don't send the base event
current_normal.extend_from_slice(&x.saturating_sub(1).to_be_bytes());
let normal_iter = self
.pduid_pdu
.iter_from(&current_normal, true)
.take_while(move |(k, _)| k.starts_with(&prefix2))
.map(move |(pdu_id, v)| {
let mut pdu = serde_json::from_slice::<PduEvent>(&v)
.map_err(|_| Error::bad_database("PDU in db is invalid."))?;
if pdu.sender != user_id2 {
pdu.remove_transaction_id()?;
}
let count = PduCount::Normal(pdu_count(&pdu_id)?);
Ok((count, pdu))
});
Ok(Box::new(normal_iter.chain(backfill_iter)))
}
}
} }
fn pdus_after<'a>( fn pdus_after<'a>(
@ -338,64 +247,24 @@ impl service::rooms::timeline::Data for KeyValueDatabase {
room_id: &RoomId, room_id: &RoomId,
from: PduCount, from: PduCount,
) -> Result<Box<dyn Iterator<Item = Result<(PduCount, PduEvent)>> + 'a>> { ) -> Result<Box<dyn Iterator<Item = Result<(PduCount, PduEvent)>> + 'a>> {
// Create the first part of the full pdu id let (prefix, current) = count_to_id(&room_id, from, 1, false)?;
let prefix = services()
.rooms
.short
.get_shortroomid(room_id)?
.expect("room exists")
.to_be_bytes()
.to_vec();
let mut current_normal = prefix.clone();
// +1 so we don't send the base event
let normal_count = match from {
PduCount::Normal(x) => x + 1,
PduCount::Backfilled(_) => 0,
};
current_normal.extend_from_slice(&normal_count.to_be_bytes());
let user_id = user_id.to_owned(); let user_id = user_id.to_owned();
let user_id2 = user_id.to_owned();
let prefix2 = prefix.clone();
let normal_iter = self Ok(Box::new(
.pduid_pdu self.pduid_pdu
.iter_from(&current_normal, false) .iter_from(&current, false)
.take_while(move |(k, _)| k.starts_with(&prefix)) .take_while(move |(k, _)| k.starts_with(&prefix))
.map(move |(pdu_id, v)| { .map(move |(pdu_id, v)| {
let mut pdu = serde_json::from_slice::<PduEvent>(&v) let mut pdu = serde_json::from_slice::<PduEvent>(&v)
.map_err(|_| Error::bad_database("PDU in db is invalid."))?; .map_err(|_| Error::bad_database("PDU in db is invalid."))?;
if pdu.sender != user_id { if pdu.sender != user_id {
pdu.remove_transaction_id()?; pdu.remove_transaction_id()?;
} }
let count = PduCount::Normal(pdu_count(&pdu_id)?); let count = pdu_count(&pdu_id)?;
Ok((count, pdu)) Ok((count, pdu))
}); }),
))
match from {
PduCount::Normal(_) => Ok(Box::new(normal_iter)),
PduCount::Backfilled(x) => {
let mut current_backfill = prefix2.clone();
// -1 so we don't send the base event
current_backfill.extend_from_slice(&x.saturating_sub(1).to_be_bytes());
let backfill_iter = self
.pduid_backfillpdu
.iter_from(&current_backfill, true)
.take_while(move |(k, _)| k.starts_with(&prefix2))
.map(move |(pdu_id, v)| {
let mut pdu = serde_json::from_slice::<PduEvent>(&v)
.map_err(|_| Error::bad_database("PDU in db is invalid."))?;
if pdu.sender != user_id2 {
pdu.remove_transaction_id()?;
}
let count = PduCount::Backfilled(pdu_count(&pdu_id)?);
Ok((count, pdu))
});
Ok(Box::new(backfill_iter.chain(normal_iter)))
}
}
} }
fn increment_notification_counts( fn increment_notification_counts(
@ -428,7 +297,58 @@ impl service::rooms::timeline::Data for KeyValueDatabase {
} }
/// Returns the `count` of this pdu's id. /// Returns the `count` of this pdu's id.
fn pdu_count(pdu_id: &[u8]) -> Result<u64> { fn pdu_count(pdu_id: &[u8]) -> Result<PduCount> {
utils::u64_from_bytes(&pdu_id[pdu_id.len() - size_of::<u64>()..]) let last_u64 = utils::u64_from_bytes(&pdu_id[pdu_id.len() - size_of::<u64>()..])
.map_err(|_| Error::bad_database("PDU has invalid count bytes.")) .map_err(|_| Error::bad_database("PDU has invalid count bytes."))?;
let second_last_u64 = utils::u64_from_bytes(
&pdu_id[pdu_id.len() - 2 * size_of::<u64>()..pdu_id.len() - size_of::<u64>()],
);
if matches!(second_last_u64, Ok(0)) {
Ok(PduCount::Backfilled(u64::MAX - last_u64))
} else {
Ok(PduCount::Normal(last_u64))
}
}
fn count_to_id(
room_id: &RoomId,
count: PduCount,
offset: u64,
subtract: bool,
) -> Result<(Vec<u8>, Vec<u8>)> {
let prefix = services()
.rooms
.short
.get_shortroomid(room_id)?
.expect("room exists")
.to_be_bytes()
.to_vec();
let mut pdu_id = prefix.clone();
// +1 so we don't send the base event
let count_raw = match count {
PduCount::Normal(x) => {
if subtract {
x - offset
} else {
x + offset
}
}
PduCount::Backfilled(x) => {
pdu_id.extend_from_slice(&0_u64.to_be_bytes());
let num = u64::MAX - x;
if subtract {
if num > 0 {
num - offset
} else {
num
}
} else {
num + offset
}
}
};
pdu_id.extend_from_slice(&count_raw.to_be_bytes());
Ok((prefix, pdu_id))
} }

View file

@ -74,9 +74,7 @@ pub struct KeyValueDatabase {
//pub rooms: rooms::Rooms, //pub rooms: rooms::Rooms,
pub(super) pduid_pdu: Arc<dyn KvTree>, // PduId = ShortRoomId + Count pub(super) pduid_pdu: Arc<dyn KvTree>, // PduId = ShortRoomId + Count
pub(super) pduid_backfillpdu: Arc<dyn KvTree>, // PduId = ShortRoomId + Count
pub(super) eventid_pduid: Arc<dyn KvTree>, pub(super) eventid_pduid: Arc<dyn KvTree>,
pub(super) eventid_backfillpduid: Arc<dyn KvTree>,
pub(super) roomid_pduleaves: Arc<dyn KvTree>, pub(super) roomid_pduleaves: Arc<dyn KvTree>,
pub(super) alias_roomid: Arc<dyn KvTree>, pub(super) alias_roomid: Arc<dyn KvTree>,
pub(super) aliasid_alias: Arc<dyn KvTree>, // AliasId = RoomId + Count pub(super) aliasid_alias: Arc<dyn KvTree>, // AliasId = RoomId + Count
@ -297,9 +295,7 @@ impl KeyValueDatabase {
presenceid_presence: builder.open_tree("presenceid_presence")?, presenceid_presence: builder.open_tree("presenceid_presence")?,
userid_lastpresenceupdate: builder.open_tree("userid_lastpresenceupdate")?, userid_lastpresenceupdate: builder.open_tree("userid_lastpresenceupdate")?,
pduid_pdu: builder.open_tree("pduid_pdu")?, pduid_pdu: builder.open_tree("pduid_pdu")?,
pduid_backfillpdu: builder.open_tree("pduid_backfillpdu")?,
eventid_pduid: builder.open_tree("eventid_pduid")?, eventid_pduid: builder.open_tree("eventid_pduid")?,
eventid_backfillpduid: builder.open_tree("eventid_backfillpduid")?,
roomid_pduleaves: builder.open_tree("roomid_pduleaves")?, roomid_pduleaves: builder.open_tree("roomid_pduleaves")?,
alias_roomid: builder.open_tree("alias_roomid")?, alias_roomid: builder.open_tree("alias_roomid")?,

View file

@ -1045,6 +1045,24 @@ impl Service {
) -> Result<()> { ) -> Result<()> {
let (event_id, value, room_id) = server_server::parse_incoming_pdu(&pdu)?; let (event_id, value, room_id) = server_server::parse_incoming_pdu(&pdu)?;
// Lock so we cannot backfill the same pdu twice at the same time
let mutex = Arc::clone(
services()
.globals
.roomid_mutex_federation
.write()
.unwrap()
.entry(room_id.to_owned())
.or_default(),
);
let mutex_lock = mutex.lock().await;
// Skip the PDU if we already have it as a timeline event
if let Some(pdu_id) = services().rooms.timeline.get_pdu_id(&event_id)? {
info!("We already know {event_id} at {pdu_id:?}");
return Ok(());
}
services() services()
.rooms .rooms
.event_handler .event_handler
@ -1052,6 +1070,7 @@ impl Service {
.await?; .await?;
let value = self.get_pdu_json(&event_id)?.expect("We just created it"); let value = self.get_pdu_json(&event_id)?.expect("We just created it");
let pdu = self.get_pdu(&event_id)?.expect("We just created it");
let shortroomid = services() let shortroomid = services()
.rooms .rooms
@ -1072,14 +1091,36 @@ impl Service {
let count = services().globals.next_count()?; let count = services().globals.next_count()?;
let mut pdu_id = shortroomid.to_be_bytes().to_vec(); let mut pdu_id = shortroomid.to_be_bytes().to_vec();
pdu_id.extend_from_slice(&count.to_be_bytes()); pdu_id.extend_from_slice(&0_u64.to_be_bytes());
pdu_id.extend_from_slice(&(u64::MAX - count).to_be_bytes());
// Insert pdu // Insert pdu
self.db.prepend_backfill_pdu(&pdu_id, &event_id, &value)?; self.db.prepend_backfill_pdu(&pdu_id, &event_id, &value)?;
drop(insert_lock); drop(insert_lock);
info!("Appended incoming pdu"); match pdu.kind {
RoomEventType::RoomMessage => {
#[derive(Deserialize)]
struct ExtractBody {
body: Option<String>,
}
let content = serde_json::from_str::<ExtractBody>(pdu.content.get())
.map_err(|_| Error::bad_database("Invalid content in pdu."))?;
if let Some(body) = content.body {
services()
.rooms
.search
.index_pdu(shortroomid, &pdu_id, &body)?;
}
}
_ => {}
}
drop(mutex_lock);
info!("Prepended backfill pdu");
Ok(()) Ok(())
} }
} }