improvement: better memory usage and admin commands to analyze it

This commit is contained in:
Timo Kösters 2023-07-03 19:37:54 +02:00
parent bac13d08ae
commit a2c3256ced
No known key found for this signature in database
GPG key ID: 0B25E636FBA7E4CB
10 changed files with 517 additions and 331 deletions

View file

@ -105,7 +105,7 @@ async-trait = "0.1.68"
sd-notify = { version = "0.4.1", optional = true }
[features]
default = ["conduit_bin", "backend_sqlite", "backend_rocksdb", "jemalloc", "systemd"]
default = ["conduit_bin", "backend_sqlite", "backend_rocksdb", "systemd"]
#backend_sled = ["sled"]
backend_persy = ["persy", "parking_lot"]
backend_sqlite = ["sqlite"]

View file

@ -653,6 +653,11 @@ async fn load_joined_room(
.user
.get_token_shortstatehash(&room_id, since)?;
let (heroes, joined_member_count, invited_member_count, joined_since_last_sync, state_events) =
if timeline_pdus.is_empty() && since_shortstatehash == Some(current_shortstatehash) {
// No state changes
(Vec::new(), None, None, false, Vec::new())
} else {
// Calculates joined_member_count, invited_member_count and heroes
let calculate_counts = || {
let joined_member_count = services()
@ -680,20 +685,24 @@ async fn load_joined_room(
.filter_map(|pdu| pdu.ok()) // Ignore all broken pdus
.filter(|(_, pdu)| pdu.kind == TimelineEventType::RoomMember)
.map(|(_, pdu)| {
let content: RoomMemberEventContent = serde_json::from_str(pdu.content.get())
.map_err(|_| {
let content: RoomMemberEventContent =
serde_json::from_str(pdu.content.get()).map_err(|_| {
Error::bad_database("Invalid member event in database.")
})?;
if let Some(state_key) = &pdu.state_key {
let user_id = UserId::parse(state_key.clone())
.map_err(|_| Error::bad_database("Invalid UserId in member PDU."))?;
let user_id = UserId::parse(state_key.clone()).map_err(|_| {
Error::bad_database("Invalid UserId in member PDU.")
})?;
// The membership was and still is invite or join
if matches!(
content.membership,
MembershipState::Join | MembershipState::Invite
) && (services().rooms.state_cache.is_joined(&user_id, &room_id)?
) && (services()
.rooms
.state_cache
.is_joined(&user_id, &room_id)?
|| services()
.rooms
.state_cache
@ -746,10 +755,9 @@ async fn load_joined_room(
.ok()
});
let joined_since_last_sync =
since_sender_member.map_or(true, |member| member.membership != MembershipState::Join);
let joined_since_last_sync = since_sender_member
.map_or(true, |member| member.membership != MembershipState::Join);
let (heroes, joined_member_count, invited_member_count, joined_since_last_sync, state_events) =
if since_shortstatehash.is_none() || joined_since_last_sync {
// Probably since = 0, we will do an initial sync
@ -836,9 +844,6 @@ async fn load_joined_room(
true,
state_events,
)
} else if timeline_pdus.is_empty() && since_shortstatehash == Some(current_shortstatehash) {
// No state changes
(Vec::new(), None, None, false, Vec::new())
} else {
// Incremental /sync
let since_shortstatehash = since_shortstatehash.unwrap();
@ -945,8 +950,9 @@ async fn load_joined_room(
}
if let Some(state_key) = &state_event.state_key {
let user_id = UserId::parse(state_key.clone())
.map_err(|_| Error::bad_database("Invalid UserId in member PDU."))?;
let user_id = UserId::parse(state_key.clone()).map_err(|_| {
Error::bad_database("Invalid UserId in member PDU.")
})?;
if user_id == sender_user {
continue;
@ -989,7 +995,8 @@ async fn load_joined_room(
})
.filter(|user_id| {
// Only send keys if the sender doesn't share an encrypted room with the target already
!share_encrypted_room(&sender_user, user_id, &room_id).unwrap_or(false)
!share_encrypted_room(&sender_user, user_id, &room_id)
.unwrap_or(false)
}),
);
}
@ -1007,6 +1014,7 @@ async fn load_joined_room(
joined_since_last_sync,
state_events,
)
}
};
// Look for device list updates in this room

View file

@ -224,7 +224,7 @@ fn default_database_backend() -> String {
}
fn default_db_cache_capacity_mb() -> f64 {
1000.0
300.0
}
fn default_conduit_cache_capacity_modifier() -> f64 {

View file

@ -38,6 +38,7 @@ pub trait KeyValueDatabaseEngine: Send + Sync {
fn memory_usage(&self) -> Result<String> {
Ok("Current database engine does not support memory usage reporting.".to_owned())
}
fn clear_caches(&self) {}
}
pub trait KvTree: Send + Sync {

View file

@ -45,6 +45,10 @@ fn db_options(max_open_files: i32, rocksdb_cache: &rocksdb::Cache) -> rocksdb::O
db_opts.set_compaction_style(rocksdb::DBCompactionStyle::Level);
db_opts.optimize_level_style_compaction(10 * 1024 * 1024);
// https://github.com/facebook/rocksdb/wiki/Setup-Options-and-Basic-Tuning
db_opts.set_max_background_jobs(6);
db_opts.set_bytes_per_sync(1048576);
let prefix_extractor = rocksdb::SliceTransform::create_fixed_prefix(1);
db_opts.set_prefix_extractor(prefix_extractor);
@ -121,6 +125,8 @@ impl KeyValueDatabaseEngine for Arc<Engine> {
self.cache.get_pinned_usage() as f64 / 1024.0 / 1024.0,
))
}
fn clear_caches(&self) {}
}
impl RocksDbEngineTree<'_> {

View file

@ -118,8 +118,59 @@ impl service::globals::Data for KeyValueDatabase {
self._db.cleanup()
}
fn memory_usage(&self) -> Result<String> {
self._db.memory_usage()
fn memory_usage(&self) -> String {
let pdu_cache = self.pdu_cache.lock().unwrap().len();
let shorteventid_cache = self.shorteventid_cache.lock().unwrap().len();
let auth_chain_cache = self.auth_chain_cache.lock().unwrap().len();
let eventidshort_cache = self.eventidshort_cache.lock().unwrap().len();
let statekeyshort_cache = self.statekeyshort_cache.lock().unwrap().len();
let our_real_users_cache = self.our_real_users_cache.read().unwrap().len();
let appservice_in_room_cache = self.appservice_in_room_cache.read().unwrap().len();
let lasttimelinecount_cache = self.lasttimelinecount_cache.lock().unwrap().len();
let mut response = format!(
"\
pdu_cache: {pdu_cache}
shorteventid_cache: {shorteventid_cache}
auth_chain_cache: {auth_chain_cache}
eventidshort_cache: {eventidshort_cache}
statekeyshort_cache: {statekeyshort_cache}
our_real_users_cache: {our_real_users_cache}
appservice_in_room_cache: {appservice_in_room_cache}
lasttimelinecount_cache: {lasttimelinecount_cache}\n"
);
if let Ok(db_stats) = self._db.memory_usage() {
response += &db_stats;
}
response
}
fn clear_caches(&self, amount: u32) {
if amount > 0 {
self.pdu_cache.lock().unwrap().clear();
}
if amount > 1 {
self.shorteventid_cache.lock().unwrap().clear();
}
if amount > 2 {
self.auth_chain_cache.lock().unwrap().clear();
}
if amount > 3 {
self.eventidshort_cache.lock().unwrap().clear();
}
if amount > 4 {
self.statekeyshort_cache.lock().unwrap().clear();
}
if amount > 5 {
self.our_real_users_cache.write().unwrap().clear();
}
if amount > 6 {
self.appservice_in_room_cache.write().unwrap().clear();
}
if amount > 7 {
self.lasttimelinecount_cache.lock().unwrap().clear();
}
}
fn load_keypair(&self) -> Result<Ed25519KeyPair> {

View file

@ -134,7 +134,13 @@ enum AdminCommand {
},
/// Print database memory usage statistics
DatabaseMemoryUsage,
MemoryUsage,
/// Clears all of Conduit's database caches with index smaller than the amount
ClearDatabaseCaches { amount: u32 },
/// Clears all of Conduit's service caches with index smaller than the amount
ClearServiceCaches { amount: u32 },
/// Show configuration values
ShowConfig,
@ -531,12 +537,24 @@ impl Service {
None => RoomMessageEventContent::text_plain("PDU not found."),
}
}
AdminCommand::DatabaseMemoryUsage => match services().globals.db.memory_usage() {
Ok(response) => RoomMessageEventContent::text_plain(response),
Err(e) => RoomMessageEventContent::text_plain(format!(
"Failed to get database memory usage: {e}"
)),
},
AdminCommand::MemoryUsage => {
let response1 = services().memory_usage();
let response2 = services().globals.db.memory_usage();
RoomMessageEventContent::text_plain(format!(
"Services:\n{response1}\n\nDatabase:\n{response2}"
))
}
AdminCommand::ClearDatabaseCaches { amount } => {
services().globals.db.clear_caches(amount);
RoomMessageEventContent::text_plain("Done.")
}
AdminCommand::ClearServiceCaches { amount } => {
services().clear_caches(amount);
RoomMessageEventContent::text_plain("Done.")
}
AdminCommand::ShowConfig => {
// Construct and send the response
RoomMessageEventContent::text_plain(format!("{}", services().globals.config))

View file

@ -15,7 +15,8 @@ pub trait Data: Send + Sync {
fn current_count(&self) -> Result<u64>;
async fn watch(&self, user_id: &UserId, device_id: &DeviceId) -> Result<()>;
fn cleanup(&self) -> Result<()>;
fn memory_usage(&self) -> Result<String>;
fn memory_usage(&self) -> String;
fn clear_caches(&self, amount: u32);
fn load_keypair(&self) -> Result<Ed25519KeyPair>;
fn remove_keypair(&self) -> Result<()>;
fn add_signing_key(

View file

@ -214,10 +214,6 @@ impl Service {
self.db.cleanup()
}
pub fn memory_usage(&self) -> Result<String> {
self.db.memory_usage()
}
pub fn server_name(&self) -> &ServerName {
self.config.server_name.as_ref()
}

View file

@ -90,7 +90,7 @@ impl Services {
state_compressor: rooms::state_compressor::Service {
db,
stateinfo_cache: Mutex::new(LruCache::new(
(300.0 * config.conduit_cache_capacity_modifier) as usize,
(100.0 * config.conduit_cache_capacity_modifier) as usize,
)),
},
timeline: rooms::timeline::Service {
@ -115,4 +115,109 @@ impl Services {
globals: globals::Service::load(db, config)?,
})
}
fn memory_usage(&self) -> String {
let lazy_load_waiting = self
.rooms
.lazy_loading
.lazy_load_waiting
.lock()
.unwrap()
.len();
let server_visibility_cache = self
.rooms
.state_accessor
.server_visibility_cache
.lock()
.unwrap()
.len();
let user_visibility_cache = self
.rooms
.state_accessor
.user_visibility_cache
.lock()
.unwrap()
.len();
let stateinfo_cache = self
.rooms
.state_compressor
.stateinfo_cache
.lock()
.unwrap()
.len();
let lasttimelinecount_cache = self
.rooms
.timeline
.lasttimelinecount_cache
.lock()
.unwrap()
.len();
let roomid_spacechunk_cache = self
.rooms
.spaces
.roomid_spacechunk_cache
.lock()
.unwrap()
.len();
format!(
"\
lazy_load_waiting: {lazy_load_waiting}
server_visibility_cache: {server_visibility_cache}
user_visibility_cache: {user_visibility_cache}
stateinfo_cache: {stateinfo_cache}
lasttimelinecount_cache: {lasttimelinecount_cache}
roomid_spacechunk_cache: {roomid_spacechunk_cache}\
"
)
}
fn clear_caches(&self, amount: u32) {
if amount > 0 {
self.rooms
.lazy_loading
.lazy_load_waiting
.lock()
.unwrap()
.clear();
}
if amount > 1 {
self.rooms
.state_accessor
.server_visibility_cache
.lock()
.unwrap()
.clear();
}
if amount > 2 {
self.rooms
.state_accessor
.user_visibility_cache
.lock()
.unwrap()
.clear();
}
if amount > 3 {
self.rooms
.state_compressor
.stateinfo_cache
.lock()
.unwrap()
.clear();
}
if amount > 4 {
self.rooms
.timeline
.lasttimelinecount_cache
.lock()
.unwrap()
.clear();
}
if amount > 5 {
self.rooms
.spaces
.roomid_spacechunk_cache
.lock()
.unwrap()
.clear();
}
}
}