From b1e598cb17d1fee36a2c0c633ad251c66cb7a9e2 Mon Sep 17 00:00:00 2001 From: Magnus Hoff Date: Fri, 19 Jan 2018 16:49:15 +0100 Subject: [PATCH] Experimentally transform markdown for better presentation in full text search results For issue #37 --- assets/search.js | 4 ++-- assets/style.css | 3 --- build.rs | 16 +++++++++++-- .../down.sql | 0 .../up.sql | 24 +++++++++++++++++++ src/db.rs | 19 +++++++++++++-- src/rendering.rs | 20 ++++++++++++++++ src/resources/search_resource.rs | 2 +- src/state.rs | 2 +- templates/search.html | 2 +- 10 files changed, 80 insertions(+), 12 deletions(-) create mode 100644 migrations/20180119150706_convert_markdown_for_fts/down.sql create mode 100644 migrations/20180119150706_convert_markdown_for_fts/up.sql diff --git a/assets/search.js b/assets/search.js index 0bc9997..f5d45ed 100644 --- a/assets/search.js +++ b/assets/search.js @@ -37,7 +37,7 @@ function debouncer(interval, callback) { const query = input.value; fetch( - "_search?snippet_size=4&limit=4&q=" + encodeURIComponent(query), + "_search?snippet_size=10&limit=4&q=" + encodeURIComponent(query), { headers: { "Accept": "application/json", @@ -56,7 +56,7 @@ function debouncer(interval, callback) { item.querySelector('.link').href = hit.slug || "."; item.querySelector('.link').setAttribute("data-focusindex", index + 1); item.querySelector('.title').textContent = hit.title; - item.querySelector('.snippet').textContent = hit.snippet; + item.querySelector('.snippet').innerHTML = hit.snippet; results.appendChild(item); }) diff --git a/assets/style.css b/assets/style.css index 336982a..00210ba 100644 --- a/assets/style.css +++ b/assets/style.css @@ -305,9 +305,6 @@ article ul.search-results { .search-result .title { font-weight: bold; } -.snippet { - white-space: pre-line; -} .search-result p { margin: 0; } diff --git a/build.rs b/build.rs index 3d92390..c460933 100644 --- a/build.rs +++ b/build.rs @@ -11,6 +11,11 @@ use std::io::prelude::*; use std::path::Path; use walkdir::WalkDir; 
+use std::ffi::CString; +fn markdown_to_fts(_: &::diesel::sqlite::Context) -> CString { + panic!("Should never be called when running migrations on build.db") +} + fn main() { let out_dir = env::var("OUT_DIR").expect("cargo must set OUT_DIR"); let db_path = Path::new(&out_dir).join("build.db"); @@ -18,14 +23,21 @@ fn main() { let _ignore_failure = std::fs::remove_file(db_path); - let connection = SqliteConnection::establish(db_path) + let mut connection = SqliteConnection::establish(db_path) .expect(&format!("Error esablishing a database connection to {}", db_path)); // Integer is a dummy placeholder. Compiling fails when passing (). - diesel::expression::sql_literal::sql::<(diesel::types::Integer)>("PRAGMA foreign_keys = ON") + diesel::expression::sql_literal::sql::<(diesel::sql_types::Integer)>("PRAGMA foreign_keys = ON") .execute(&connection) .expect("Should be able to enable foreign keys"); + connection.create_scalar_function( + "markdown_to_fts", + 1, + true, + markdown_to_fts, + ).unwrap(); + diesel_migrations::run_pending_migrations(&connection).unwrap(); let infer_schema_path = Path::new(&out_dir).join("infer_schema.rs"); diff --git a/migrations/20180119150706_convert_markdown_for_fts/down.sql b/migrations/20180119150706_convert_markdown_for_fts/down.sql new file mode 100644 index 0000000..e69de29 diff --git a/migrations/20180119150706_convert_markdown_for_fts/up.sql b/migrations/20180119150706_convert_markdown_for_fts/up.sql new file mode 100644 index 0000000..8c9f4d9 --- /dev/null +++ b/migrations/20180119150706_convert_markdown_for_fts/up.sql @@ -0,0 +1,24 @@ +DROP TRIGGER article_revisions_ai; +DROP TRIGGER article_revisions_ad; +DROP TRIGGER article_revisions_au_disable; +DROP TRIGGER article_revisions_au_enable; + +CREATE TRIGGER article_revisions_ai AFTER INSERT ON article_revisions WHEN new.latest = 1 BEGIN + DELETE FROM article_search WHERE rowid = new.article_id; + INSERT INTO article_search(rowid, title, body, slug) VALUES (new.article_id, 
new.title, markdown_to_fts(new.body), new.slug); +END; +CREATE TRIGGER article_revisions_ad AFTER DELETE ON article_revisions WHEN old.latest = 1 BEGIN + DELETE FROM article_search WHERE rowid = old.article_id; +END; + +-- Index unique_latest_revision_per_article_id makes sure the following is sufficient: +CREATE TRIGGER article_revisions_au_disable AFTER UPDATE ON article_revisions WHEN old.latest = 1 AND new.latest = 0 BEGIN + DELETE FROM article_search WHERE rowid = old.article_id; +END; +CREATE TRIGGER article_revisions_au_enable AFTER UPDATE ON article_revisions WHEN old.latest = 0 AND new.latest = 1 BEGIN + INSERT INTO article_search(rowid, title, body, slug) VALUES (new.article_id, new.title, markdown_to_fts(new.body), new.slug); +END; + +DELETE FROM article_search; +INSERT INTO article_search(title, body, slug) + SELECT title, markdown_to_fts(body), slug FROM article_revisions WHERE latest = 1; diff --git a/src/db.rs b/src/db.rs index e4cdc4d..a1654f3 100644 --- a/src/db.rs +++ b/src/db.rs @@ -9,12 +9,27 @@ embed_migrations!(); #[derive(Debug)] struct SqliteInitializer; +use std::ffi::CString; + +fn markdown_to_fts(ctx: &::diesel::sqlite::Context) -> CString { + use rendering; + CString::new(rendering::render_markdown_for_fts(&ctx.get::(0))).unwrap() +} + impl CustomizeConnection for SqliteInitializer { fn on_acquire(&self, conn: &mut SqliteConnection) -> Result<(), r2d2_diesel::Error> { sql::<(Integer)>("PRAGMA foreign_keys = ON") .execute(conn) - .and(Ok(())) - .map_err(|x| r2d2_diesel::Error::QueryError(x)) + .map_err(|x| r2d2_diesel::Error::QueryError(x))?; + + conn.create_scalar_function( + "markdown_to_fts", + 1, + true, + markdown_to_fts, + ).map_err(|x| r2d2_diesel::Error::QueryError(x))?; + + Ok(()) } } diff --git a/src/rendering.rs b/src/rendering.rs index 2662d5a..dca1ad8 100644 --- a/src/rendering.rs +++ b/src/rendering.rs @@ -1,4 +1,5 @@ use pulldown_cmark::{Parser, html, OPTION_ENABLE_TABLES, OPTION_DISABLE_HTML}; +use 
pulldown_cmark::Event::Text; pub fn render_markdown(src: &str) -> String { let opts = OPTION_ENABLE_TABLES | OPTION_DISABLE_HTML; @@ -7,3 +8,22 @@ pub fn render_markdown(src: &str) -> String { html::push_html(&mut buf, p); buf } + +pub fn render_markdown_for_fts(src: &str) -> String { + let opts = OPTION_ENABLE_TABLES | OPTION_DISABLE_HTML; + let p = Parser::new_ext(src, opts); + let mut buf = String::new(); + + for event in p { + match event { + Text(text) => buf.push_str(&text), + _ => buf.push_str(" "), + } + } + + let buf = buf.replace('&', ""); /* str::replace returns a new String; rebind it so the stripping actually takes effect */ + let buf = buf.replace('<', ""); + let buf = buf.replace('>', ""); + + buf +} diff --git a/src/resources/search_resource.rs b/src/resources/search_resource.rs index c64a962..e0ea905 100644 --- a/src/resources/search_resource.rs +++ b/src/resources/search_resource.rs @@ -12,7 +12,7 @@ use state::State; use web::{Resource, ResponseFuture}; const DEFAULT_LIMIT: u32 = 10; -const DEFAULT_SNIPPET_SIZE: u32 = 8; +const DEFAULT_SNIPPET_SIZE: u32 = 25; type BoxResource = Box; diff --git a/src/state.rs b/src/state.rs index de740a9..1930fb1 100644 --- a/src/state.rs +++ b/src/state.rs @@ -386,7 +386,7 @@ impl<'a> SyncState<'a> { Ok( sql_query( - "SELECT title, snippet(article_search, 1, '', '', '\u{2026}', ?) AS snippet, slug \ + "SELECT title, snippet(article_search, 1, '', '', '\u{2026}', ?) AS snippet, slug \ FROM article_search \ WHERE article_search MATCH ? \ ORDER BY rank \ diff --git a/templates/search.html b/templates/search.html index f882ed4..4fbd9fc 100644 --- a/templates/search.html +++ b/templates/search.html @@ -13,7 +13,7 @@ {{/hits}}