From e2fe1c50b8eb8ec70a2f3f74c5c601c8f41d9e99 Mon Sep 17 00:00:00 2001 From: daimond113 <72147841+daimond113@users.noreply.github.com> Date: Wed, 6 Nov 2024 21:22:57 +0100 Subject: [PATCH] fix(registry): ignore search query casing --- registry/src/endpoints/search.rs | 13 +------------ registry/src/main.rs | 4 +++- registry/src/search.rs | 17 ++++++++++++++--- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/registry/src/endpoints/search.rs b/registry/src/endpoints/search.rs index 7e331be..3fb6f07 100644 --- a/registry/src/endpoints/search.rs +++ b/registry/src/endpoints/search.rs @@ -28,23 +28,12 @@ pub async fn search_packages( let id = schema.get_field("id").unwrap(); - let scope = schema.get_field("scope").unwrap(); - let name = schema.get_field("name").unwrap(); - let description = schema.get_field("description").unwrap(); - let query = request.query.as_deref().unwrap_or_default().trim(); let query = if query.is_empty() { Box::new(AllQuery) } else { - let mut query_parser = tantivy::query::QueryParser::for_index( - searcher.index(), - vec![scope, name, description], - ); - query_parser.set_field_boost(scope, 2.0); - query_parser.set_field_boost(name, 3.5); - - query_parser.parse_query(query)? + app_state.query_parser.parse_query(query)? 
}; let (count, top_docs) = searcher diff --git a/registry/src/main.rs b/registry/src/main.rs index 984e501..a9bebac 100644 --- a/registry/src/main.rs +++ b/registry/src/main.rs @@ -45,6 +45,7 @@ pub struct AppState { pub search_reader: tantivy::IndexReader, pub search_writer: std::sync::Mutex<tantivy::IndexWriter>, + pub query_parser: tantivy::query::QueryParser, } #[macro_export] @@ -105,7 +106,7 @@ async fn run() -> std::io::Result<()> { .await .expect("failed to refresh source"); - let (search_reader, search_writer) = make_search(&project, &source); + let (search_reader, search_writer, query_parser) = make_search(&project, &source); let app_data = web::Data::new(AppState { storage: { @@ -124,6 +125,7 @@ async fn run() -> std::io::Result<()> { search_reader, search_writer: std::sync::Mutex::new(search_writer), + query_parser, }); let publish_governor_config = GovernorConfigBuilder::default() diff --git a/registry/src/search.rs b/registry/src/search.rs index 322f689..008013a 100644 --- a/registry/src/search.rs +++ b/registry/src/search.rs @@ -6,11 +6,16 @@ use pesde::{ }; use tantivy::{ doc, + query::QueryParser, schema::{IndexRecordOption, TextFieldIndexing, TextOptions, FAST, STORED, STRING}, + tokenizer::TextAnalyzer, DateTime, IndexReader, IndexWriter, Term, }; -pub fn make_search(project: &Project, source: &PesdePackageSource) -> (IndexReader, IndexWriter) { +pub fn make_search( + project: &Project, + source: &PesdePackageSource, +) -> (IndexReader, IndexWriter, QueryParser) { let mut schema_builder = tantivy::schema::SchemaBuilder::new(); let field_options = TextOptions::default().set_indexing_options( @@ -28,7 +33,9 @@ pub fn make_search(project: &Project, source: &PesdePackageSource) -> (IndexRead let search_index = tantivy::Index::create_in_ram(schema_builder.build()); search_index.tokenizers().register( "ngram", - tantivy::tokenizer::NgramTokenizer::all_ngrams(1, 12).unwrap(), + TextAnalyzer::builder(tantivy::tokenizer::NgramTokenizer::all_ngrams(1, 12).unwrap()) + 
.filter(tantivy::tokenizer::LowerCaser) + .build(), ); let search_reader = search_index @@ -56,7 +63,11 @@ pub fn make_search(project: &Project, source: &PesdePackageSource) -> (IndexRead search_writer.commit().unwrap(); search_reader.reload().unwrap(); - (search_reader, search_writer) + let mut query_parser = QueryParser::for_index(&search_index, vec![scope, name, description]); + query_parser.set_field_boost(scope, 2.0); + query_parser.set_field_boost(name, 3.5); + + (search_reader, search_writer, query_parser) } pub fn update_version(app_state: &AppState, name: &PackageName, entry: IndexFileEntry) {