diff --git a/backend/src/main/java/com/openisle/search/SearchIndexInitializer.java b/backend/src/main/java/com/openisle/search/SearchIndexInitializer.java
index 23f30e408..4a069a9b0 100644
--- a/backend/src/main/java/com/openisle/search/SearchIndexInitializer.java
+++ b/backend/src/main/java/com/openisle/search/SearchIndexInitializer.java
@@ -2,11 +2,16 @@ package com.openisle.search;
 
 import jakarta.annotation.PostConstruct;
 import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
+import org.opensearch.client.json.JsonData;
 import org.opensearch.client.opensearch.OpenSearchClient;
 import org.opensearch.client.opensearch._types.mapping.Property;
 import org.opensearch.client.opensearch._types.mapping.TypeMapping;
+import org.opensearch.client.opensearch.indices.IndexSettings;
 
 @Slf4j
 @RequiredArgsConstructor
@@ -36,7 +41,11 @@ public class SearchIndexInitializer {
       if (exists) {
         return;
       }
-      client.indices().create(builder -> builder.index(index).mappings(mappingSupplier.get()));
+      client
+        .indices()
+        .create(builder ->
+          builder.index(index).settings(this::applyPinyinAnalysis).mappings(mappingSupplier.get())
+        );
       log.info("Created OpenSearch index {}", index);
     } catch (IOException e) {
       log.warn("Failed to initialize OpenSearch index {}", index, e);
@@ -47,11 +56,11 @@ public class SearchIndexInitializer {
     return TypeMapping.of(builder ->
       builder
         .properties("type", Property.of(p -> p.keyword(k -> k)))
-        .properties("title", Property.of(p -> p.text(t -> t)))
-        .properties("content", Property.of(p -> p.text(t -> t)))
-        .properties("author", Property.of(p -> p.keyword(k -> k)))
-        .properties("category", Property.of(p -> p.keyword(k -> k)))
-        .properties("tags", Property.of(p -> p.keyword(k -> k)))
+        .properties("title", textWithPinyin())
+        .properties("content", textWithPinyin())
+        .properties("author", keywordWithPinyin())
+        .properties("category", keywordWithPinyin())
+        .properties("tags", keywordWithPinyin())
         .properties("postId", Property.of(p -> p.long_(l -> l)))
         .properties(
           "createdAt",
@@ -64,11 +73,11 @@ public class SearchIndexInitializer {
     return TypeMapping.of(builder ->
       builder
         .properties("type", Property.of(p -> p.keyword(k -> k)))
-        .properties("title", Property.of(p -> p.text(t -> t)))
-        .properties("content", Property.of(p -> p.text(t -> t)))
-        .properties("author", Property.of(p -> p.keyword(k -> k)))
-        .properties("category", Property.of(p -> p.keyword(k -> k)))
-        .properties("tags", Property.of(p -> p.keyword(k -> k)))
+        .properties("title", textWithPinyin())
+        .properties("content", textWithPinyin())
+        .properties("author", keywordWithPinyin())
+        .properties("category", keywordWithPinyin())
+        .properties("tags", keywordWithPinyin())
         .properties("postId", Property.of(p -> p.long_(l -> l)))
         .properties(
           "createdAt",
@@ -81,8 +90,8 @@ public class SearchIndexInitializer {
     return TypeMapping.of(builder ->
       builder
         .properties("type", Property.of(p -> p.keyword(k -> k)))
-        .properties("title", Property.of(p -> p.text(t -> t)))
-        .properties("content", Property.of(p -> p.text(t -> t)))
+        .properties("title", textWithPinyin())
+        .properties("content", textWithPinyin())
         .properties(
           "createdAt",
           Property.of(p -> p.date(d -> d.format("strict_date_optional_time||epoch_millis")))
@@ -94,8 +103,8 @@ public class SearchIndexInitializer {
     return TypeMapping.of(builder ->
       builder
         .properties("type", Property.of(p -> p.keyword(k -> k)))
-        .properties("title", Property.of(p -> p.text(t -> t)))
-        .properties("content", Property.of(p -> p.text(t -> t)))
+        .properties("title", textWithPinyin())
+        .properties("content", textWithPinyin())
     );
   }
 
@@ -103,12 +112,55 @@ public class SearchIndexInitializer {
     return TypeMapping.of(builder ->
       builder
         .properties("type", Property.of(p -> p.keyword(k -> k)))
-        .properties("title", Property.of(p -> p.text(t -> t)))
-        .properties("content", Property.of(p -> p.text(t -> t)))
+        .properties("title", textWithPinyin())
+        .properties("content", textWithPinyin())
         .properties(
           "createdAt",
           Property.of(p -> p.date(d -> d.format("strict_date_optional_time||epoch_millis")))
         )
     );
   }
+
+  private Property textWithPinyin() {
+    return Property.of(p ->
+      p.text(t ->
+        t.fields("py", field ->
+          field.text(sub -> sub.analyzer("py_index").searchAnalyzer("py_search"))
+        )
+      )
+    );
+  }
+
+  private Property keywordWithPinyin() {
+    return Property.of(p ->
+      p.keyword(k ->
+        k.fields("py", field ->
+          field.text(sub -> sub.analyzer("py_index").searchAnalyzer("py_search"))
+        )
+      )
+    );
+  }
+
+  private IndexSettings.Builder applyPinyinAnalysis(IndexSettings.Builder builder) {
+    Map<String, JsonData> settings = new LinkedHashMap<>();
+    settings.put("analysis.filter.py_filter.type", JsonData.of("pinyin"));
+    settings.put("analysis.filter.py_filter.keep_full_pinyin", JsonData.of(true));
+    settings.put("analysis.filter.py_filter.keep_joined_full_pinyin", JsonData.of(true));
+    settings.put("analysis.filter.py_filter.keep_first_letter", JsonData.of(true));
+    settings.put("analysis.filter.py_filter.remove_duplicated_term", JsonData.of(true));
+    settings.put("analysis.analyzer.py_index.type", JsonData.of("custom"));
+    settings.put("analysis.analyzer.py_index.tokenizer", JsonData.of("standard"));
+    settings.put(
+      "analysis.analyzer.py_index.filter",
+      JsonData.of(List.of("lowercase", "py_filter"))
+    );
+    settings.put("analysis.analyzer.py_search.type", JsonData.of("custom"));
+    settings.put("analysis.analyzer.py_search.tokenizer", JsonData.of("standard"));
+    settings.put(
+      "analysis.analyzer.py_search.filter",
+      JsonData.of(List.of("lowercase", "py_filter"))
+    );
+    settings.forEach(builder::customSettings);
+    return builder;
+  }
 }
diff --git a/backend/src/main/java/com/openisle/service/SearchService.java b/backend/src/main/java/com/openisle/service/SearchService.java
index 417100895..8e1ead2c5 100644
--- a/backend/src/main/java/com/openisle/service/SearchService.java
+++ b/backend/src/main/java/com/openisle/service/SearchService.java
@@ -197,7 +197,7 @@ public class SearchService {
                   s.multiMatch(mm ->
                     mm
                       .query(qRaw)
-                      .fields("title^3", "content^2")
+                      .fields("title^3", "title.py^3", "content^2", "content.py^2")
                       .type(TextQueryType.BestFields)
                       .fuzziness("AUTO")
                       .minimumShouldMatch("70%")
@@ -210,7 +210,17 @@ public class SearchService {
                 bool.should(s ->
                   s.queryString(qs ->
                     qs
-                      .query("(title:" + qsEscaped + "* OR content:" + qsEscaped + "*)")
+                      .query(
+                        "(title:" +
+                        qsEscaped +
+                        "* OR title.py:" +
+                        qsEscaped +
+                        "* OR content:" +
+                        qsEscaped +
+                        "* OR content.py:" +
+                        qsEscaped +
+                        "*)"
+                      )
                       .analyzeWildcard(true)
                   )
                 );
@@ -226,6 +236,30 @@ public class SearchService {
                       .boost(2.0f)
                   )
                 );
+                bool.should(s ->
+                  s.match(m ->
+                    m
+                      .field("author.py")
+                      .query(v -> v.stringValue(qRaw))
+                      .boost(2.0f)
+                  )
+                );
+                bool.should(s ->
+                  s.match(m ->
+                    m
+                      .field("category.py")
+                      .query(v -> v.stringValue(qRaw))
+                      .boost(1.2f)
+                  )
+                );
+                bool.should(s ->
+                  s.match(m ->
+                    m
+                      .field("tags.py")
+                      .query(v -> v.stringValue(qRaw))
+                      .boost(1.2f)
+                  )
+                );
 
                 if (enableWildcard) {
                   // prefix/wildcard 这里的 value 是 String,直接传即可