mirror of
https://github.com/nagisa77/OpenIsle.git
synced 2026-02-18 13:01:02 +08:00
fix: add pinyin
This commit is contained in:
@@ -2,11 +2,16 @@ package com.openisle.search;
|
||||
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.opensearch.client.json.JsonData;
|
||||
import org.opensearch.client.opensearch.OpenSearchClient;
|
||||
import org.opensearch.client.opensearch._types.mapping.Property;
|
||||
import org.opensearch.client.opensearch._types.mapping.TypeMapping;
|
||||
import org.opensearch.client.opensearch.indices.IndexSettings;
|
||||
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
@@ -36,7 +41,11 @@ public class SearchIndexInitializer {
|
||||
if (exists) {
|
||||
return;
|
||||
}
|
||||
client.indices().create(builder -> builder.index(index).mappings(mappingSupplier.get()));
|
||||
client
|
||||
.indices()
|
||||
.create(builder ->
|
||||
builder.index(index).settings(this::applyPinyinAnalysis).mappings(mappingSupplier.get())
|
||||
);
|
||||
log.info("Created OpenSearch index {}", index);
|
||||
} catch (IOException e) {
|
||||
log.warn("Failed to initialize OpenSearch index {}", index, e);
|
||||
@@ -47,11 +56,11 @@ public class SearchIndexInitializer {
|
||||
return TypeMapping.of(builder ->
|
||||
builder
|
||||
.properties("type", Property.of(p -> p.keyword(k -> k)))
|
||||
.properties("title", Property.of(p -> p.text(t -> t)))
|
||||
.properties("content", Property.of(p -> p.text(t -> t)))
|
||||
.properties("author", Property.of(p -> p.keyword(k -> k)))
|
||||
.properties("category", Property.of(p -> p.keyword(k -> k)))
|
||||
.properties("tags", Property.of(p -> p.keyword(k -> k)))
|
||||
.properties("title", textWithPinyin())
|
||||
.properties("content", textWithPinyin())
|
||||
.properties("author", keywordWithPinyin())
|
||||
.properties("category", keywordWithPinyin())
|
||||
.properties("tags", keywordWithPinyin())
|
||||
.properties("postId", Property.of(p -> p.long_(l -> l)))
|
||||
.properties(
|
||||
"createdAt",
|
||||
@@ -64,11 +73,11 @@ public class SearchIndexInitializer {
|
||||
return TypeMapping.of(builder ->
|
||||
builder
|
||||
.properties("type", Property.of(p -> p.keyword(k -> k)))
|
||||
.properties("title", Property.of(p -> p.text(t -> t)))
|
||||
.properties("content", Property.of(p -> p.text(t -> t)))
|
||||
.properties("author", Property.of(p -> p.keyword(k -> k)))
|
||||
.properties("category", Property.of(p -> p.keyword(k -> k)))
|
||||
.properties("tags", Property.of(p -> p.keyword(k -> k)))
|
||||
.properties("title", textWithPinyin())
|
||||
.properties("content", textWithPinyin())
|
||||
.properties("author", keywordWithPinyin())
|
||||
.properties("category", keywordWithPinyin())
|
||||
.properties("tags", keywordWithPinyin())
|
||||
.properties("postId", Property.of(p -> p.long_(l -> l)))
|
||||
.properties(
|
||||
"createdAt",
|
||||
@@ -81,8 +90,8 @@ public class SearchIndexInitializer {
|
||||
return TypeMapping.of(builder ->
|
||||
builder
|
||||
.properties("type", Property.of(p -> p.keyword(k -> k)))
|
||||
.properties("title", Property.of(p -> p.text(t -> t)))
|
||||
.properties("content", Property.of(p -> p.text(t -> t)))
|
||||
.properties("title", textWithPinyin())
|
||||
.properties("content", textWithPinyin())
|
||||
.properties(
|
||||
"createdAt",
|
||||
Property.of(p -> p.date(d -> d.format("strict_date_optional_time||epoch_millis")))
|
||||
@@ -94,8 +103,8 @@ public class SearchIndexInitializer {
|
||||
return TypeMapping.of(builder ->
|
||||
builder
|
||||
.properties("type", Property.of(p -> p.keyword(k -> k)))
|
||||
.properties("title", Property.of(p -> p.text(t -> t)))
|
||||
.properties("content", Property.of(p -> p.text(t -> t)))
|
||||
.properties("title", textWithPinyin())
|
||||
.properties("content", textWithPinyin())
|
||||
);
|
||||
}
|
||||
|
||||
@@ -103,12 +112,55 @@ public class SearchIndexInitializer {
|
||||
return TypeMapping.of(builder ->
|
||||
builder
|
||||
.properties("type", Property.of(p -> p.keyword(k -> k)))
|
||||
.properties("title", Property.of(p -> p.text(t -> t)))
|
||||
.properties("content", Property.of(p -> p.text(t -> t)))
|
||||
.properties("title", textWithPinyin())
|
||||
.properties("content", textWithPinyin())
|
||||
.properties(
|
||||
"createdAt",
|
||||
Property.of(p -> p.date(d -> d.format("strict_date_optional_time||epoch_millis")))
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
private Property textWithPinyin() {
|
||||
return Property.of(p ->
|
||||
p.text(t ->
|
||||
t.fields("py", field ->
|
||||
field.text(sub -> sub.analyzer("py_index").searchAnalyzer("py_search"))
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
private Property keywordWithPinyin() {
|
||||
return Property.of(p ->
|
||||
p.keyword(k ->
|
||||
k.fields("py", field ->
|
||||
field.text(sub -> sub.analyzer("py_index").searchAnalyzer("py_search"))
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
private IndexSettings.Builder applyPinyinAnalysis(IndexSettings.Builder builder) {
|
||||
Map<String, JsonData> settings = new LinkedHashMap<>();
|
||||
settings.put("analysis.filter.py_filter.type", JsonData.of("pinyin"));
|
||||
settings.put("analysis.filter.py_filter.keep_full_pinyin", JsonData.of(true));
|
||||
settings.put("analysis.filter.py_filter.keep_joined_full_pinyin", JsonData.of(true));
|
||||
settings.put("analysis.filter.py_filter.keep_first_letter", JsonData.of(true));
|
||||
settings.put("analysis.filter.py_filter.remove_duplicated_term", JsonData.of(true));
|
||||
settings.put("analysis.analyzer.py_index.type", JsonData.of("custom"));
|
||||
settings.put("analysis.analyzer.py_index.tokenizer", JsonData.of("standard"));
|
||||
settings.put(
|
||||
"analysis.analyzer.py_index.filter",
|
||||
JsonData.of(List.of("lowercase", "py_filter"))
|
||||
);
|
||||
settings.put("analysis.analyzer.py_search.type", JsonData.of("custom"));
|
||||
settings.put("analysis.analyzer.py_search.tokenizer", JsonData.of("standard"));
|
||||
settings.put(
|
||||
"analysis.analyzer.py_search.filter",
|
||||
JsonData.of(List.of("lowercase", "py_filter"))
|
||||
);
|
||||
settings.forEach(builder::customSettings);
|
||||
return builder;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -197,7 +197,7 @@ public class SearchService {
|
||||
s.multiMatch(mm ->
|
||||
mm
|
||||
.query(qRaw)
|
||||
.fields("title^3", "content^2")
|
||||
.fields("title^3", "title.py^3", "content^2", "content.py^2")
|
||||
.type(TextQueryType.BestFields)
|
||||
.fuzziness("AUTO")
|
||||
.minimumShouldMatch("70%")
|
||||
@@ -210,7 +210,17 @@ public class SearchService {
|
||||
bool.should(s ->
|
||||
s.queryString(qs ->
|
||||
qs
|
||||
.query("(title:" + qsEscaped + "* OR content:" + qsEscaped + "*)")
|
||||
.query(
|
||||
"(title:" +
|
||||
qsEscaped +
|
||||
"* OR title.py:" +
|
||||
qsEscaped +
|
||||
"* OR content:" +
|
||||
qsEscaped +
|
||||
"* OR content.py:" +
|
||||
qsEscaped +
|
||||
"*)"
|
||||
)
|
||||
.analyzeWildcard(true)
|
||||
)
|
||||
);
|
||||
@@ -226,6 +236,30 @@ public class SearchService {
|
||||
.boost(2.0f)
|
||||
)
|
||||
);
|
||||
bool.should(s ->
|
||||
s.match(m ->
|
||||
m
|
||||
.field("author.py")
|
||||
.query(v -> v.stringValue(qRaw))
|
||||
.boost(2.0f)
|
||||
)
|
||||
);
|
||||
bool.should(s ->
|
||||
s.match(m ->
|
||||
m
|
||||
.field("category.py")
|
||||
.query(v -> v.stringValue(qRaw))
|
||||
.boost(1.2f)
|
||||
)
|
||||
);
|
||||
bool.should(s ->
|
||||
s.match(m ->
|
||||
m
|
||||
.field("tags.py")
|
||||
.query(v -> v.stringValue(qRaw))
|
||||
.boost(1.2f)
|
||||
)
|
||||
);
|
||||
|
||||
if (enableWildcard) {
|
||||
// prefix/wildcard: the value here is a String, so it can be passed directly
|
||||
|
||||
Reference in New Issue
Block a user