Mirror of https://github.com/nagisa77/OpenIsle.git (synced 2026-02-22 15:21:21 +08:00)

fix: fine-tune index/query rules

@@ -52,15 +52,17 @@ public class SearchIndexInitializer {
     }
   }
 
+  // SearchIndexInitializer.java -- only the replaced/added methods are shown
+
   private TypeMapping postMapping() {
     return TypeMapping.of(builder ->
         builder
             .properties("type", Property.of(p -> p.keyword(k -> k)))
-            .properties("title", textWithPinyin())
-            .properties("content", textWithPinyin())
-            .properties("author", keywordWithPinyin())
-            .properties("category", keywordWithPinyin())
-            .properties("tags", keywordWithPinyin())
+            .properties("title", textWithRawAndPinyin())
+            .properties("content", textWithPinyinOnly()) // no .raw on content, to avoid overly long keyword values
+            .properties("author", keywordWithRawAndPinyin())
+            .properties("category", keywordWithRawAndPinyin())
+            .properties("tags", keywordWithRawAndPinyin())
             .properties("postId", Property.of(p -> p.long_(l -> l)))
             .properties(
                 "createdAt",
@@ -73,11 +75,11 @@ public class SearchIndexInitializer {
     return TypeMapping.of(builder ->
         builder
             .properties("type", Property.of(p -> p.keyword(k -> k)))
-            .properties("title", textWithPinyin())
-            .properties("content", textWithPinyin())
-            .properties("author", keywordWithPinyin())
-            .properties("category", keywordWithPinyin())
-            .properties("tags", keywordWithPinyin())
+            .properties("title", textWithRawAndPinyin())
+            .properties("content", textWithPinyinOnly())
+            .properties("author", keywordWithRawAndPinyin())
+            .properties("category", keywordWithRawAndPinyin())
+            .properties("tags", keywordWithRawAndPinyin())
             .properties("postId", Property.of(p -> p.long_(l -> l)))
             .properties(
                 "createdAt",
@@ -90,8 +92,8 @@ public class SearchIndexInitializer {
     return TypeMapping.of(builder ->
         builder
             .properties("type", Property.of(p -> p.keyword(k -> k)))
-            .properties("title", textWithPinyin())
-            .properties("content", textWithPinyin())
+            .properties("title", textWithRawAndPinyin())
+            .properties("content", textWithPinyinOnly())
             .properties(
                 "createdAt",
                 Property.of(p -> p.date(d -> d.format("strict_date_optional_time||epoch_millis")))
@@ -103,8 +105,8 @@ public class SearchIndexInitializer {
     return TypeMapping.of(builder ->
         builder
             .properties("type", Property.of(p -> p.keyword(k -> k)))
-            .properties("title", textWithPinyin())
-            .properties("content", textWithPinyin())
+            .properties("title", textWithRawAndPinyin())
+            .properties("content", textWithPinyinOnly())
     );
   }
 
@@ -112,8 +114,8 @@ public class SearchIndexInitializer {
     return TypeMapping.of(builder ->
         builder
             .properties("type", Property.of(p -> p.keyword(k -> k)))
-            .properties("title", textWithPinyin())
-            .properties("content", textWithPinyin())
+            .properties("title", textWithRawAndPinyin())
+            .properties("content", textWithPinyinOnly())
             .properties(
                 "createdAt",
                 Property.of(p -> p.date(d -> d.format("strict_date_optional_time||epoch_millis")))
@@ -121,45 +123,100 @@ public class SearchIndexInitializer {
     );
   }
 
-  private Property textWithPinyin() {
+  // SearchIndexInitializer.java -- only the replaced/added methods are shown
+
+  /** Text fields: .raw (exact keyword) + .py (exact pinyin phrases) + .zh (ICU + 2-3 gram recall) */
+  private Property textWithRawAndPinyin() {
     return Property.of(p ->
         p.text(t ->
-            t.fields("py", field ->
-                field.text(sub -> sub.analyzer("py_index").searchAnalyzer("py_search"))
-            )
+            t
+                .fields("raw", f -> f.keyword(k -> k.normalizer("lowercase_normalizer")))
+                .fields("py", f -> f.text(sub -> sub.analyzer("py_index").searchAnalyzer("py_search")))
+                .fields("zh", f ->
+                    f.text(sub -> sub.analyzer("zh_ngram_index").searchAnalyzer("zh_search"))
+                )
         )
     );
   }
 
-  private Property keywordWithPinyin() {
+  /** Long-text content: keep pinyin + add a zh sub-field (no .raw, to avoid overly long keywords) */
+  private Property textWithPinyinOnly() {
+    return Property.of(p ->
+        p.text(t ->
+            t
+                .fields("py", f -> f.text(sub -> sub.analyzer("py_index").searchAnalyzer("py_search")))
+                .fields("zh", f ->
+                    f.text(sub -> sub.analyzer("zh_ngram_index").searchAnalyzer("zh_search"))
+                )
+        )
+    );
+  }
+
+  /** Keyword fields (author/category/tags): keyword equality + .py + .zh (aligned with the title strategy where possible) */
+  private Property keywordWithRawAndPinyin() {
     return Property.of(p ->
         p.keyword(k ->
-            k.fields("py", field ->
-                field.text(sub -> sub.analyzer("py_index").searchAnalyzer("py_search"))
-            )
+            k
+                .normalizer("lowercase_normalizer")
+                .fields("raw", f -> f.keyword(kk -> kk.normalizer("lowercase_normalizer")))
+                .fields("py", f -> f.text(sub -> sub.analyzer("py_index").searchAnalyzer("py_search")))
+                .fields("zh", f ->
+                    f.text(sub -> sub.analyzer("zh_ngram_index").searchAnalyzer("zh_search"))
+                )
         )
     );
   }
 
+  /** Add the zh analyzers (ICU + 2-3 gram) while keeping the existing pinyin/normalizer settings */
   private IndexSettings.Builder applyPinyinAnalysis(IndexSettings.Builder builder) {
     Map<String, JsonData> settings = new LinkedHashMap<>();
 
+    // --- existing: keyword normalizer (used by .raw)
     settings.put("analysis.normalizer.lowercase_normalizer.type", JsonData.of("custom"));
     settings.put(
         "analysis.normalizer.lowercase_normalizer.filter",
         JsonData.of(List.of("lowercase"))
     );
 
+    // --- existing: pinyin filter + analyzers
     settings.put("analysis.filter.py_filter.type", JsonData.of("pinyin"));
     settings.put("analysis.filter.py_filter.keep_full_pinyin", JsonData.of(true));
     settings.put("analysis.filter.py_filter.keep_joined_full_pinyin", JsonData.of(true));
     settings.put("analysis.filter.py_filter.keep_first_letter", JsonData.of(false));
     settings.put("analysis.filter.py_filter.remove_duplicated_term", JsonData.of(true));
 
     settings.put("analysis.analyzer.py_index.type", JsonData.of("custom"));
     settings.put("analysis.analyzer.py_index.tokenizer", JsonData.of("standard"));
     settings.put(
         "analysis.analyzer.py_index.filter",
         JsonData.of(List.of("lowercase", "py_filter"))
     );
 
     settings.put("analysis.analyzer.py_search.type", JsonData.of("custom"));
     settings.put("analysis.analyzer.py_search.tokenizer", JsonData.of("standard"));
     settings.put(
         "analysis.analyzer.py_search.filter",
         JsonData.of(List.of("lowercase", "py_filter"))
     );
 
+    settings.put("analysis.filter.zh_ngram_2_3.type", JsonData.of("ngram"));
+    settings.put("analysis.filter.zh_ngram_2_3.min_gram", JsonData.of(2));
+    settings.put("analysis.filter.zh_ngram_2_3.max_gram", JsonData.of(3));
+
+    settings.put("analysis.analyzer.zh_ngram_index.type", JsonData.of("custom"));
+    settings.put("analysis.analyzer.zh_ngram_index.tokenizer", JsonData.of("icu_tokenizer"));
+    settings.put(
+        "analysis.analyzer.zh_ngram_index.filter",
+        JsonData.of(List.of("lowercase", "zh_ngram_2_3"))
+    );
+
+    settings.put("analysis.analyzer.zh_search.type", JsonData.of("custom"));
+    settings.put("analysis.analyzer.zh_search.tokenizer", JsonData.of("icu_tokenizer"));
+    settings.put(
+        "analysis.analyzer.zh_search.filter",
+        JsonData.of(List.of("lowercase", "zh_ngram_2_3"))
+    );
+
     settings.forEach(builder::customSettings);
     return builder;
   }
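
For context, here is a minimal sketch of how these pieces would typically be wired together when the index is created with the Elasticsearch Java client. The createPostIndex method and the "posts" index name are assumptions for illustration, not code from this commit:

import co.elastic.clients.elasticsearch.ElasticsearchClient;
import java.io.IOException;

// Hypothetical wiring: the analyzers/normalizers and the mapping must be
// applied together at index-creation time, because the analyzers the mapping
// references (py_index, zh_ngram_index, ...) have to exist in that index.
void createPostIndex(ElasticsearchClient client) throws IOException {
  client.indices().create(c -> c
      .index("posts") // assumed index name
      .settings(s -> applyPinyinAnalysis(s)) // the returned builder satisfies the lambda
      .mappings(postMapping())
  );
}

Note that icu_tokenizer and the pinyin token filter come from the analysis-icu and analysis-pinyin plugins, which must be installed on the cluster before these settings are accepted.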
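
On the query side, a sketch of how the new sub-fields might be combined. The index name, the PostDoc document class, and the field boosts are illustrative assumptions rather than this project's actual search code:

import co.elastic.clients.elasticsearch.ElasticsearchClient;
import co.elastic.clients.elasticsearch.core.SearchResponse;
import java.io.IOException;

// Assumed minimal document shape for deserialization.
record PostDoc(String title, String content) {}

// Base analyzed fields are boosted above pinyin matches, which in turn sit
// above the broad zh n-gram recall fields, so exact hits rank first.
SearchResponse<PostDoc> searchPosts(ElasticsearchClient client, String text) throws IOException {
  return client.search(s -> s
          .index("posts") // assumed index name
          .query(q -> q.multiMatch(m -> m
              .query(text)
              .fields("title^3", "title.py^2", "title.zh",
                  "content^2", "content.py", "content.zh")
          )),
      PostDoc.class
  );
}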
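
For the keyword fields, the new normalizer("lowercase_normalizer") on the top-level field makes exact lookups case-insensitive. A sketch reusing the assumed client, index name, and PostDoc record from above:

// The normalizer is applied both at index time and to the input of term
// queries against the field, so "Alice" and "alice" hit the same entry.
SearchResponse<PostDoc> postsByAuthor(ElasticsearchClient client, String author) throws IOException {
  return client.search(s -> s
          .index("posts")
          .query(q -> q.term(t -> t
              .field("author")
              .value(v -> v.stringValue(author))
          )),
      PostDoc.class
  );
}

The .raw sub-field keeps a second normalized keyword copy, which stays useful for sorting and aggregations on the text fields that now expose it.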