feat: rss细化

This commit is contained in:
tim
2025-08-18 19:43:34 +08:00
parent 42693cb1ff
commit 1b31977ec6
3 changed files with 260 additions and 29 deletions

View File

@@ -3,13 +3,27 @@ package com.openisle.controller;
import com.openisle.model.Post;
import com.openisle.service.PostService;
import lombok.RequiredArgsConstructor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Safelist;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import com.vladsch.flexmark.ext.autolink.AutolinkExtension;
import com.vladsch.flexmark.ext.tables.TablesExtension;
import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension;
import com.vladsch.flexmark.ext.gfm.tasklist.TaskListExtension;
import com.vladsch.flexmark.html.HtmlRenderer;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.data.MutableDataSet;
import java.net.URI;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -21,58 +35,248 @@ public class RssController {
@Value("${app.website-url:https://www.open-isle.com}")
private String websiteUrl;
// 兼容 Markdown/HTML 两类图片写法(用于 enclosure
private static final Pattern MD_IMAGE = Pattern.compile("!\\[[^\\]]*\\]\\(([^)]+)\\)");
private static final Pattern HTML_IMAGE = Pattern.compile("<img[^>]+src=[\"']?([^\"'>]+)[\"']?[^>]*>");
private static final DateTimeFormatter RFC1123 = DateTimeFormatter.RFC_1123_DATE_TIME;
// flexmarkMarkdown -> HTML
private static final Parser MD_PARSER;
private static final HtmlRenderer MD_RENDERER;
static {
MutableDataSet opts = new MutableDataSet();
opts.set(Parser.EXTENSIONS, Arrays.asList(
TablesExtension.create(),
AutolinkExtension.create(),
StrikethroughExtension.create(),
TaskListExtension.create()
));
// 允许内联 HTML下游再做 sanitize
opts.set(Parser.HTML_BLOCK_PARSER, true);
MD_PARSER = Parser.builder(opts).build();
MD_RENDERER = HtmlRenderer.builder(opts).escapeHtml(false).build();
}
@GetMapping(value = "/api/rss", produces = "application/rss+xml;charset=UTF-8")
public String feed() {
// 建议 20你现在是 10这里保留你的 10
List<Post> posts = postService.listLatestRssPosts(10);
StringBuilder sb = new StringBuilder();
String base = trimTrailingSlash(websiteUrl);
StringBuilder sb = new StringBuilder(4096);
sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
sb.append("<rss version=\"2.0\">");
sb.append("<rss version=\"2.0\" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\">");
sb.append("<channel>");
sb.append("<title>OpenIsle RSS</title>");
sb.append("<link>").append(websiteUrl).append("</link>");
sb.append("<description>Latest posts</description>");
DateTimeFormatter fmt = DateTimeFormatter.RFC_1123_DATE_TIME;
elem(sb, "title", cdata("OpenIsle RSS"));
elem(sb, "link", base + "/");
elem(sb, "description", cdata("Latest posts"));
ZonedDateTime updated = posts.stream()
.map(p -> p.getCreatedAt().atZone(ZoneId.systemDefault()))
.max(Comparator.naturalOrder())
.orElse(ZonedDateTime.now());
// channel lastBuildDateGMT
elem(sb, "lastBuildDate", toRfc1123Gmt(updated));
for (Post p : posts) {
String link = websiteUrl + "/posts/" + p.getId();
String link = base + "/posts/" + p.getId();
// 1) Markdown -> HTML
String html = renderMarkdown(p.getContent());
// 2) Sanitize白名单增强
String safeHtml = sanitizeHtml(html);
// 3) 绝对化 href/src + 强制 rel/target
String absHtml = absolutifyHtml(safeHtml, base);
// 4) 纯文本摘要(用于 <description>
String plain = textSummary(absHtml, 180);
// 5) enclosure首图已绝对化
String enclosure = firstImage(p.getContent());
if (enclosure == null) {
// 如果 Markdown 没有图,尝试从渲染后的 HTML 再抓一次
enclosure = firstImage(absHtml);
}
if (enclosure != null) {
enclosure = absolutifyUrl(enclosure, base);
}
sb.append("<item>");
sb.append("<title><![CDATA[").append(p.getTitle()).append("]]></title>");
sb.append("<link>").append(link).append("</link>");
elem(sb, "title", cdata(nullSafe(p.getTitle())));
elem(sb, "link", link);
sb.append("<guid isPermaLink=\"true\">").append(link).append("</guid>");
sb.append("<pubDate>")
.append(p.getCreatedAt().atZone(ZoneId.systemDefault()).format(fmt))
.append("</pubDate>");
String desc = p.getContent() + "\n<p>更多细节请访问原文:" + link + "</p>";
sb.append("<description><![CDATA[").append(desc).append("]]></description>");
String img = firstImage(p.getContent());
if (img != null) {
sb.append("<enclosure url=\"").append(img).append("\" type=\"")
.append(getMimeType(img)).append("\" />");
elem(sb, "pubDate", toRfc1123Gmt(p.getCreatedAt().atZone(ZoneId.systemDefault())));
// 摘要
elem(sb, "description", cdata(plain));
// 全文HTML
sb.append("<content:encoded><![CDATA[").append(absHtml).append("]]></content:encoded>");
// 首图 enclosure图片类型
if (enclosure != null) {
sb.append("<enclosure url=\"").append(escapeXml(enclosure)).append("\" type=\"")
.append(getMimeType(enclosure)).append("\" />");
}
sb.append("</item>");
}
sb.append("</channel></rss>");
return sb.toString();
}
/* ===================== Markdown → HTML ===================== */
private static String renderMarkdown(String md) {
if (md == null || md.isEmpty()) return "";
return MD_RENDERER.render(MD_PARSER.parse(md));
}
/* ===================== Sanitize & 绝对化 ===================== */
private static String sanitizeHtml(String html) {
if (html == null) return "";
Safelist wl = Safelist.relaxed()
.addTags("pre", "code", "figure", "figcaption", "picture", "source",
"table","thead","tbody","tr","th","td","h1","h2","h3","h4","h5","h6")
.addAttributes("a", "href", "title", "target", "rel")
.addAttributes("img", "src", "alt", "title", "width", "height")
.addAttributes("source", "srcset", "type", "media")
.addAttributes("code", "class")
.addAttributes("pre", "class")
.addProtocols("a", "href", "http", "https", "mailto")
.addProtocols("img", "src", "http", "https", "data")
.addProtocols("source", "srcset", "http", "https");
// 清除所有 on* 事件、style避免阅读器环境差异
return Jsoup.clean(html, wl);
}
private static String absolutifyHtml(String html, String baseUrl) {
if (html == null || html.isEmpty()) return "";
Document doc = Jsoup.parseBodyFragment(html, baseUrl);
// a[href]
for (Element a : doc.select("a[href]")) {
String href = a.attr("href");
String abs = absolutifyUrl(href, baseUrl);
a.attr("href", abs);
// 强制外链安全属性
a.attr("rel", "noopener noreferrer nofollow");
a.attr("target", "_blank");
}
// img[src]
for (Element img : doc.select("img[src]")) {
String src = img.attr("src");
String abs = absolutifyUrl(src, baseUrl);
img.attr("src", abs);
}
// source[srcset] picture/webp
for (Element s : doc.select("source[srcset]")) {
String srcset = s.attr("srcset");
s.attr("srcset", absolutifySrcset(srcset, baseUrl));
}
return doc.body().html();
}
private static String absolutifyUrl(String url, String baseUrl) {
if (url == null || url.isEmpty()) return url;
String u = url.trim();
if (u.startsWith("//")) {
return "https:" + u;
}
if (u.startsWith("#")) {
// 保留页面内锚点:拼接到首页(也可拼接到当前帖子的 link但此处无上下文
return baseUrl + "/" + u;
}
try {
URI base = URI.create(ensureTrailingSlash(baseUrl));
URI abs = base.resolve(u);
return abs.toString();
} catch (Exception e) {
return url;
}
}
private static String absolutifySrcset(String srcset, String baseUrl) {
if (srcset == null || srcset.isEmpty()) return srcset;
String[] parts = srcset.split(",");
List<String> out = new ArrayList<>(parts.length);
for (String part : parts) {
String p = part.trim();
if (p.isEmpty()) continue;
String[] seg = p.split("\\s+");
String url = seg[0];
String size = seg.length > 1 ? seg[1] : "";
out.add(absolutifyUrl(url, baseUrl) + (size.isEmpty() ? "" : " " + size));
}
return String.join(", ", out);
}
/* ===================== 摘要 & enclosure ===================== */
private static String textSummary(String html, int maxLen) {
if (html == null) return "";
String text = Jsoup.parse(html).text().replaceAll("\\s+", " ").trim();
if (text.length() <= maxLen) return text;
return text.substring(0, maxLen) + "";
}
private String firstImage(String content) {
if (content == null) return null;
Matcher m = MD_IMAGE.matcher(content);
if (m.find()) {
return m.group(1);
}
if (m.find()) return m.group(1);
m = HTML_IMAGE.matcher(content);
if (m.find()) {
return m.group(1);
}
if (m.find()) return m.group(1);
// 再从纯 HTML 里解析一次(如果传入的是渲染后的)
try {
Document doc = Jsoup.parse(content);
Element img = doc.selectFirst("img[src]");
if (img != null) return img.attr("src");
} catch (Exception ignored) {}
return null;
}
private String getMimeType(String url) {
String lower = url.toLowerCase();
if (lower.endsWith(".png")) return "image/png";
if (lower.endsWith(".gif")) return "image/gif";
private static String getMimeType(String url) {
String lower = url == null ? "" : url.toLowerCase(Locale.ROOT);
if (lower.endsWith(".png")) return "image/png";
if (lower.endsWith(".gif")) return "image/gif";
if (lower.endsWith(".webp")) return "image/webp";
if (lower.endsWith(".svg")) return "image/svg+xml";
if (lower.endsWith(".avif")) return "image/avif";
if (lower.endsWith(".jpg") || lower.endsWith(".jpeg")) return "image/jpeg";
// 默认兜底
return "image/jpeg";
}
/* ===================== 时间/字符串/XML ===================== */
private static String toRfc1123Gmt(ZonedDateTime zdt) {
return zdt.withZoneSameInstant(ZoneId.of("GMT")).format(RFC1123);
}
private static String cdata(String s) {
if (s == null) return "<![CDATA[]]>";
// 防止出现 "]]>" 终止标记破坏 CDATA
return "<![CDATA[" + s.replace("]]>", "]]]]><![CDATA[>") + "]]>";
}
private static void elem(StringBuilder sb, String name, String value) {
sb.append('<').append(name).append('>').append(value).append("</").append(name).append('>');
}
private static String escapeXml(String s) {
if (s == null) return "";
return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
.replace("\"", "&quot;").replace("'", "&apos;");
}
private static String trimTrailingSlash(String s) {
if (s == null) return "";
return s.endsWith("/") ? s.substring(0, s.length() - 1) : s;
}
private static String ensureTrailingSlash(String s) {
if (s == null || s.isEmpty()) return "/";
return s.endsWith("/") ? s : s + "/";
}
private static String nullSafe(String s) { return s == null ? "" : s; }
}