From f72d5e7ba9bdf01f207ca7ddc35cc0f7f21ee6ad Mon Sep 17 00:00:00 2001
From: Tim <135014430+nagisa77@users.noreply.github.com>
Date: Fri, 24 Oct 2025 17:06:16 +0800
Subject: [PATCH] feat: add MCP search service

---
 .gitignore                       |   1 +
 docker/docker-compose.yaml       |  26 +++++
 mcp/Dockerfile                   |  17 ++++
 mcp/README.md                    |  39 ++++++++
 mcp/pyproject.toml               |  29 ++++++
 mcp/src/openisle_mcp/__init__.py |  10 ++
 mcp/src/openisle_mcp/client.py   |  79 +++++++++++++++
 mcp/src/openisle_mcp/models.py   |  58 +++++++++++
 mcp/src/openisle_mcp/py.typed    |   0
 mcp/src/openisle_mcp/server.py   | 164 +++++++++++++++++++++++++++++++
 10 files changed, 423 insertions(+)
 create mode 100644 mcp/Dockerfile
 create mode 100644 mcp/README.md
 create mode 100644 mcp/pyproject.toml
 create mode 100644 mcp/src/openisle_mcp/__init__.py
 create mode 100644 mcp/src/openisle_mcp/client.py
 create mode 100644 mcp/src/openisle_mcp/models.py
 create mode 100644 mcp/src/openisle_mcp/py.typed
 create mode 100644 mcp/src/openisle_mcp/server.py

diff --git a/.gitignore b/.gitignore
index edacf6a72..d3db0239d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,7 @@ dist
 
 # misc
 .DS_Store
+__pycache__/
 *.pem
 npm-debug.log*
 yarn-debug.log*
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index 5bf093c77..d57b6bac3 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -213,6 +213,32 @@ services:
       - dev_local_backend
       - prod
 
+  mcp-service:
+    build:
+      context: ..
+      dockerfile: mcp/Dockerfile
+    container_name: ${COMPOSE_PROJECT_NAME}-openisle-mcp
+    env_file:
+      - ${ENV_FILE:-../.env}
+    environment:
+      FASTMCP_HOST: 0.0.0.0
+      FASTMCP_PORT: ${MCP_PORT:-8765}
+      OPENISLE_BACKEND_URL: ${OPENISLE_BACKEND_URL:-http://springboot:8080}
+      OPENISLE_BACKEND_TIMEOUT: ${OPENISLE_BACKEND_TIMEOUT:-10}
+      OPENISLE_MCP_TRANSPORT: ${OPENISLE_MCP_TRANSPORT:-sse}
+      OPENISLE_MCP_SSE_MOUNT_PATH: ${OPENISLE_MCP_SSE_MOUNT_PATH:-/mcp}
+    ports:
+      - "${MCP_PORT:-8765}:${MCP_PORT:-8765}"
+    depends_on:
+      springboot:
+        condition: service_healthy
+    restart: unless-stopped
+    networks:
+      - openisle-network
+    profiles:
+      - dev
+      - prod
+
   frontend_dev:
     image: node:20
     container_name: ${COMPOSE_PROJECT_NAME}-openisle-frontend-dev
diff --git a/mcp/Dockerfile b/mcp/Dockerfile
new file mode 100644
index 000000000..0e9f22932
--- /dev/null
+++ b/mcp/Dockerfile
@@ -0,0 +1,17 @@
+FROM python:3.11-slim AS runtime
+
+ENV PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1
+
+WORKDIR /app
+
+COPY mcp/pyproject.toml /app/pyproject.toml
+COPY mcp/README.md /app/README.md
+COPY mcp/src /app/src
+
+RUN pip install --upgrade pip \
+    && pip install .
+
+EXPOSE 8765
+
+CMD ["openisle-mcp"]
diff --git a/mcp/README.md b/mcp/README.md
new file mode 100644
index 000000000..8fa83270e
--- /dev/null
+++ b/mcp/README.md
@@ -0,0 +1,39 @@
+# OpenIsle MCP Server
+
+This package provides a [Model Context Protocol](https://github.com/modelcontextprotocol) (MCP) server that exposes the OpenIsle
+search capabilities to AI assistants. The server wraps the existing Spring Boot backend and currently provides a single `search`
+tool. Future iterations can extend the server with additional functionality such as publishing new posts or moderating content.
+
+## Features
+
+- 🔍 **Global search** — delegates to the existing `/api/search/global` endpoint exposed by the OpenIsle backend.
+- 🧠 **Structured results** — responses include highlights and deep links so AI clients can present the results cleanly.
+- ⚙️ **Configurable** — point the server at any reachable OpenIsle backend by setting environment variables.
+
+## Local development
+
+```bash
+cd mcp
+python -m venv .venv
+source .venv/bin/activate
+pip install -e .
+openisle-mcp --transport stdio # or "sse"/"streamable-http"
+```
+
+Environment variables:
+
+| Variable | Description | Default |
+| --- | --- | --- |
+| `OPENISLE_BACKEND_URL` | Base URL of the Spring Boot backend | `http://springboot:8080` |
+| `OPENISLE_BACKEND_TIMEOUT` | Timeout (seconds) for backend HTTP calls | `10` |
+| `OPENISLE_PUBLIC_BASE_URL` | Optional base URL used to build deep links in search results | *(unset)* |
+| `OPENISLE_MCP_TRANSPORT` | MCP transport (`stdio`, `sse`, `streamable-http`) | `stdio` |
+| `OPENISLE_MCP_SSE_MOUNT_PATH` | Mount path when using SSE transport | `/mcp` |
+| `FASTMCP_HOST` | Host for SSE / HTTP transports | `127.0.0.1` |
+| `FASTMCP_PORT` | Port for SSE / HTTP transports | `8000` |
+
+## Docker
+
+A dedicated Docker image is provided and wired into `docker-compose.yaml`. The container listens on
+`${MCP_PORT:-8765}` and connects to the backend service running in the same compose stack.
+
diff --git a/mcp/pyproject.toml b/mcp/pyproject.toml
new file mode 100644
index 000000000..75a9c46f3
--- /dev/null
+++ b/mcp/pyproject.toml
@@ -0,0 +1,29 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "openisle-mcp"
+version = "0.1.0"
+description = "Model Context Protocol server exposing OpenIsle search capabilities"
+readme = "README.md"
+authors = [{name = "OpenIsle Team"}]
+license = {text = "MIT"}
+requires-python = ">=3.11"
+dependencies = [
+    "mcp>=1.19.0",
+    "httpx>=0.28.0",
+    "pydantic>=2.12.0",
+]
+
+[project.scripts]
+openisle-mcp = "openisle_mcp.server:main"
+
+[tool.setuptools]
+package-dir = {"" = "src"}
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.setuptools.package-data]
+openisle_mcp = ["py.typed"]
diff --git a/mcp/src/openisle_mcp/__init__.py b/mcp/src/openisle_mcp/__init__.py
new file mode 100644
index 000000000..dbbd48b92
--- /dev/null
+++ b/mcp/src/openisle_mcp/__init__.py
@@ -0,0 +1,10 @@
+"""OpenIsle MCP server package."""
+
+from importlib import metadata
+
+try:
+    __version__ = metadata.version("openisle-mcp")
+except metadata.PackageNotFoundError:  # pragma: no cover - best effort during dev
+    __version__ = "0.0.0"
+
+__all__ = ["__version__"]
diff --git a/mcp/src/openisle_mcp/client.py b/mcp/src/openisle_mcp/client.py
new file mode 100644
index 000000000..d98646fea
--- /dev/null
+++ b/mcp/src/openisle_mcp/client.py
@@ -0,0 +1,79 @@
+"""HTTP client for talking to the OpenIsle backend."""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import List
+
+import httpx
+from pydantic import ValidationError
+
+from .models import BackendSearchResult
+
+__all__ = ["BackendClientError", "OpenIsleBackendClient"]
+
+logger = logging.getLogger(__name__)
+
+
+class BackendClientError(RuntimeError):
+    """Raised when the backend cannot fulfil a request."""
+
+
+class OpenIsleBackendClient:
+    """Tiny wrapper around the Spring Boot
search endpoints."""
+
+    def __init__(self, base_url: str, timeout: float = 10.0) -> None:
+        if not base_url:
+            raise ValueError("base_url must not be empty")
+        self._base_url = base_url.rstrip("/")
+        timeout = timeout if timeout > 0 else 10.0
+        self._timeout = httpx.Timeout(timeout, connect=timeout, read=timeout)
+
+    @property
+    def base_url(self) -> str:
+        return self._base_url
+
+    async def search_global(self, keyword: str) -> List[BackendSearchResult]:
+        """Call `/api/search/global` and normalise the payload."""
+
+        url = f"{self._base_url}/api/search/global"
+        params = {"keyword": keyword}
+        headers = {"Accept": "application/json"}
+        logger.debug("Calling OpenIsle backend", extra={"url": url, "params": params})
+
+        try:
+            async with httpx.AsyncClient(timeout=self._timeout, headers=headers, follow_redirects=True) as client:
+                response = await client.get(url, params=params)
+                response.raise_for_status()
+        except httpx.HTTPStatusError as exc:  # pragma: no cover - network errors are rare in tests
+            body_preview = _truncate_body(exc.response.text)
+            raise BackendClientError(
+                f"Backend returned HTTP {exc.response.status_code}: {body_preview}"
+            ) from exc
+        except httpx.RequestError as exc:  # pragma: no cover - network errors are rare in tests
+            raise BackendClientError(f"Failed to reach backend: {exc}") from exc
+
+        try:
+            payload = response.json()
+        except json.JSONDecodeError as exc:
+            raise BackendClientError("Backend returned invalid JSON") from exc
+
+        if not isinstance(payload, list):
+            raise BackendClientError("Unexpected search payload type; expected a list")
+
+        results: list[BackendSearchResult] = []
+        for item in payload:
+            try:
+                results.append(BackendSearchResult.model_validate(item))
+            except ValidationError as exc:
+                raise BackendClientError(f"Invalid search result payload: {exc}") from exc
+
+        return results
+
+
+def _truncate_body(body: str, limit: int = 200) -> str:
+    body = body.strip()
+    if len(body) <= limit:
+        return body
+    return
f"{body[:limit]}…"
diff --git a/mcp/src/openisle_mcp/models.py b/mcp/src/openisle_mcp/models.py
new file mode 100644
index 000000000..756608642
--- /dev/null
+++ b/mcp/src/openisle_mcp/models.py
@@ -0,0 +1,58 @@
+"""Pydantic models used by the OpenIsle MCP server."""
+
+from __future__ import annotations
+
+from typing import Dict, Optional
+
+from pydantic import BaseModel, ConfigDict, Field
+
+__all__ = [
+    "BackendSearchResult",
+    "SearchResult",
+    "SearchResponse",
+]
+
+
+class BackendSearchResult(BaseModel):
+    """Shape of the payload returned by the OpenIsle backend."""
+
+    type: str
+    id: Optional[int] = None
+    text: Optional[str] = None
+    sub_text: Optional[str] = Field(default=None, alias="subText")
+    extra: Optional[str] = None
+    post_id: Optional[int] = Field(default=None, alias="postId")
+    highlighted_text: Optional[str] = Field(default=None, alias="highlightedText")
+    highlighted_sub_text: Optional[str] = Field(default=None, alias="highlightedSubText")
+    highlighted_extra: Optional[str] = Field(default=None, alias="highlightedExtra")
+
+    model_config = ConfigDict(populate_by_name=True, extra="ignore")
+
+
+class SearchResult(BaseModel):
+    """Structured search result returned to MCP clients."""
+
+    type: str = Field(description="Entity type, e.g. post, comment, user")
+    id: Optional[int] = Field(default=None, description="Primary identifier for the entity")
+    title: Optional[str] = Field(default=None, description="Primary text to display")
+    subtitle: Optional[str] = Field(default=None, description="Secondary text (e.g.
author or category)")
+    extra: Optional[str] = Field(default=None, description="Additional descriptive snippet")
+    post_id: Optional[int] = Field(default=None, description="Associated post id for comment results")
+    url: Optional[str] = Field(default=None, description="Deep link to the resource inside OpenIsle")
+    highlights: Dict[str, Optional[str]] = Field(
+        default_factory=dict,
+        description="Highlighted HTML fragments keyed by field name",
+    )
+
+    model_config = ConfigDict(populate_by_name=True)
+
+
+class SearchResponse(BaseModel):
+    """Response envelope returned from the MCP search tool."""
+
+    keyword: str = Field(description="Sanitised keyword that was searched for")
+    total_results: int = Field(description="Total number of results returned by the backend")
+    limit: int = Field(description="Maximum number of results included in the response")
+    results: list[SearchResult] = Field(default_factory=list, description="Search results up to the requested limit")
+
+    model_config = ConfigDict(populate_by_name=True)
diff --git a/mcp/src/openisle_mcp/py.typed b/mcp/src/openisle_mcp/py.typed
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/mcp/src/openisle_mcp/py.typed
diff --git a/mcp/src/openisle_mcp/server.py b/mcp/src/openisle_mcp/server.py
new file mode 100644
index 000000000..a8f21062d
--- /dev/null
+++ b/mcp/src/openisle_mcp/server.py
@@ -0,0 +1,164 @@
+"""Entry point for the OpenIsle MCP server."""
+
+from __future__ import annotations
+
+import argparse
+import logging
+import os
+from typing import Annotated, Optional
+
+from mcp.server.fastmcp import Context, FastMCP
+from mcp.server.fastmcp import exceptions as mcp_exceptions
+from pydantic import Field
+
+from .client import BackendClientError, OpenIsleBackendClient
+from .models import BackendSearchResult, SearchResponse, SearchResult
+
+logger = logging.getLogger(__name__)
+
+APP_NAME = "openisle-mcp"
+DEFAULT_BACKEND_URL = "http://springboot:8080"
+DEFAULT_TRANSPORT = "stdio"
+DEFAULT_TIMEOUT = 10.0
+DEFAULT_LIMIT = 20
+MAX_LIMIT
= 50
+
+server = FastMCP(
+    APP_NAME,
+    instructions=(
+        "Use the `search` tool to query OpenIsle content. "
+        "Results include posts, comments, users, categories, and tags."
+    ),
+)
+
+
+def _env(name: str, default: Optional[str] = None) -> Optional[str]:
+    value = os.getenv(name, default)
+    if value is None:
+        return None
+    trimmed = value.strip()
+    return trimmed or default
+
+
+def _load_timeout() -> float:
+    raw = _env("OPENISLE_BACKEND_TIMEOUT", str(DEFAULT_TIMEOUT))
+    try:
+        timeout = float(raw) if raw is not None else DEFAULT_TIMEOUT
+    except ValueError:
+        logger.warning("Invalid OPENISLE_BACKEND_TIMEOUT value '%s', falling back to %s", raw, DEFAULT_TIMEOUT)
+        return DEFAULT_TIMEOUT
+    if timeout <= 0:
+        logger.warning("Non-positive OPENISLE_BACKEND_TIMEOUT %s, falling back to %s", timeout, DEFAULT_TIMEOUT)
+        return DEFAULT_TIMEOUT
+    return timeout
+
+
+_BACKEND_CLIENT = OpenIsleBackendClient(
+    base_url=_env("OPENISLE_BACKEND_URL", DEFAULT_BACKEND_URL) or DEFAULT_BACKEND_URL,
+    timeout=_load_timeout(),
+)
+_PUBLIC_BASE_URL = _env("OPENISLE_PUBLIC_BASE_URL")
+
+
+def _build_url(result: BackendSearchResult) -> Optional[str]:
+    if not _PUBLIC_BASE_URL:
+        return None
+    base = _PUBLIC_BASE_URL.rstrip("/")
+    if result.type in {"post", "post_title"} and result.id is not None:
+        return f"{base}/posts/{result.id}"
+    if result.type == "comment" and result.post_id is not None:
+        anchor = f"#comment-{result.id}" if result.id is not None else ""
+        return f"{base}/posts/{result.post_id}{anchor}"
+    if result.type == "user" and result.id is not None:
+        return f"{base}/users/{result.id}"
+    if result.type == "category" and result.id is not None:
+        return f"{base}/?categoryId={result.id}"
+    if result.type == "tag" and result.id is not None:
+        return f"{base}/?tagIds={result.id}"
+    return None
+
+
+def _to_search_result(result: BackendSearchResult) -> SearchResult:
+    highlights = {
+        "text": result.highlighted_text,
+        "subText": result.highlighted_sub_text,
+        "extra":
result.highlighted_extra, + } + # Remove empty highlight entries to keep the payload clean + highlights = {key: value for key, value in highlights.items() if value} + return SearchResult( + type=result.type, + id=result.id, + title=result.text, + subtitle=result.sub_text, + extra=result.extra, + post_id=result.post_id, + url=_build_url(result), + highlights=highlights, + ) + + +KeywordParam = Annotated[str, Field(description="Keyword to search for", min_length=1)] +LimitParam = Annotated[ + int, + Field(ge=1, le=MAX_LIMIT, description=f"Maximum number of results to return (<= {MAX_LIMIT})"), +] + + +@server.tool(name="search", description="Search OpenIsle content") +async def search(keyword: KeywordParam, limit: LimitParam = DEFAULT_LIMIT, ctx: Optional[Context] = None) -> SearchResponse: + """Run a search query against the OpenIsle backend.""" + + trimmed = keyword.strip() + if not trimmed: + raise mcp_exceptions.ToolError("Keyword must not be empty") + + if ctx is not None: + await ctx.debug(f"Searching OpenIsle for '{trimmed}' (limit={limit})") + + try: + raw_results = await _BACKEND_CLIENT.search_global(trimmed) + except BackendClientError as exc: + if ctx is not None: + await ctx.error(f"Search request failed: {exc}") + raise mcp_exceptions.ToolError(f"Search failed: {exc}") from exc + + results = [_to_search_result(result) for result in raw_results] + limited = results[:limit] + + if ctx is not None: + await ctx.info( + "Search completed", + keyword=trimmed, + total_results=len(results), + returned=len(limited), + ) + + return SearchResponse(keyword=trimmed, total_results=len(results), limit=limit, results=limited) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Run the OpenIsle MCP server") + parser.add_argument( + "--transport", + choices=["stdio", "sse", "streamable-http"], + default=_env("OPENISLE_MCP_TRANSPORT", DEFAULT_TRANSPORT), + help="Transport protocol to use", + ) + parser.add_argument( + "--mount-path", + 
+        default=_env("OPENISLE_MCP_SSE_MOUNT_PATH", "/mcp"),
+        help="Mount path when using the SSE transport",
+    )
+    args = parser.parse_args()
+
+    logging.basicConfig(level=os.getenv("OPENISLE_MCP_LOG_LEVEL", "INFO"))
+    logger.info(
+        "Starting OpenIsle MCP server", extra={"transport": args.transport, "backend": _BACKEND_CLIENT.base_url}
+    )
+
+    server.run(transport=args.transport, mount_path=args.mount_path)
+
+
+if __name__ == "__main__":
+    main()