MCP Server¶

FastMCP tool definitions — the interface LLMs interact with. Tools are provider-agnostic; any MCP client can connect via stdio or streamable HTTP.

MCP server exposing SQL indexer tools.

This is the interface LLMs interact with. Tools are provider-agnostic — any MCP client (Claude, Cursor, Continue.dev, etc.) can connect via stdio or streamable HTTP.

Focused entirely on SQL: tables, views, CTEs, column lineage, transforms, WHERE filters, and dependency tracing across dialects.

configure ¶

configure(db_path, repos, sql_dialect=None)

Initialise the graph and indexer with repo configuration.

Parameters:

Name	Type	Description	Default
`db_path`	`str \| Path`	Path to DuckDB file	required
`repos`	`dict`	{repo_name: path_or_config} — value is either a string path or a dict with "path", "dialect", "dialect_overrides" keys	required
`sql_dialect`	`str \| None`	Global fallback SQL dialect (overridden by per-repo config)	`None`

Thread-safety: builds a new immutable _ServerState and swaps it in with a single assignment, so concurrent readers never see partial updates.

Source code in src/sqlprism/core/mcp_tools.py

def configure(db_path: str | Path, repos: dict, sql_dialect: str | None = None):
    """Set up the graph database and indexer for the configured repositories.

    Args:
        db_path: Location of the DuckDB file.
        repos: Mapping of repo name to either a plain path string or a dict
               carrying "path", "dialect", "dialect_overrides" keys (an
               optional "repo_type" key is also read and defaults to "sql").
        sql_dialect: Global fallback SQL dialect; per-repo config takes
                     precedence over it.

    Thread-safety: a fresh ``_ServerState`` is assembled in full and then
    published with one assignment, so concurrent readers never observe a
    half-built state.
    """
    global _state
    graph = GraphDB(db_path)
    indexer = Indexer(graph)

    # Register every repo first, so the state published below already
    # contains all of them.
    for repo_name, repo_cfg in repos.items():
        if isinstance(repo_cfg, dict):
            repo_path = repo_cfg["path"]
            repo_kind = repo_cfg.get("repo_type", "sql")
        else:
            # A bare (non-dict) value is used directly as the repo path.
            repo_path, repo_kind = repo_cfg, "sql"
        graph.upsert_repo(repo_name, repo_path, repo_type=repo_kind)

    # One assignment publishes graph, indexer and config together.
    _state = _ServerState(
        graph=graph,
        indexer=indexer,
        config={
            "db_path": str(db_path),
            "repos": repos,
            "sql_dialect": sql_dialect,
        },
    )

search `async` ¶

search(params)

Search for SQL entities by name across the codebase graph.

Finds tables, views, CTEs, and queries by partial name match. Returns matches with name, kind, file path, repo, and line numbers.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="search",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def search(params: SearchInput) -> dict:
    """Search for SQL entities by name across the codebase graph.

    Finds tables, views, CTEs, and queries by partial name match.
    Returns matches with name, kind, file path, repo, and line numbers.
    """
    graph = _get_graph()
    # Field-for-field mapping from the tool input onto query_search keywords;
    # note that the input's ``sql_schema`` is passed as ``schema``.
    query_args = {
        "pattern": params.pattern,
        "kind": params.kind,
        "schema": params.sql_schema,
        "repo": params.repo,
        "limit": params.limit,
        "offset": params.offset,
        "include_snippets": params.include_snippets,
    }
    # Run the synchronous graph query in a worker thread so the event loop stays free.
    return await asyncio.to_thread(graph.query_search, **query_args)

find_references `async` ¶

find_references(params)

Find everything connected to a named SQL entity.

Returns both inbound (what depends on this) and outbound (what this depends on) relationships. Each result includes: name, kind, relationship type, file path, repo.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="find_references",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def find_references(params: FindReferencesInput) -> dict:
    """Find everything connected to a named SQL entity.

    Returns both inbound (what depends on this) and outbound (what this depends on)
    relationships. Each result includes: name, kind, relationship type, file path, repo.
    """
    graph = _get_graph()
    # Tool input fields map one-to-one onto query_references keywords
    # (``sql_schema`` is forwarded as ``schema``).
    query_args = {
        "name": params.name,
        "kind": params.kind,
        "schema": params.sql_schema,
        "repo": params.repo,
        "direction": params.direction,
        "include_snippets": params.include_snippets,
        "limit": params.limit,
        "offset": params.offset,
    }
    # Synchronous graph call is pushed to a worker thread.
    return await asyncio.to_thread(graph.query_references, **query_args)

find_column_usage `async` ¶

find_column_usage(params)

Find where and how columns are used across SQL models.

Powered by sqlglot's column lineage analysis. Shows usage type, transforms (CAST, COALESCE, etc.), output aliases, and WHERE conditions.

Answers: "where is customer_id used in WHERE clauses?", "how is animal.breed_id transformed?", "show all column usage on orders."

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="find_column_usage",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def find_column_usage(params: FindColumnUsageInput) -> dict:
    """Find where and how columns are used across SQL models.

    Powered by sqlglot's column lineage analysis. Shows usage type,
    transforms (CAST, COALESCE, etc.), output aliases, and WHERE conditions.

    Answers: "where is customer_id used in WHERE clauses?",
    "how is animal.breed_id transformed?", "show all column usage on orders."
    """
    graph = _get_graph()
    # Filters and paging are forwarded unchanged to query_column_usage.
    query_args = {
        "table": params.table,
        "column": params.column,
        "usage_type": params.usage_type,
        "repo": params.repo,
        "limit": params.limit,
        "offset": params.offset,
    }
    # Execute the synchronous query off the event loop.
    return await asyncio.to_thread(graph.query_column_usage, **query_args)

trace_dependencies `async` ¶

trace_dependencies(params)

Trace multi-hop dependency chains through the SQL graph.

Follows table → view → CTE → query chains. Use for impact analysis: "if I change this table, what models break?"

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="trace_dependencies",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def trace_dependencies(params: TraceDependenciesInput) -> dict:
    """Trace multi-hop dependency chains through the SQL graph.

    Follows table → view → CTE → query chains. Use for impact analysis:
    "if I change this table, what models break?"
    """
    graph = _get_graph()
    # Traversal settings taken verbatim from the tool input.
    trace_args = {
        "name": params.name,
        "kind": params.kind,
        "direction": params.direction,
        "max_depth": params.max_depth,
        "repo": params.repo,
        "include_snippets": params.include_snippets,
        "limit": params.limit,
    }
    # The trace is a synchronous call, so it runs in a worker thread.
    return await asyncio.to_thread(graph.query_trace, **trace_args)

trace_column_lineage `async` ¶

trace_column_lineage(params)

Trace end-to-end column lineage through CTEs and subqueries.

Shows how an output column traces back to source table columns, with each intermediate hop (CTE, subquery) and any transforms (CAST, etc.).

Answers: "where does dim_users.created_date come from?", "which output columns depend on orders.amount?"

Note: SELECT * lineage requires a schema catalog built from prior column usage data. On a fresh index, SELECT * columns may not be expanded. Run a second full reindex to populate the catalog and resolve them.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="trace_column_lineage",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def trace_column_lineage(params: TraceColumnLineageInput) -> dict:
    """Trace end-to-end column lineage through CTEs and subqueries.

    Shows how an output column traces back to source table columns, with
    each intermediate hop (CTE, subquery) and any transforms (CAST, etc.).

    Answers: "where does dim_users.created_date come from?",
    "which output columns depend on orders.amount?"

    Note: SELECT * lineage requires a schema catalog built from prior column
    usage data. On a fresh index, SELECT * columns may not be expanded.
    Run a second full reindex to populate the catalog and resolve them.
    """
    graph = _get_graph()
    # Lineage filters and paging are handed to query_column_lineage as-is.
    lineage_args = {
        "table": params.table,
        "column": params.column,
        "output_node": params.output_node,
        "repo": params.repo,
        "limit": params.limit,
        "offset": params.offset,
    }
    # Keep the event loop free while the synchronous query runs.
    return await asyncio.to_thread(graph.query_column_lineage, **lineage_args)

get_schema `async` ¶

get_schema(params)

Get the schema of a table or model — columns, types, descriptions, and dependencies.

Returns column definitions (name, type, position, source, description), upstream dependencies (what this model reads from), and downstream dependencies (what reads from this model). The primary tool for understanding table structure.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="get_schema",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def get_schema(params: GetSchemaInput) -> dict:
    """Get the schema of a table or model — columns, types, descriptions, and dependencies.

    Returns column definitions (name, type, position, source, description),
    upstream dependencies (what this model reads from), and downstream
    dependencies (what reads from this model). The primary tool for
    understanding table structure.
    """
    graph = _get_graph()
    # Lookup is keyed by model name, optionally narrowed to one repo.
    lookup = {"name": params.name, "repo": params.repo}
    # Synchronous query runs in a worker thread.
    return await asyncio.to_thread(graph.query_schema, **lookup)

get_context `async` ¶

get_context(params)

Get comprehensive context for a model — the first tool to call when working with a model.

Returns a complete context dump including:

- Model metadata (name, kind, file, repo)
- Column definitions with types and descriptions
- Upstream and downstream dependencies
- Column usage summary (most used columns, join keys, aggregations)
- Source code snippet (first 30 lines)
- Graph metrics (PageRank importance) when DuckPGQ is available

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="get_context",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def get_context(params: GetContextInput) -> dict:
    """Get comprehensive context for a model — the first tool to call when working with a model.

    Returns a complete context dump including:
    - Model metadata (name, kind, file, repo)
    - Column definitions with types and descriptions
    - Upstream and downstream dependencies
    - Column usage summary (most used columns, join keys, aggregations)
    - Source code snippet (first 30 lines)
    - Graph metrics (PageRank importance) when DuckPGQ is available
    """
    graph = _get_graph()
    # The context query takes only the model name and an optional repo filter.
    lookup = {"name": params.name, "repo": params.repo}
    # Synchronous query runs in a worker thread.
    return await asyncio.to_thread(graph.query_context, **lookup)

find_path `async` ¶

find_path(params)

Find the shortest dependency path between two models.

Uses DuckPGQ graph traversal to find the shortest chain of dependencies connecting two models. Returns the full path with intermediate models and path length.

Requires DuckPGQ extension. Returns an error if not installed.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="find_path",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def find_path(params: FindPathInput) -> dict:
    """Find the shortest dependency path between two models.

    Uses DuckPGQ graph traversal to find the shortest chain of
    dependencies connecting two models. Returns the full path
    with intermediate models and path length.

    Requires DuckPGQ extension. Returns an error if not installed.
    """
    graph = _get_graph()
    # Endpoints and hop budget come straight from the tool input.
    path_args = {
        "from_model": params.from_model,
        "to_model": params.to_model,
        "max_hops": params.max_hops,
    }
    # Synchronous path search runs in a worker thread.
    return await asyncio.to_thread(graph.query_find_path, **path_args)

find_critical_models `async` ¶

find_critical_models(params)

Find the most critical models by importance (PageRank) and downstream impact.

Ranks models by their graph centrality — models with high PageRank are referenced by many important models. Use to identify high-impact models that need extra care when modifying.

Requires DuckPGQ extension.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="find_critical_models",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def find_critical_models(params: FindCriticalModelsInput) -> dict:
    """Find the most critical models by importance (PageRank) and downstream impact.

    Ranks models by their graph centrality — models with high PageRank are
    referenced by many important models. Use to identify high-impact models
    that need extra care when modifying.

    Requires DuckPGQ extension.
    """
    graph = _get_graph()
    # Result size and optional repo filter are forwarded unchanged.
    ranking_args = {"top_n": params.top_n, "repo": params.repo}
    # Synchronous ranking query runs in a worker thread.
    return await asyncio.to_thread(graph.query_find_critical_models, **ranking_args)

detect_cycles `async` ¶

detect_cycles(params)

Detect circular dependencies in the SQL dependency graph.

Finds cycles where models form dependency loops (A -> B -> C -> A). Uses recursive CTE traversal — no DuckPGQ extension required.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="detect_cycles",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def detect_cycles(params: DetectCyclesInput) -> dict:
    """Detect circular dependencies in the SQL dependency graph.

    Finds cycles where models form dependency loops (A -> B -> C -> A).
    Uses recursive CTE traversal — no DuckPGQ extension required.
    """
    graph = _get_graph()
    # Repo filter and cycle-length bound are passed through as given.
    cycle_args = {
        "repo": params.repo,
        "max_cycle_length": params.max_cycle_length,
    }
    # Synchronous cycle detection runs in a worker thread.
    return await asyncio.to_thread(graph.query_detect_cycles, **cycle_args)

find_subgraphs `async` ¶

find_subgraphs(params)

Identify weakly connected components (subgraphs) in the dependency graph.

Reveals isolated model clusters, orphaned models, and overall graph topology. Requires DuckPGQ extension.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="find_subgraphs",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def find_subgraphs(params: FindSubgraphsInput) -> dict:
    """Identify weakly connected components (subgraphs) in the dependency graph.

    Reveals isolated model clusters, orphaned models, and overall graph topology.
    Requires DuckPGQ extension.
    """
    graph = _get_graph()
    repo_filter = params.repo
    # The only input is the optional repo filter; the synchronous query runs
    # in a worker thread.
    return await asyncio.to_thread(graph.query_find_subgraphs, repo=repo_filter)

find_bottlenecks `async` ¶

find_bottlenecks(params)

Find bottleneck models with high fan-in/out that are single points of failure.

Combines edge counting (plain SQL) with optional DuckPGQ clustering coefficient. Models with high downstream count and low clustering are flagged as high risk.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="find_bottlenecks",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def find_bottlenecks(params: FindBottlenecksInput) -> dict:
    """Find bottleneck models with high fan-in/out that are single points of failure.

    Combines edge counting (plain SQL) with optional DuckPGQ clustering coefficient.
    Models with high downstream count and low clustering are flagged as high risk.
    """
    graph = _get_graph()
    # Downstream threshold and repo filter are forwarded unchanged.
    bottleneck_args = {
        "min_downstream": params.min_downstream,
        "repo": params.repo,
    }
    # Synchronous query runs in a worker thread.
    return await asyncio.to_thread(graph.query_find_bottlenecks, **bottleneck_args)

check_impact `async` ¶

check_impact(params)

Check the downstream impact of proposed column changes BEFORE modifying code.

Analyzes column usage across downstream models to classify each change as:

- breaking: SELECT/JOIN usage — downstream model will error
- warning: WHERE/GROUP BY usage — filter breaks but model may not error
- safe: column not referenced downstream

Call this BEFORE removing, renaming, or adding columns to understand the blast radius.

Note: add_column does not detect SELECT * usage — downstream models using wildcard selects may still be affected by new columns.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="check_impact",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def check_impact(params: CheckImpactInput) -> dict:
    """Check the downstream impact of proposed column changes BEFORE modifying code.

    Analyzes column usage across downstream models to classify each change as:
    - **breaking**: SELECT/JOIN usage — downstream model will error
    - **warning**: WHERE/GROUP BY usage — filter breaks but model may not error
    - **safe**: column not referenced downstream

    Call this BEFORE removing, renaming, or adding columns to understand the blast radius.

    Note: ``add_column`` does not detect ``SELECT *`` usage — downstream models
    using wildcard selects may still be affected by new columns.
    """
    graph = _get_graph()
    # Each change is converted with model_dump() before it is handed to the
    # graph layer.
    change_dicts = [change.model_dump() for change in params.changes]
    impact_args = {
        "model": params.model,
        "changes": change_dicts,
        "repo": params.repo,
    }
    # Synchronous impact analysis runs in a worker thread.
    return await asyncio.to_thread(graph.query_check_impact, **impact_args)

pr_impact `async` ¶

pr_impact(params)

Analyse the structural impact of SQL changes since a base commit.

Computes structural diff (added/removed/modified tables, views, CTEs, column usage) then traces the blast radius through the full index.

Delta mode caveat: compare_mode="delta" shows net-new downstream impact by approximating the base blast radius via edge exclusion on the HEAD graph. It does not detect reduced blast radius from removed edges — no_longer_affected will be empty when a PR only removes dependencies. Use compare_mode="absolute" for a full picture when edge removals are the primary change.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="pr_impact",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
async def pr_impact(params: PrImpactInput) -> dict:
    """Analyse the structural impact of SQL changes since a base commit.

    Computes structural diff (added/removed/modified tables, views, CTEs,
    column usage) then traces the blast radius through the full index.

    **Delta mode caveat:** ``compare_mode="delta"`` shows **net-new downstream
    impact** by approximating the base blast radius via edge exclusion on the
    HEAD graph.  It does **not** detect reduced blast radius from removed edges
    — ``no_longer_affected`` will be empty when a PR only removes dependencies.
    Use ``compare_mode="absolute"`` for a full picture when edge removals are
    the primary change.
    """
    # Snapshot the server state once so graph and indexer belong together
    # for the whole request.
    state = _get_state()
    indexer = state.indexer
    graph = state.graph
    configured_repos = state.config["repos"]

    # Work out which repo to analyse. Every unresolvable case returns an
    # {"error": ...} payload; the unknown-repo message matches the one the
    # reindex tool returns.
    if params.repo:
        if params.repo not in configured_repos:
            return {"error": f"Repo '{params.repo}' not found in config"}
        repo_name = params.repo
    elif len(configured_repos) == 1:
        repo_name = next(iter(configured_repos))
    elif not configured_repos:
        # Previously this case fell through to the "Multiple repos" message.
        return {"error": "No repos configured — nothing to analyse."}
    else:
        return {"error": "Multiple repos configured — specify which repo to analyse."}

    path, dialect, _dialect_overrides = _resolve_repo_config(repo_name)
    repo_path = Path(path)

    # Upper bound on how many changed nodes get a downstream trace.
    max_traced_nodes = 20

    def _blocking_pr_impact() -> dict:
        """Do the git, parsing and graph work synchronously (runs in a worker thread)."""
        changed_files = indexer.get_changed_files(repo_path, params.base_commit)
        if not changed_files:
            return {"files_changed": [], "structural_diff": {}, "blast_radius": {}}

        old_results: dict[str, ParseResult] = {}
        new_results: dict[str, ParseResult] = {}

        for file_path in changed_files:
            # HEAD side: only files that still exist on disk and are SQL.
            full_path = repo_path / file_path
            if full_path.exists() and is_sql_file(file_path):
                content = full_path.read_text(errors="replace")
                new_results[file_path] = indexer.parse_file(file_path, content, dialect)

            # Base side: whatever the indexer can parse at the base commit
            # (a falsy result is simply skipped).
            old = indexer.parse_file_at_commit(repo_path, file_path, params.base_commit, dialect)
            if old:
                old_results[file_path] = old

        diff = _compute_structural_diff(old_results, new_results)

        affected_node_names = (
            [n["name"] for n in diff["nodes_added"]]
            + [n["name"] for n in diff["nodes_removed"]]
            + [n["name"] for n in diff["nodes_modified"]]
        )

        # Names of truly new nodes (no base trace needed for these)
        added_names = {n["name"] for n in diff["nodes_added"]}

        # Build exclude set: edges added in HEAD that did not exist at base
        edges_added_set: set[tuple[str, str]] = {(e["source"], e["target"]) for e in diff.get("edges_added", [])}

        is_delta = params.compare_mode == "delta"

        blast_radius: dict = {}
        if affected_node_names:
            head_affected: set[tuple[str, str]] = set()
            base_affected: set[tuple[str, str]] = set()
            all_head_paths: list[dict] = []  # flat list for repo counting
            repos_hit: set[str] = set()
            truncated = len(affected_node_names) > max_traced_nodes

            # Sort so the subset that gets traced is deterministic.
            affected_node_names.sort()
            for node_name in affected_node_names[:max_traced_nodes]:
                # HEAD blast radius (current graph)
                head_trace = graph.query_trace(
                    name=node_name,
                    direction="downstream",
                    max_depth=params.max_blast_radius_depth,
                )
                head_paths = head_trace.get("paths", [])
                head_affected.update((p["name"], p["kind"]) for p in head_paths)
                all_head_paths.extend(head_paths)
                repos_hit.update(head_trace.get("repos_affected", []))

                # Base blast radius approximation (exclude new edges)
                if is_delta and node_name not in added_names:
                    base_trace = graph.query_trace(
                        name=node_name,
                        direction="downstream",
                        max_depth=params.max_blast_radius_depth,
                        exclude_edges=edges_added_set,
                    )
                    base_affected.update((p["name"], p["kind"]) for p in base_trace.get("paths", []))

            if is_delta:
                newly_affected = head_affected - base_affected
                no_longer_affected = base_affected - head_affected

                blast_radius = {
                    "compare_mode": "delta",
                    "head_total": len(head_affected),
                    "base_total": len(base_affected),
                    "delta": len(head_affected) - len(base_affected),
                    "newly_affected": [{"name": n, "kind": k} for n, k in sorted(newly_affected)],
                    "no_longer_affected": [{"name": n, "kind": k} for n, k in sorted(no_longer_affected)],
                    "unchanged_affected": len(head_affected & base_affected),
                    "note": (
                        "Delta mode approximates the base blast radius by "
                        "excluding newly-added edges from the HEAD graph. "
                        "It shows net-new downstream impact but does not "
                        "detect reduced blast radius from removed edges."
                    ),
                    # Backward-compat fields
                    "transitively_affected": len(head_affected),
                    "repos_affected": sorted(repos_hit),
                    "truncated": truncated,
                    "total_affected_nodes": len(affected_node_names),
                }
            else:
                # Absolute mode (v1 behavior): counts come from the flat path
                # list, so a node reached from several changed nodes is
                # counted once per trace.
                blast_radius = {
                    "compare_mode": "absolute",
                    "transitively_affected": len(all_head_paths),
                    "affected_by_repo": {r: sum(1 for a in all_head_paths if a.get("repo") == r) for r in repos_hit},
                    "repos_affected": sorted(repos_hit),
                    "truncated": truncated,
                    "total_affected_nodes": len(affected_node_names),
                }

            if truncated:
                blast_radius["truncation_message"] = (
                    f"Blast radius incomplete — {len(affected_node_names)} affected nodes, "
                    f"only first {max_traced_nodes} traced. Use trace_dependencies "
                    "on specific nodes for full picture."
                )

        return {
            "files_changed": changed_files,
            "structural_diff": diff,
            "blast_radius": blast_radius,
        }

    # Everything above does blocking work, so it runs off the event loop.
    return await asyncio.to_thread(_blocking_pr_impact)

get_conventions `async` ¶

get_conventions(params)

Get naming conventions, reference rules, and required columns for a layer.

Returns inferred conventions with confidence scores. Agents should follow high-confidence conventions (>0.9) and ask about low-confidence ones (<0.7).

Use this before writing new models to understand project patterns: naming conventions, allowed layer references, required columns, and column naming style.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="get_conventions",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def get_conventions(params: GetConventionsInput) -> dict:
    """Get naming conventions, reference rules, and required columns for a layer.

    Returns inferred conventions with confidence scores. Agents should follow
    high-confidence conventions (>0.9) and ask about low-confidence ones (<0.7).

    Use this before writing new models to understand project patterns:
    naming conventions, allowed layer references, required columns, and
    column naming style.
    """
    graph = _get_graph()
    # Layer and repo filters are forwarded unchanged.
    convention_args = {"layer": params.layer, "repo": params.repo}
    # Synchronous query runs in a worker thread.
    return await asyncio.to_thread(graph.query_conventions, **convention_args)

search_by_tag `async` ¶

search_by_tag(params)

Find models tagged with a business domain concept, ranked by confidence.

Returns models whose semantic tags match the given tag name, ordered by confidence score (highest first). Use list_tags first to discover the available tags in the project's business domain vocabulary.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="search_by_tag",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def search_by_tag(params: SearchByTagInput) -> dict:
    """Find models tagged with a business domain concept, ranked by confidence.

    Returns models whose semantic tags match the given tag name, ordered by
    confidence score (highest first). Use list_tags first to discover the
    available tags in the project's business domain vocabulary.
    """
    graph = _get_graph()
    # Tag, repo filter and confidence floor are forwarded unchanged.
    tag_args = {
        "tag": params.tag,
        "repo": params.repo,
        "min_confidence": params.min_confidence,
    }
    # Synchronous query runs in a worker thread.
    return await asyncio.to_thread(graph.query_search_by_tag, **tag_args)

list_tags `async` ¶

list_tags(params)

Return all semantic tags with model counts and average confidence.

Provides the project's business domain vocabulary — the set of conceptual tags that have been assigned to models. Use this to discover available tags before calling search_by_tag.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="list_tags",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def list_tags(params: ListTagsInput) -> dict:
    """Return all semantic tags with model counts and average confidence.

    Provides the project's business domain vocabulary — the set of conceptual
    tags that have been assigned to models. Use this to discover available tags
    before calling search_by_tag.
    """
    graph = _get_graph()
    repo_filter = params.repo
    # The only input is the optional repo filter; the synchronous query runs
    # in a worker thread.
    return await asyncio.to_thread(graph.query_list_tags, repo=repo_filter)

find_similar_models `async` ¶

find_similar_models(params)

Find existing models similar to what you're building.

Compares reference overlap, column overlap, and layer placement to find models that already do something similar. Helps avoid duplicate work and suggests models to extend rather than recreate.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="find_similar_models",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def find_similar_models(params: FindSimilarModelsInput) -> dict:
    """Find existing models similar to what you're building.

    Compares reference overlap, column overlap, and layer placement to find
    models that already do something similar. Helps avoid duplicate work and
    suggests models to extend rather than recreate.
    """
    graph = _get_graph()
    # All similarity inputs are forwarded unchanged.
    similarity_args = {
        "references": params.references,
        "output_columns": params.output_columns,
        "model": params.model,
        "limit": params.limit,
        "repo": params.repo,
    }
    # Synchronous query runs in a worker thread.
    return await asyncio.to_thread(graph.query_find_similar_models, **similarity_args)

suggest_placement `async` ¶

suggest_placement(params)

Suggest where to place a new model based on its references.

Uses inferred layer flow rules and naming conventions to recommend the right layer, directory, and model name. Returns similar existing models to help avoid duplicate work.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="suggest_placement",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def suggest_placement(params: SuggestPlacementInput) -> dict:
    """Suggest where to place a new model based on its references.

    Uses inferred layer flow rules and naming conventions to recommend the
    right layer, directory, and model name. Returns similar existing models
    to help avoid duplicate work.
    """
    graph = _get_graph()
    # References, name and repo filter are forwarded unchanged.
    placement_args = {
        "references": params.references,
        "name": params.name,
        "repo": params.repo,
    }
    # Synchronous query runs in a worker thread.
    return await asyncio.to_thread(graph.query_suggest_placement, **placement_args)

reindex `async` ¶

reindex(params)

Trigger a reindex of SQL files. Checksums and re-parses only what changed.

Runs in the background so queries remain available during reindex. Supports per-repo SQL dialects and path-based dialect overrides.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="reindex",
    annotations=ToolAnnotations(
        readOnlyHint=False,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def reindex(params: ReindexInput) -> dict:
    """Trigger a reindex of SQL files. Checksums and re-parses only what changed.

    Runs in the background so queries remain available during reindex.
    Supports per-repo SQL dialects and path-based dialect overrides.
    """
    global _reindex_task, _reindex_status

    async with _reindex_lock:
        # A background task that has not finished yet takes precedence:
        # report its status instead of starting a second one.
        if _reindex_task and not _reindex_task.done():
            return {"status": "in_progress", **_reindex_status}

        state = _get_state()
        indexer = state.indexer

        # Start from every configured repo; narrow to one when requested.
        targets = state.config["repos"]
        if params.repo:
            if params.repo not in targets:
                return {"error": f"Repo '{params.repo}' not found in config"}
            targets = {params.repo: targets[params.repo]}

        repo_names = list(targets)
        _reindex_status = {
            "state": "started",
            "started_at": datetime.now().isoformat(),
            "repos": repo_names,
        }

        def _reindex_targets():
            # Executed via asyncio.to_thread: reindex each target repo in
            # turn, publishing which repo is being processed as it goes.
            global _reindex_status
            per_repo = {}
            for repo_name in targets:
                _reindex_status = {**_reindex_status, "current_repo": repo_name}
                path, dialect, dialect_overrides = _resolve_repo_config(repo_name)
                per_repo[repo_name] = indexer.reindex_repo(
                    repo_name,
                    path,
                    dialect=dialect,
                    dialect_overrides=dialect_overrides,
                )
            return per_repo

        async def _run_in_background():
            global _reindex_status, _last_parse_errors
            try:
                outcome = await asyncio.to_thread(_reindex_targets)
                # Flatten the parse errors of every repo into one list.
                _last_parse_errors = [
                    parse_error
                    for repo_outcome in outcome.values()
                    for parse_error in repo_outcome.get("parse_errors", [])
                ]
                _reindex_status = {
                    **_reindex_status,
                    "state": "completed",
                    "completed_at": datetime.now().isoformat(),
                    "result": outcome,
                }
                return outcome
            except Exception as exc:
                # The failure is recorded in the status dict rather than
                # re-raised; the task then finishes with a None result.
                _reindex_status = {
                    **_reindex_status,
                    "state": "failed",
                    "error": str(exc),
                    "failed_at": datetime.now().isoformat(),
                }

        _reindex_task = asyncio.create_task(_run_in_background())

    return {
        "status": "started",
        "message": ("Reindex running in background. Queries remain available. Call index_status to check progress."),
        "repos": repo_names,
    }

reindex_sqlmesh `async` ¶

reindex_sqlmesh(params)

Index a sqlmesh project by rendering all models into clean SQL.

Runs in the background so queries remain available during reindex. Uses sqlmesh's rendering engine to expand macros and resolve variables, then parses with sqlglot to extract tables, CTEs, edges, column lineage.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="reindex_sqlmesh",
    annotations=ToolAnnotations(
        readOnlyHint=False,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def reindex_sqlmesh(params: ReindexSqlmeshInput) -> dict:
    """Index a sqlmesh project by rendering all models into clean SQL.

    Runs in the background so queries remain available during reindex.
    Uses sqlmesh's rendering engine to expand macros and resolve variables,
    then parses with sqlglot to extract tables, CTEs, edges, column lineage.
    """
    global _reindex_task, _reindex_status

    async with _reindex_lock:
        # A background task that has not finished yet takes precedence:
        # report its status instead of starting a second one.
        if _reindex_task and not _reindex_task.done():
            return {"status": "in_progress", **_reindex_status}

        indexer = _get_indexer()

        # Values that parse as integers are passed on as ints; anything
        # else is forwarded unchanged.
        var_dict: dict[str, str | int] = {}
        for var_name, raw_value in (params.variables or {}).items():
            try:
                coerced = int(raw_value)
            except ValueError:
                coerced = raw_value
            var_dict[var_name] = coerced

        _reindex_status = {
            "state": "started",
            "started_at": datetime.now().isoformat(),
            "repos": [params.name],
            "tool": "reindex_sqlmesh",
        }

        async def _run_in_background():
            global _reindex_status, _last_parse_errors
            try:
                outcome = await asyncio.to_thread(
                    indexer.reindex_sqlmesh,
                    repo_name=params.name,
                    project_path=params.project_path,
                    env_file=params.env_file,
                    variables=var_dict,
                    dialect=params.dialect,
                    sqlmesh_command=params.sqlmesh_command,
                )
                # Parse errors are only refreshed when the indexer returned a dict.
                if isinstance(outcome, dict):
                    _last_parse_errors = outcome.get("parse_errors", [])
                _reindex_status = {
                    **_reindex_status,
                    "state": "completed",
                    "completed_at": datetime.now().isoformat(),
                    "result": outcome,
                }
                return outcome
            except Exception as exc:
                # The failure is recorded in the status dict rather than
                # re-raised; the task then finishes with a None result.
                _reindex_status = {
                    **_reindex_status,
                    "state": "failed",
                    "error": str(exc),
                    "failed_at": datetime.now().isoformat(),
                }

        _reindex_task = asyncio.create_task(_run_in_background())

    return {
        "status": "started",
        "message": (
            "SQLMesh reindex running in background. Queries remain available. Call index_status to check progress."
        ),
        "repos": [params.name],
    }

reindex_dbt `async` ¶

reindex_dbt(params)

Index a dbt project by compiling all models into clean SQL.

Runs in the background so queries remain available during reindex. Runs `dbt compile`, then parses with sqlglot to extract tables, CTEs, edges, and column lineage with transforms.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="reindex_dbt",
    annotations=ToolAnnotations(
        readOnlyHint=False,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def reindex_dbt(params: ReindexDbtInput) -> dict:
    """Index a dbt project by compiling all models into clean SQL.

    Runs in the background so queries remain available during reindex.
    Runs `dbt compile`, then parses with sqlglot to extract tables, CTEs,
    edges, column lineage with transforms.
    """
    global _reindex_task, _reindex_status

    async with _reindex_lock:
        # A background task that has not finished yet takes precedence:
        # report its status instead of starting a second one.
        if _reindex_task and not _reindex_task.done():
            return {"status": "in_progress", **_reindex_status}

        indexer = _get_indexer()

        _reindex_status = {
            "state": "started",
            "started_at": datetime.now().isoformat(),
            "repos": [params.name],
            "tool": "reindex_dbt",
        }

        async def _run_in_background():
            global _reindex_status, _last_parse_errors
            try:
                outcome = await asyncio.to_thread(
                    indexer.reindex_dbt,
                    repo_name=params.name,
                    project_path=params.project_path,
                    profiles_dir=params.profiles_dir,
                    env_file=params.env_file,
                    target=params.target,
                    dbt_command=params.dbt_command,
                    dialect=params.dialect,
                )
                # Parse errors are only refreshed when the indexer returned a dict.
                if isinstance(outcome, dict):
                    _last_parse_errors = outcome.get("parse_errors", [])
                _reindex_status = {
                    **_reindex_status,
                    "state": "completed",
                    "completed_at": datetime.now().isoformat(),
                    "result": outcome,
                }
                return outcome
            except Exception as exc:
                # The failure is recorded in the status dict rather than
                # re-raised; the task then finishes with a None result.
                _reindex_status = {
                    **_reindex_status,
                    "state": "failed",
                    "error": str(exc),
                    "failed_at": datetime.now().isoformat(),
                }

        _reindex_task = asyncio.create_task(_run_in_background())

    return {
        "status": "started",
        "message": (
            "dbt reindex running in background. Queries remain available. Call index_status to check progress."
        ),
        "repos": [params.name],
    }

reindex_files `async` ¶

reindex_files(params)

Reindex specific files after save. Non-blocking.

Fast path for on-save reindex. Accepts absolute file paths, resolves to repos, and reindexes only the affected models.

- Plain SQL files: reindexed in ~50ms
- dbt/sqlmesh models: compiled + reindexed in ~2-5s

Multiple rapid calls are debounced per repo. Returns immediately; reindex runs in background.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="reindex_files",
    annotations=ToolAnnotations(
        readOnlyHint=False,
        destructiveHint=False,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
async def reindex_files(params: ReindexFilesInput) -> dict:
    """Reindex specific files after save. Non-blocking.

    Fast path for on-save reindex. Accepts absolute file paths,
    resolves to repos, and reindexes only the affected models.

    - Plain SQL files: reindexed in ~50ms
    - dbt/sqlmesh models: compiled + reindexed in ~2-5s

    Multiple rapid calls are debounced per repo. Returns immediately;
    reindex runs in background.
    """
    state = _state
    if not state:
        return {"error": "Server not configured. Call configure() first."}

    total_paths = len(params.paths)
    sql_paths = [candidate for candidate in params.paths if is_sql_file(candidate)]
    if not sql_paths:
        return {"accepted": 0, "skipped": total_paths, "reason": "No SQL files in paths"}

    # Bucket every SQL file under the (repo_name, repo_type) it resolves to;
    # files that resolve to no repo are only counted.
    known_repos = state.indexer.graph.get_all_repos()
    by_repo: dict[tuple[str, str], list[str]] = defaultdict(list)
    unmatched = 0
    for sql_path in sql_paths:
        match = state.indexer._resolve_file_repo(Path(sql_path).resolve(), known_repos)
        if not match:
            unmatched += 1
            continue
        _repo_id, repo_name, _repo_path, repo_type = match
        by_repo[(repo_name, repo_type)].append(sql_path)
    accepted = len(sql_paths) - unmatched

    # One enqueue call per (repo, type) group.
    for (repo_name, repo_type), repo_paths in by_repo.items():
        await _enqueue_reindex(repo_name, repo_type, repo_paths)

    # "skipped" covers both non-SQL paths and SQL files outside any repo.
    non_sql_skipped = total_paths - len(sql_paths)
    result: dict = {
        "accepted": accepted,
        "skipped": unmatched + non_sql_skipped,
        "queued_at": datetime.now().isoformat(),
    }
    if accepted:
        result["note"] = "Reindex queued. Check index_status for progress."
    else:
        result["reason"] = "No SQL files matched a configured repo"
    return result

index_status `async` ¶

index_status()

Current state of the index — repos, file counts, last commit, staleness.

Source code in src/sqlprism/core/mcp_tools.py

@mcp.tool(
    name="index_status",
    annotations=ToolAnnotations(
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
async def index_status() -> dict:
    """Current state of the index — repos, file counts, last commit, staleness."""
    status = await asyncio.to_thread(_get_graph().get_index_status)

    # Snapshot the module-level reindex bookkeeping after the graph query.
    task = _reindex_task
    reindex_info = _reindex_status
    parse_errors = _last_parse_errors

    if task and not task.done():
        # A background reindex is still running: expose its live status.
        status["reindex_in_progress"] = True
        status["reindex_status"] = reindex_info
    elif reindex_info.get("state") in ("completed", "failed"):
        # Otherwise report how the most recent reindex ended.
        status["last_reindex"] = reindex_info

    status["parse_error_count"] = len(parse_errors)
    if parse_errors:
        status["last_parse_errors"] = parse_errors[:50]  # cap at 50
    return status

MCP Server¶

configure ¶

search async ¶

find_references async ¶

find_column_usage async ¶

trace_dependencies async ¶

trace_column_lineage async ¶

get_schema async ¶

get_context async ¶

find_path async ¶

find_critical_models async ¶

detect_cycles async ¶

find_subgraphs async ¶

find_bottlenecks async ¶

check_impact async ¶

pr_impact async ¶

get_conventions async ¶

search_by_tag async ¶

list_tags async ¶

find_similar_models async ¶

suggest_placement async ¶

reindex async ¶

reindex_sqlmesh async ¶

reindex_dbt async ¶

reindex_files async ¶

index_status async ¶

search `async` ¶

find_references `async` ¶

find_column_usage `async` ¶

trace_dependencies `async` ¶

trace_column_lineage `async` ¶

get_schema `async` ¶

get_context `async` ¶

find_path `async` ¶

find_critical_models `async` ¶

detect_cycles `async` ¶

find_subgraphs `async` ¶

find_bottlenecks `async` ¶

check_impact `async` ¶

pr_impact `async` ¶

get_conventions `async` ¶

search_by_tag `async` ¶

list_tags `async` ¶

find_similar_models `async` ¶

suggest_placement `async` ¶

reindex `async` ¶

reindex_sqlmesh `async` ¶

reindex_dbt `async` ¶

reindex_files `async` ¶

index_status `async` ¶