diff --git a/agent/discovery/validator.py b/agent/discovery/validator.py index e794016..11516a5 100644 --- a/agent/discovery/validator.py +++ b/agent/discovery/validator.py @@ -119,9 +119,14 @@ def validate_analysis( ) -> List[str]: """Validate post-analysis output. + Citations now live in sidecar ``{component}.citations.json`` files + produced by the citation extractor (the ``## Citations`` block is + stripped from the Markdown after extraction to avoid duplication). + Checks: - Every component has a corresponding analysis .md file - - Analysis files contain a ## Citations section with valid JSON + - Every component has a corresponding .citations.json file with a + non-empty citations array - Cited files exist in the repository (spot check) - Cited line ranges are plausible (start <= end, within file length) """ @@ -134,65 +139,69 @@ def validate_analysis( errors.append(f"Missing analysis file for component '{comp.name}'") continue - # Spot-check citations - citation_errors = _validate_citations_in_file( - analysis_file, comp.name, repo_root + # Spot-check citations from the sidecar JSON + citations_file = analyses_dir / f"{comp.name}.citations.json" + citation_errors = _validate_citations_sidecar( + citations_file, comp.name, repo_root ) errors.extend(citation_errors) return errors -def _validate_citations_in_file( - md_path: Path, +def _validate_citations_sidecar( + sidecar_path: Path, component_name: str, repo_root: Path, max_spot_checks: int = 5, ) -> List[str]: - """Validate citations embedded in a Markdown analysis file. + """Validate citations from the ``{component}.citations.json`` sidecar. Performs lightweight checks: - - ## Citations section exists + - Sidecar file exists - JSON is parseable - - Spot-checks up to max_spot_checks citations for file existence and line plausibility. + - Contains a non-empty ``citations`` array + - Spot-checks up to max_spot_checks citations for file existence and + line plausibility. 
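+
+    A sidecar payload that passes all of these checks has the shape below
+    (fields beyond the checked ones are illustrative)::
+
+        {"citations": [{"file_path": "src/main.py", "start_line": 1,
+                        "end_line": 20, "claim": "entry point"}]}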
""" import json import re errors: List[str] = [] - text = md_path.read_text(encoding="utf-8") - # Look for ## Citations + ```json block - pattern = re.compile( - r"^## Citations\b.*?```json\s*\n(.*?)\n\s*```", - re.MULTILINE | re.DOTALL, - ) - match = pattern.search(text) - if not match: + if not sidecar_path.exists(): errors.append( - f"[{component_name}] Missing ## Citations section with JSON block" + f"[{component_name}] Missing sidecar file: {sidecar_path.name}" ) return errors - json_text = match.group(1).strip() - if not json_text: - errors.append(f"[{component_name}] ## Citations JSON block is empty") - return errors - try: - data = json.loads(json_text) + payload = json.loads(sidecar_path.read_text(encoding="utf-8")) except json.JSONDecodeError as exc: - errors.append(f"[{component_name}] Invalid JSON in ## Citations: {exc}") + errors.append( + f"[{component_name}] Invalid JSON in {sidecar_path.name}: {exc}" + ) return errors + if not isinstance(payload, dict): + errors.append( + f"[{component_name}] {sidecar_path.name} must be a JSON object, " + f"got {type(payload).__name__}" + ) + return errors + + data = payload.get("citations", []) if not isinstance(data, list): errors.append( - f"[{component_name}] ## Citations must be a JSON array, got {type(data).__name__}" + f"[{component_name}] 'citations' must be a JSON array, " + f"got {type(data).__name__}" ) return errors if len(data) == 0: - errors.append(f"[{component_name}] ## Citations array is empty (expected 10+)") + errors.append( + f"[{component_name}] citations array is empty (expected 10+)" + ) return errors # Spot-check a sample of citations diff --git a/agent/utils/citation_extractor.py b/agent/utils/citation_extractor.py index 78be773..d4940df 100644 --- a/agent/utils/citation_extractor.py +++ b/agent/utils/citation_extractor.py @@ -33,6 +33,48 @@ re.MULTILINE | re.DOTALL, ) +# Patterns for stripping extracted JSON sections out of the Markdown once +# they've been lifted into sidecar files. Each pattern matches the full +# section (heading + fenced JSON block) so it can be removed wholesale. +# Anchored with ``\b`` on the heading keyword so ``## Analysis Summary`` is +# not caught by the ``## Analysis Data`` pattern. +_STRIP_CITATIONS_RE = re.compile( + r"^## Citations\b[^\n]*\n.*?```json\s*\n.*?\n\s*```\s*\n?", + re.MULTILINE | re.DOTALL, +) +_STRIP_ANALYSIS_DATA_RE = re.compile( + r"^## Analysis Data\b[^\n]*\n.*?```json\s*\n.*?\n\s*```\s*\n?", + re.MULTILINE | re.DOTALL, +) +# Collapse three-or-more consecutive blank lines left behind by the strips. +_COLLAPSE_BLANK_LINES_RE = re.compile(r"\n{3,}") + + +def strip_extracted_sections(markdown_text: str) -> str: + """Remove ``## Citations`` and ``## Analysis Data`` JSON blocks from Markdown. + + These sections are emitted by the component-analyzer for machine + consumption, but the citations are lifted into a sidecar ``.citations.json`` + and ``## Analysis Data`` is never consumed downstream. Leaving them in the + human-facing Markdown is just duplicated noise. + + This function is idempotent: running it twice on the same text produces + the same result as running it once, so re-invoking the pipeline on an + already-cleaned analysis file is a no-op. + + Args: + markdown_text: Raw Markdown from a component analysis file. + + Returns: + Markdown with the two JSON sections removed and trailing blank-line + runs collapsed. Ends with a single trailing newline. 
+    """
+    cleaned = _STRIP_CITATIONS_RE.sub("", markdown_text)
+    cleaned = _STRIP_ANALYSIS_DATA_RE.sub("", cleaned)
+    cleaned = _COLLAPSE_BLANK_LINES_RE.sub("\n\n", cleaned)
+    # Normalise trailing whitespace to exactly one newline.
+    return cleaned.rstrip() + "\n"
+

 @dataclass
 class ExtractionResult:
@@ -86,13 +128,20 @@ def extract_citations_from_markdown(


 def validate_citation_dict(
-    raw: Dict[str, Any], repo_root: Optional[Path] = None
+    raw: Dict[str, Any],
+    repo_root: Optional[Path] = None,
+    component_root: Optional[str] = None,
 ) -> Tuple[Optional[CodeCitation], List[str]]:
     """Validate a single raw citation dict and convert to CodeCitation.

     Args:
         raw: A dict from the parsed JSON array.
         repo_root: Optional path to the source repository for file existence checks.
+        component_root: Optional repo-root-relative path of the component whose
+            analysis produced this citation (e.g. ``"omlx/models"``). Used as a
+            fallback prefix when the LLM emits component-relative paths like
+            ``"models/base_model.py"`` instead of repo-root-relative
+            ``"omlx/models/base_model.py"``.

     Returns:
         (CodeCitation or None, list of error strings).
@@ -133,11 +182,58 @@
     )
     file_path = cleaned

-    # Optional: check that the file exists on disk
+    # Existence check with path-shape fallbacks. LLMs emit citation paths
+    # in several shapes; we try the canonical one first, then fall back.
     if repo_root and file_path:
-        full_path = repo_root / file_path
-        if not full_path.exists():
+        # Peel "./" prefixes with a regex rather than lstrip("./"), which
+        # strips a character *set* and would eat the dot of ".github/...".
+        normalised = re.sub(r"^(?:\./)+", "", file_path).lstrip("/")
+
+        candidate_paths: List[str] = [normalised]
+        if component_root:
+            # Split on "/" and drop empty and "." segments; this keeps
+            # leading-dot directory names (".config") intact.
+            root_parts = [
+                p for p in component_root.split("/") if p and p != "."
+            ]
+            file_parts = [p for p in normalised.split("/") if p]
+
+            # Strategy A: dedupe trailing segments of component_root that
+            # overlap with leading segments of file_path, then join. Handles
+            # the common case where the LLM repeats the component's own
+            # directory name:
+            #   component_root = "omlx/models"
+            #   file_path = "models/base_model.py"
+            #   -> merge to "omlx/models/base_model.py"
+            for overlap in range(
+                min(len(file_parts), len(root_parts)), 0, -1
+            ):
+                if root_parts[-overlap:] == file_parts[:overlap]:
+                    merged = "/".join(root_parts + file_parts[overlap:])
+                    if merged not in candidate_paths:
+                        candidate_paths.append(merged)
+                    break
+
+            # Strategy B: straight prefix (no overlap). Handles the case
+            # where the LLM wrote a bare filename:
+            #   component_root = "omlx/models"
+            #   file_path = "base_model.py"
+            #   -> "omlx/models/base_model.py"
+            joined = "/".join(root_parts + file_parts) if root_parts else normalised
+            if joined not in candidate_paths:
+                candidate_paths.append(joined)
+
+        resolved: Optional[str] = None
+        for candidate in candidate_paths:
+            if (repo_root / candidate).exists():
+                resolved = candidate
+                break
+
+        if resolved is None:
             errors.append(f"Cited file does not exist: {file_path}")
+        elif resolved != file_path:
+            logger.debug(
+                "Resolved citation path via fallback: %s -> %s",
+                file_path,
+                resolved,
+            )
+            file_path = resolved

     if errors:
         return None, errors
@@ -156,6 +252,7 @@ def extract_component_citations(
     md_path: Path,
     component_name: str,
     repo_root: Optional[Path] = None,
+    component_root: Optional[str] = None,
 ) -> ExtractionResult:
     """Extract and validate citations from a single component analysis Markdown file.

     Args:
         md_path: Path to the component's .md analysis file.
         component_name: Name of the component (for labeling).
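+            In the index pipeline this is the analysis filename stem, so a
+            hypothetical ``auth-service.md`` is labeled ``auth-service``.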
repo_root: Optional source repo path for file existence validation. + component_root: Optional repo-root-relative path of this component + (e.g. ``"omlx/models"``), used as a fallback path prefix when + the LLM emits component-relative citation paths. Returns: ExtractionResult with validated citations and any errors. @@ -182,7 +282,9 @@ def extract_component_citations( result.raw_count = len(raw_dicts) for i, raw in enumerate(raw_dicts): - citation, val_errors = validate_citation_dict(raw, repo_root) + citation, val_errors = validate_citation_dict( + raw, repo_root, component_root + ) if val_errors: for err in val_errors: result.errors.append(f"Citation [{i}]: {err}") @@ -197,6 +299,7 @@ def build_citations_index( repo_root: Optional[Path] = None, source_repo: str = "", source_commit: str = "", + component_roots: Optional[Dict[str, str]] = None, ) -> Dict[str, Any]: """Extract citations from all analysis Markdown files and produce an index. @@ -207,6 +310,12 @@ def build_citations_index( repo_root: Optional source repo path for file existence validation. source_repo: Repository URL for generating deep links. source_commit: Git commit hash for permalink stability. + component_roots: Optional ``{component_name: root_path}`` map. When + provided, a citation path that isn't valid at the repo root will + be retried with the component's root_path as prefix. This + recovers citations where the LLM wrote paths relative to the + component directory (e.g. ``"base_model.py"`` for the ``models`` + component at ``omlx/models``). Returns: Dict suitable for writing as all_citations.json, containing: @@ -217,13 +326,19 @@ def build_citations_index( from datetime import datetime, timezone results: List[ExtractionResult] = [] + component_roots = component_roots or {} # Find all .md analysis files (skip any .citations.json files) md_files = sorted(analyses_dir.glob("*.md")) for md_path in md_files: component_name = md_path.stem - result = extract_component_citations(md_path, component_name, repo_root) + result = extract_component_citations( + md_path, + component_name, + repo_root, + component_root=component_roots.get(component_name), + ) results.append(result) # Write per-component citations file @@ -254,6 +369,23 @@ def build_citations_index( result.raw_count, ) + # Citations + analysis-data JSON blocks are now in the sidecar; + # strip them from the Markdown so the human-facing analysis isn't + # duplicating machine-readable artefacts. Guarded by the + # successful-write branch so that files with malformed citation + # blocks are left untouched for debugging. 
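+        # A failed strip is deliberately non-fatal: by this point the
+        # sidecar is already on disk, so no citation data can be lost;
+        # we just leave the duplicate block in place and warn.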
+ try: + original = md_path.read_text(encoding="utf-8") + stripped = strip_extracted_sections(original) + if stripped != original: + md_path.write_text(stripped, encoding="utf-8") + except OSError as exc: + logger.warning( + "Could not strip extracted sections from %s: %s", + md_path, + exc, + ) + # Build aggregated index total_valid = sum(r.valid_count for r in results) total_raw = sum(r.raw_count for r in results) diff --git a/tests/test_citations.py b/tests/test_citations.py index fb3181a..589cca3 100644 --- a/tests/test_citations.py +++ b/tests/test_citations.py @@ -11,6 +11,7 @@ validate_citation_dict, extract_component_citations, build_citations_index, + strip_extracted_sections, ExtractionResult, ) @@ -393,3 +394,325 @@ def test_source_url_enrichment(self, tmp_path): assert citation["source_url"] == ( "https://github.com/myorg/myrepo/blob/deadbeef/src/config.rs#L1-L15" ) + + +# --------------------------------------------------------------------------- +# component_root fallback resolution +# +# LLMs frequently emit citation paths that aren't repo-root-relative. The +# validator/extractor recover these using the component's root_path as a +# prefix candidate, with two strategies (suffix-overlap dedup, plain +# prepend). These tests pin each recovery path. +# --------------------------------------------------------------------------- + + +class TestComponentRootFallback: + def _make_repo(self, tmp_path, relpath): + full = tmp_path / relpath + full.parent.mkdir(parents=True, exist_ok=True) + full.write_text("content\n") + return full + + def test_repo_root_relative_path_accepted_as_is(self, tmp_path): + """If the LLM emits the canonical repo-root path, no rewrite happens.""" + self._make_repo(tmp_path, "omlx/models/base.py") + raw = { + "file_path": "omlx/models/base.py", + "start_line": 1, + "end_line": 5, + "claim": "base model", + } + citation, errors = validate_citation_dict( + raw, repo_root=tmp_path, component_root="omlx/models" + ) + assert errors == [] + assert citation.file_path == "omlx/models/base.py" + + def test_plain_prepend_for_bare_filename(self, tmp_path): + """Bare filename + component_root -> prepended full path.""" + self._make_repo(tmp_path, "omlx/models/base.py") + raw = { + "file_path": "base.py", + "start_line": 1, + "end_line": 5, + "claim": "base model", + } + citation, errors = validate_citation_dict( + raw, repo_root=tmp_path, component_root="omlx/models" + ) + assert errors == [] + assert citation.file_path == "omlx/models/base.py" + + def test_suffix_overlap_dedup(self, tmp_path): + """Path that repeats the component's directory name (a common LLM + mistake) is merged via overlap dedup, not naive concat.""" + self._make_repo(tmp_path, "omlx/models/base.py") + raw = { + "file_path": "models/base.py", # repeats last segment of component_root + "start_line": 1, + "end_line": 5, + "claim": "base model", + } + citation, errors = validate_citation_dict( + raw, repo_root=tmp_path, component_root="omlx/models" + ) + assert errors == [] + # NOT "omlx/models/models/base.py" — the dedup must win. 
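+        # Worked trace of Strategy A for this input:
+        #   root_parts = ["omlx", "models"], file_parts = ["models", "base.py"]
+        #   overlap=2: ["omlx", "models"] != ["models", "base.py"]
+        #   overlap=1: ["models"] == ["models"] -> merge
+        #   "/".join(["omlx", "models", "base.py"]) -> "omlx/models/base.py"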
+ assert citation.file_path == "omlx/models/base.py" + + def test_multi_segment_overlap(self, tmp_path): + """A longer overlap should dedup correctly too.""" + self._make_repo(tmp_path, "pkg/auth/token/jwt.rs") + raw = { + "file_path": "auth/token/jwt.rs", + "start_line": 1, + "end_line": 5, + "claim": "jwt", + } + citation, errors = validate_citation_dict( + raw, repo_root=tmp_path, component_root="pkg/auth/token" + ) + assert errors == [] + assert citation.file_path == "pkg/auth/token/jwt.rs" + + def test_leading_dot_slash_stripped(self, tmp_path): + self._make_repo(tmp_path, "omlx/models/base.py") + raw = { + "file_path": "./base.py", + "start_line": 1, + "end_line": 5, + "claim": "base", + } + citation, errors = validate_citation_dict( + raw, repo_root=tmp_path, component_root="omlx/models" + ) + assert errors == [] + assert citation.file_path == "omlx/models/base.py" + + def test_absolute_project_prefix_still_stripped(self, tmp_path): + """The existing /tmp/*/project/ strip continues to work alongside + the new component-root fallback.""" + self._make_repo(tmp_path, "omlx/models/base.py") + raw = { + "file_path": "/tmp/omlx/project/omlx/models/base.py", + "start_line": 1, + "end_line": 5, + "claim": "base", + } + citation, errors = validate_citation_dict( + raw, repo_root=tmp_path, component_root="omlx/models" + ) + assert errors == [] + assert citation.file_path == "omlx/models/base.py" + + def test_unresolvable_path_still_errors(self, tmp_path): + """If neither the bare path nor component-rooted variants exist, + we keep the original error.""" + raw = { + "file_path": "ghost/file.py", + "start_line": 1, + "end_line": 5, + "claim": "ghost", + } + citation, errors = validate_citation_dict( + raw, repo_root=tmp_path, component_root="omlx/models" + ) + assert citation is None + assert any("does not exist" in e for e in errors) + + def test_component_root_unused_when_no_repo_root(self): + """component_root only activates when repo_root is provided (i.e. + when we're actually doing existence checks).""" + raw = { + "file_path": "models/base.py", + "start_line": 1, + "end_line": 5, + "claim": "base", + } + citation, errors = validate_citation_dict( + raw, repo_root=None, component_root="omlx/models" + ) + assert errors == [] + # Path is passed through unchanged when we can't verify. 
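+        # (The whole fallback block is gated on ``if repo_root and
+        # file_path:``, so no candidate paths are built here at all.)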
+ assert citation.file_path == "models/base.py" + + def test_index_uses_component_roots_map(self, tmp_path): + """build_citations_index threads component_roots through to the + validator.""" + # Layout: repo with the component's real file + (tmp_path / "omlx/models").mkdir(parents=True) + (tmp_path / "omlx/models/base.py").write_text("x\n") + + # Analysis emits a component-relative path (the common failure mode) + analyses_dir = tmp_path / "service_analyses" + analyses_dir.mkdir() + (analyses_dir / "models.md").write_text( + dedent( + """ + # models + + ## Citations + ```json + [ + { + "file_path": "base.py", + "start_line": 1, + "end_line": 5, + "claim": "entry", + "section": "Key Components" + } + ] + ``` + """ + ) + ) + + index = build_citations_index( + analyses_dir=analyses_dir, + repo_root=tmp_path, + component_roots={"models": "omlx/models"}, + ) + assert index["metadata"]["total_citations"] == 1 + assert index["all_citations"][0]["file_path"] == "omlx/models/base.py" + + def test_index_without_component_roots_still_works(self, tmp_path): + """Backwards compatibility: callers that don't pass component_roots + get the pre-existing behavior (path accepted as-is or rejected).""" + (tmp_path / "src").mkdir() + (tmp_path / "src/main.rs").write_text("fn main() {}\n") + + analyses_dir = tmp_path / "service_analyses" + analyses_dir.mkdir() + (analyses_dir / "lib.md").write_text( + dedent( + """ + # lib + + ## Citations + ```json + [ + { + "file_path": "src/main.rs", + "start_line": 1, + "end_line": 1, + "claim": "entry", + "section": "Key Components" + } + ] + ``` + """ + ) + ) + + index = build_citations_index( + analyses_dir=analyses_dir, + repo_root=tmp_path, + # no component_roots + ) + assert index["metadata"]["total_citations"] == 1 + + +# --------------------------------------------------------------------------- +# strip_extracted_sections — ensures the duplicated JSON blocks are removed +# from the human-facing Markdown after being lifted into sidecar files. +# --------------------------------------------------------------------------- + + +class TestStripExtractedSections: + def test_strips_citations_section(self): + md = SAMPLE_MD_WITH_CITATIONS + cleaned = strip_extracted_sections(md) + assert "## Citations" not in cleaned + assert "```json" not in cleaned + # Prose sections around the block are preserved. + assert "## Architecture" in cleaned + assert "## Analysis Summary" in cleaned + + def test_strips_analysis_data_section(self): + md = dedent( + """\ + # lib + + ## Architecture + Prose. + + ## Analysis Data + ```json + {"summary": "x", "tech_stack": ["python"]} + ``` + + ## Citations + ```json + [{"file_path": "a.py", "start_line": 1, "end_line": 2, "claim": "c"}] + ``` + """ + ) + cleaned = strip_extracted_sections(md) + assert "## Analysis Data" not in cleaned + assert "## Citations" not in cleaned + assert "```json" not in cleaned + assert "## Architecture" in cleaned + + def test_does_not_strip_analysis_summary(self): + """Regex must not match ``## Analysis Summary`` (a common human section).""" + md = SAMPLE_MD_WITH_CITATIONS + cleaned = strip_extracted_sections(md) + assert "## Analysis Summary" in cleaned + + def test_idempotent(self): + md = SAMPLE_MD_WITH_CITATIONS + once = strip_extracted_sections(md) + twice = strip_extracted_sections(once) + assert once == twice + + def test_no_change_when_no_sections(self): + md = SAMPLE_MD_NO_CITATIONS + cleaned = strip_extracted_sections(md) + # Content survives; we only normalise trailing whitespace. 
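+        # (Compared via rstrip: the function always ends its output with
+        # exactly one newline, whatever the input ended with.)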
+        assert cleaned.rstrip() == md.rstrip()
+
+    def test_trailing_newline_normalised(self):
+        cleaned = strip_extracted_sections(SAMPLE_MD_WITH_CITATIONS)
+        # "Single trailing newline" means ends with "\n" but not "\n\n".
+        assert cleaned.endswith("\n")
+        assert not cleaned.endswith("\n\n")
+
+    def test_build_index_strips_md_after_extraction(self, tmp_path):
+        """End-to-end: running the pipeline removes the duplicated blocks
+        from the Markdown while preserving the sidecar JSON."""
+        analyses_dir = tmp_path / "service_analyses"
+        analyses_dir.mkdir()
+        md_path = analyses_dir / "my-library.md"
+        md_path.write_text(SAMPLE_MD_WITH_CITATIONS)
+
+        build_citations_index(
+            analyses_dir=analyses_dir,
+            source_repo="https://github.com/org/repo",
+            source_commit="abc",
+        )
+
+        # Sidecar was written
+        sidecar = analyses_dir / "my-library.citations.json"
+        assert sidecar.exists()
+
+        # MD no longer contains the extracted sections
+        cleaned_md = md_path.read_text()
+        assert "## Citations" not in cleaned_md
+        assert "```json" not in cleaned_md
+        # Non-extracted prose is preserved
+        assert "## Architecture" in cleaned_md
+
+    def test_build_index_leaves_md_untouched_on_extraction_failure(self, tmp_path):
+        """If extraction fails (e.g. invalid JSON), the MD must be left as-is
+        so the malformed block is still visible for debugging."""
+        analyses_dir = tmp_path / "service_analyses"
+        analyses_dir.mkdir()
+        md_path = analyses_dir / "bad.md"
+        md_path.write_text(SAMPLE_MD_INVALID_JSON)
+        original = md_path.read_text()
+
+        build_citations_index(analyses_dir=analyses_dir)
+
+        # MD unchanged
+        assert md_path.read_text() == original
+        # No sidecar written
+        assert not (analyses_dir / "bad.citations.json").exists()
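
Usage sketch for reviewers: wiring the new `component_roots` parameter through from a caller. Only `build_citations_index`, its parameters, and the index keys come from this diff; the checkout path, component names, and the manifest-derived mapping are hypothetical placeholders.

```python
# Hypothetical wiring sketch; see the hedges above. In practice the
# component_roots mapping would come from the discovery manifest.
from pathlib import Path

from agent.utils.citation_extractor import build_citations_index

repo_root = Path("/work/checkout")             # assumed repo checkout
analyses_dir = repo_root / "service_analyses"  # one .md per component

component_roots = {
    "models": "omlx/models",   # component name -> repo-root-relative root
    "auth": "pkg/auth/token",
}

index = build_citations_index(
    analyses_dir=analyses_dir,
    repo_root=repo_root,                        # enables existence checks
    source_repo="https://github.com/org/repo",  # used for deep links
    source_commit="deadbeef",                   # pins permalinks
    component_roots=component_roots,            # path-shape fallback
)
print(index["metadata"]["total_citations"])
```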