Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- Experimental KG schemas: `GraphConstraintType` (`UNIQUENESS`, `EXISTENCE`) and extended `ConstraintType` so `EXISTENCE` can target a node property or a relationship property; graph pruning and schema visualization respect `EXISTENCE` constraints.
- Experimental: `GraphConstraintType.KEY` (Neo4j NODE KEY / RELATIONSHIP KEY, single property) on `GraphSchema.constraints`; pruning treats KEY like EXISTENCE for mandatory (non-null) properties. UNIQUENESS and KEY cannot target the same node property. Helpers: `key_property_names_for_node`, `key_property_names_for_relationship`, `uniqueness_property_names_for_node`, `mandatory_property_names_for_node`, `mandatory_property_names_for_relationship`.
- Experimental: `SchemaFromExistingGraphExtractor` maps Neo4j `NODE_KEY` / `RELATIONSHIP_KEY` metadata to `GraphConstraintType.KEY` (existence-only constraints still map to `EXISTENCE`).
- Experimental: composite (multi-property) UNIQUENESS and KEY constraints via `ConstraintType.property_names: Tuple[str, ...]`. EXISTENCE remains single-property only. The old `property_name: str` field is deprecated but still accepted and migrated automatically. Parquet metadata now includes a structured `constraints` list that preserves composite grouping. The LLM extraction prompt uses the `property_names` (list) format.
- `LLMBase`: new abstract base class (`neo4j_graphrag.llm.LLMBase`) that combines `LLMInterface` and `LLMInterfaceV2`. Concrete LLM subclasses can extend `LLMBase` instead of both interfaces to avoid repeating overload boilerplate and to suppress mypy `[no-overload-impl]` / `[no-redef]` errors.
- MarkdownLoader (experimental): added a Markdown loader to support `.md` and `.markdown` files.
- Added Amazon Bedrock support: `BedrockLLM` (generation/tool calling) via the boto3 Converse API, and `BedrockEmbeddings` (embeddings) via the boto3 InvokeModel API.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,19 +104,19 @@
ConstraintType(
type=GraphConstraintType.EXISTENCE,
node_type="Person",
property_name="firstName",
property_names=("firstName",),
relationship_type=None,
),
ConstraintType(
type=GraphConstraintType.EXISTENCE,
node_type="Person",
property_name="lastName",
property_names=("lastName",),
relationship_type=None,
),
ConstraintType(
type=GraphConstraintType.EXISTENCE,
node_type="Organization",
property_name="name",
property_names=("name",),
relationship_type=None,
),
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def print_schema_summary(schema: GraphSchema, title: str) -> None:
print(f"\nConstraints ({len(schema.constraints)}):")
for constraint in schema.constraints:
print(
f" - {constraint.type} on {constraint.node_type}.{constraint.property_name}"
f" - {constraint.type} on {constraint.node_type}.{list(constraint.property_names)}"
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class ExtractedConstraintType(BaseModel):
"""

type: Literal["UNIQUENESS", "EXISTENCE", "KEY"]
property_name: str
property_names: list[str] = Field(default_factory=list)
node_type: str = ""
relationship_type: str = ""

Expand Down
27 changes: 18 additions & 9 deletions src/neo4j_graphrag/experimental/components/kg_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,15 +407,24 @@ async def run(
name = meta.node_label or (
meta.labels[0] if meta.labels else resolved_stem
)
files.append(
{
"name": name,
"file_path": file_path,
"columns": columns,
"is_node": True,
"labels": meta.labels or [],
}
)
# Build structured constraints list for composite support
constraints_meta: list[dict[str, Any]] = []
for props in meta.key_constraints or []:
constraints_meta.append({"type": "KEY", "properties": list(props)})
for props in meta.uniqueness_constraints or []:
constraints_meta.append(
{"type": "UNIQUENESS", "properties": list(props)}
)
file_entry: dict[str, Any] = {
"name": name,
"file_path": file_path,
"columns": columns,
"is_node": True,
"labels": meta.labels or [],
}
if constraints_meta:
file_entry["constraints"] = constraints_meta
files.append(file_entry)

base_rel = self.relationships_dest.output_path.rstrip("/")
for filename, content in data["relationships"].items():
Expand Down
74 changes: 66 additions & 8 deletions src/neo4j_graphrag/experimental/components/parquet_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,19 @@ def _constraint_relationship_type_unset(constraint: dict[str, Any]) -> bool:
return rt is None or (isinstance(rt, str) and rt.strip() == "")


def _resolve_constraint_property_names(constraint: dict[str, Any]) -> list[str]:
"""Resolve property names from a constraint dict (``property_names`` or ``property_name``)."""
pns = constraint.get("property_names") or ()
if pns:
return list(pns)
pn = constraint.get("property_name", "")
return [pn] if pn else []


def get_uniqueness_property_names_for_node_type(
schema: Optional[dict[str, Any]], node_label: str
) -> list[str]:
"""Property names with a UNIQUENESS constraint for this node label (order as in schema)."""
"""Property names with a UNIQUENESS constraint for this node label (flat, order as in schema)."""
if not schema:
return []
out: list[str] = []
Expand All @@ -93,16 +102,14 @@ def get_uniqueness_property_names_for_node_type(
continue
if constraint.get("node_type", "") != node_label:
continue
pn = constraint.get("property_name", "")
if pn:
out.append(pn)
out.extend(_resolve_constraint_property_names(constraint))
return out


def get_key_property_names_for_node_type(
schema: Optional[dict[str, Any]], node_label: str
) -> list[str]:
"""Property names with a KEY constraint (node scope) for this node label."""
"""Property names with a KEY constraint (node scope) for this node label (flat)."""
if not schema:
return []
out: list[str] = []
Expand All @@ -113,9 +120,51 @@ def get_key_property_names_for_node_type(
continue
if not _constraint_relationship_type_unset(constraint):
continue
pn = constraint.get("property_name", "")
if pn:
out.append(pn)
out.extend(_resolve_constraint_property_names(constraint))
return out


def get_key_constraints_for_node_type(
    schema: Optional[dict[str, Any]], node_label: str
) -> list[tuple[str, ...]]:
    """Collect the KEY constraints declared on ``node_label``, one tuple per constraint.

    Only constraints whose relationship type is unset are considered. Each
    returned tuple holds the property names of a single KEY constraint, so
    composite keys stay grouped; constraints that resolve to no property
    names are left out.
    """
    if not schema:
        return []
    declared = schema.get("constraints", ()) or ()
    # Same filter order as a chain of guards: type, then label, then scope.
    resolved = [
        _resolve_constraint_property_names(entry)
        for entry in declared
        if entry.get("type") == "KEY"
        and entry.get("node_type", "") == node_label
        and _constraint_relationship_type_unset(entry)
    ]
    return [tuple(names) for names in resolved if names]


def get_uniqueness_constraints_for_node_type(
    schema: Optional[dict[str, Any]], node_label: str
) -> list[tuple[str, ...]]:
    """Collect the UNIQUENESS constraints declared on ``node_label``, one tuple per constraint.

    Each returned tuple holds the property names of a single UNIQUENESS
    constraint, so composite constraints stay grouped; constraints that
    resolve to no property names are left out.
    """
    if not schema:
        return []
    declared = schema.get("constraints", ()) or ()
    resolved = [
        _resolve_constraint_property_names(entry)
        for entry in declared
        if entry.get("type") == "UNIQUENESS"
        and entry.get("node_type", "") == node_label
    ]
    return [tuple(names) for names in resolved if names]


Expand Down Expand Up @@ -172,6 +221,9 @@ class FileMetadata:
head_uniqueness_property_names: Optional[list[str]] = None
tail_primary_key_property_names: Optional[list[str]] = None
tail_uniqueness_property_names: Optional[list[str]] = None
# Grouped constraint metadata (preserves composite grouping)
key_constraints: Optional[list[tuple[str, ...]]] = None
uniqueness_constraints: Optional[list[tuple[str, ...]]] = None


@dataclass
Expand Down Expand Up @@ -595,6 +647,12 @@ def format_graph(
uniqueness_property_names=get_uniqueness_property_names_for_node_type(
self.schema, label
),
key_constraints=get_key_constraints_for_node_type(
self.schema, label
),
uniqueness_constraints=get_uniqueness_constraints_for_node_type(
self.schema, label
),
)
)

Expand Down
Loading
Loading