Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions drivers/python/age/age.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,14 +208,18 @@ def _validate_column(col: str) -> str:
name, type_name = parts
validate_identifier(name, "Column name")
validate_identifier(type_name, "Column type")
return f"{name} {type_name}"
# Only the column name is double-quoted. The type name is left
# unquoted because PostgreSQL type names in column definitions
# are case-insensitive identifiers; double-quoting them would
# force exact-case matching and break user-defined type lookup.
Comment on lines +211 to +214
Copy link

Copilot AI Apr 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The rationale comment about not quoting type_name is a bit misleading: double-quoting doesn’t inherently “break user-defined type lookup”; it changes name-folding semantics (quoted identifiers become case-sensitive, unquoted fold to lower-case). Consider rewording to clarify that leaving the type unquoted preserves PostgreSQL’s default case-folding behavior and avoids surprising resolution changes.

Suggested change
# Only the column name is double-quoted. The type name is left
# unquoted because PostgreSQL type names in column definitions
# are case-insensitive identifiers; double-quoting them would
# force exact-case matching and break user-defined type lookup.
# Only the column name is double-quoted. The type name is left
# unquoted so PostgreSQL applies its default identifier folding
# for type names in column definitions. Double-quoting would make
# the type name case-sensitive and could change type resolution in
# surprising ways for user-defined types.

Copilot uses AI. Check for mistakes.
return f'"{name}" {type_name}'
else:
validate_identifier(col, "Column name")
return f"{col} agtype"
return f'"{col}" agtype'
Comment on lines +215 to +218
Copy link

Copilot AI Apr 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Always double-quoting column names changes PostgreSQL’s identifier folding semantics: previously an unquoted alias like V would be exposed as v (folded to lower-case), but now "V" will stay uppercase. If callers relied on the old lowercasing behavior for cols values with uppercase letters, this is a user-visible behavior change; consider normalizing name to lower-case before quoting or documenting this change.

Copilot uses AI. Check for mistakes.


def buildCypher(graphName:str, cypherStmt:str, columns:list) ->str:
if graphName == None:
if graphName is None:
raise _EXCEPTION_GraphNotSet

columnExp=[]
Expand All @@ -225,16 +229,18 @@ def buildCypher(graphName:str, cypherStmt:str, columns:list) ->str:
if validated:
columnExp.append(validated)
else:
columnExp.append('v agtype')
columnExp.append('"v" agtype')
Copy link

Copilot AI Apr 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new always-quoted column output from buildCypher() / _validate_column() requires updating any in-repo callers/tests that compare generated SQL strings (e.g., security tests that look for v agtype or n agtype). Without updating those expectations, the Python test suite will fail.

Suggested change
columnExp.append('"v" agtype')
columnExp.append(_validate_column('v'))

Copilot uses AI. Check for mistakes.

# Design note: String concatenation is used here instead of
# psycopg.sql.Identifier() because column specifications are
# "name type" pairs (e.g. "v agtype") that don't map directly to
# "name type" pairs (e.g. '"v" agtype') that don't map directly to
# sql.Identifier(). Each component has already been validated by
# _validate_column() → validate_identifier(), which restricts
# names to ^[A-Za-z_][A-Za-z0-9_]*$ and max 63 chars. The
# graphName and cypherStmt are NOT embedded here — this template
# only contains the validated column list and static SQL keywords.
# names to ^[A-Za-z_][A-Za-z0-9_]*$ and max 63 chars. Column names
# are always double-quoted to avoid conflicts with PostgreSQL
# reserved words (e.g. "count", "order", "type"). The graphName
# and cypherStmt are NOT embedded here — this template only
# contains the validated column list and static SQL keywords.
stmtArr = []
stmtArr.append("SELECT * from cypher(NULL,NULL) as (")
stmtArr.append(','.join(columnExp))
Expand Down
72 changes: 72 additions & 0 deletions drivers/python/test_age_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@
# specific language governing permissions and limitations
# under the License.
import json
import re

from age.models import Vertex
import unittest
import decimal
import age
# _validate_column is private but tested directly because its quoting
# behavior is security-relevant and the public surface (buildCypher)
# makes it difficult to isolate quoting assertions.
from age.age import buildCypher, _validate_column
from age.exceptions import InvalidIdentifier
import argparse

TEST_HOST = "localhost"
Expand All @@ -28,6 +34,72 @@
TEST_GRAPH_NAME = "test_graph"


class TestBuildCypher(unittest.TestCase):
"""Unit tests for buildCypher() and _validate_column() — no DB required."""

def test_simple_column(self):
result = buildCypher("g", "MATCH (n) RETURN n", ["n"])
self.assertIn('"n" agtype', result)

def test_column_with_type(self):
result = buildCypher("g", "MATCH (n) RETURN n", ["n agtype"])
self.assertIn('"n" agtype', result)
Comment on lines +40 to +46
Copy link

Copilot AI Apr 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change will break existing Python tests that assert unquoted column specs (e.g., drivers/python/test_security.py currently expects v agtype / n agtype in buildCypher() output and _validate_column() return values). Update those assertions to match the new always-quoted behavior so the test suite stays consistent and CI doesn’t fail.

Copilot uses AI. Check for mistakes.

def test_reserved_word_count(self):
"""Issue #2370: 'count' is a PostgreSQL reserved word."""
result = buildCypher("g", "MATCH (n) RETURN count(n)", ["count"])
self.assertIn('"count" agtype', result)
# Verify 'count' never appears unquoted as a column name
self.assertIsNone(
re.search(r'(?<!")\bcount\s+agtype\b', result),
f"'count' must be quoted in: {result}"
)

def test_reserved_word_order(self):
"""Issue #2370: 'order' is a PostgreSQL reserved word."""
result = buildCypher("g", "MATCH (n) RETURN n.order", ["order"])
self.assertIn('"order" agtype', result)

def test_reserved_word_type(self):
"""Issue #2370: 'type' is a PostgreSQL reserved word."""
result = buildCypher("g", "MATCH ()-[r]->() RETURN type(r)", ["type"])
self.assertIn('"type" agtype', result)

def test_reserved_word_select(self):
"""Issue #2370: 'select' is a PostgreSQL reserved word."""
result = buildCypher("g", "MATCH (n) RETURN n", ["select"])
self.assertIn('"select" agtype', result)

def test_reserved_word_group(self):
"""Issue #2370: 'group' is a PostgreSQL reserved word."""
result = buildCypher("g", "MATCH (n) RETURN n", ["group"])
self.assertIn('"group" agtype', result)

def test_multiple_columns(self):
result = buildCypher("g", "MATCH (n) RETURN n.name, count(n)", ["name", "count"])
self.assertIn('"name" agtype', result)
self.assertIn('"count" agtype', result)

def test_default_column(self):
result = buildCypher("g", "MATCH (n) RETURN n", None)
self.assertIn('"v" agtype', result)

def test_invalid_column_rejected(self):
with self.assertRaises(InvalidIdentifier):
buildCypher("g", "MATCH (n) RETURN n", ["invalid;col"])

def test_reserved_word_in_name_type_pair(self):
"""Quoting applies even when the column is specified as 'name type'."""
result = buildCypher("g", "MATCH (n) RETURN n.order", ["order agtype"])
self.assertIn('"order" agtype', result)

def test_validate_column_quoting(self):
self.assertEqual(_validate_column("v"), '"v" agtype')
self.assertEqual(_validate_column("v agtype"), '"v" agtype')
self.assertEqual(_validate_column("count"), '"count" agtype')
self.assertEqual(_validate_column("my_col"), '"my_col" agtype')


class TestAgeBasic(unittest.TestCase):
ag = None
args: argparse.Namespace = argparse.Namespace(
Expand Down
Loading