Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 55 additions & 16 deletions specifyweb/backend/inheritance/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from specifyweb.specify.models import Collectionobject, Collectionobjectgroupjoin, Component

def parent_inheritance_post_query_processing(query, tableid, field_specs, collection, user):
if tableid == 1029 and 'catalogNumber' in [fs.fieldspec.join_path[0].name for fs in field_specs if fs.fieldspec.join_path]:
if tableid == 1029 and 'catalogNumber' in [fs.fieldspec.join_path[0].name for fs in field_specs if fs.fieldspec.join_path]:
if not get_parent_cat_num_inheritance_setting(collection, user):
return list(query)

Expand All @@ -14,16 +14,30 @@ def parent_inheritance_post_query_processing(query, tableid, field_specs, collec
return list(query)

results = list(query)
updated_results = []

# Map results, replacing null catalog numbers with the parent catalog number
# Collect IDs of rows needing catalog number lookup
ids_needing_lookup = [
result[0] for result in results
if result[catalog_number_field_index] is None or result[catalog_number_field_index] == ''
]

# Bulk prefetch: single query to get all component -> collectionobject catalog numbers
catnum_by_component_id = {}
if ids_needing_lookup:
catnum_by_component_id = dict(
Component.objects.filter(id__in=ids_needing_lookup)
.select_related('collectionobject')
.values_list('id', 'collectionobject__catalognumber')
)

updated_results = []
for result in results:
result = list(result)
if result[catalog_number_field_index] is None or result[catalog_number_field_index] == '':
component_id = result[0] # Assuming the first column is the child's ID
component_obj = Component.objects.filter(id=component_id).first()
if component_obj and component_obj.collectionobject:
result[catalog_number_field_index] = component_obj.collectionobject.catalognumber
component_id = result[0]
catnum = catnum_by_component_id.get(component_id)
if catnum:
result[catalog_number_field_index] = catnum
updated_results.append(tuple(result))

return updated_results
Expand All @@ -44,20 +58,45 @@ def cog_inheritance_post_query_processing(query, tableid, field_specs, collectio
return list(query)

results = list(query)
updated_results = []

# Map results, replacing null catalog numbers with the collection object group primary collection catalog number
# Collect IDs of rows needing COG inheritance lookup
ids_needing_lookup = [
result[0] for result in results
if result[catalog_number_field_index] is None or result[catalog_number_field_index] == ''
]

# Bulk prefetch step 1: get parentcog_id for each childco_id
cog_by_child = {}
if ids_needing_lookup:
cog_by_child = dict(
Collectionobjectgroupjoin.objects.filter(childco_id__in=ids_needing_lookup)
.values_list('childco_id', 'parentcog_id')
)

# Bulk prefetch step 2: get primary member's catalog number for each COG
catnum_by_cog = {}
cog_ids = set(cog_by_child.values())
if cog_ids:
catnum_by_cog = dict(
Collectionobjectgroupjoin.objects.filter(
parentcog_id__in=cog_ids, isprimary=True
)
.select_related('childco')
.values_list('parentcog_id', 'childco__catalognumber')
)

updated_results = []
for result in results:
result = list(result)
if result[catalog_number_field_index] is None or result[catalog_number_field_index] == '':
cojo = Collectionobjectgroupjoin.objects.filter(childco_id=result[0]).first()
if cojo:
primary_cojo = Collectionobjectgroupjoin.objects.filter(
parentcog=cojo.parentcog, isprimary=True).first()
if primary_cojo:
result[catalog_number_field_index] = primary_cojo.childco.catalognumber
child_id = result[0]
cog_id = cog_by_child.get(child_id)
if cog_id is not None:
catnum = catnum_by_cog.get(cog_id)
if catnum:
result[catalog_number_field_index] = catnum
updated_results.append(tuple(result))

return updated_results

return query
return query
Empty file.
161 changes: 161 additions & 0 deletions specifyweb/backend/inheritance/tests/test_n_plus_one.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""
Tests demonstrating and verifying the fix for N+1 query patterns
in inheritance post-query processing (#7875).
"""

from unittest.mock import patch

from django.test.utils import CaptureQueriesContext
from django.db import connection

from specifyweb.backend.interactions.tests.test_cog import TestCogInteractions
from specifyweb.specify.models import (
Collectionobject,
Collectionobjectgroup,
Collectionobjectgroupjoin,
Component,
)
from specifyweb.backend.inheritance.api import (
parent_inheritance_post_query_processing,
cog_inheritance_post_query_processing,
)


class _FakeFieldSpec:
"""Minimal stand-in for the field_specs entries used by the inheritance API."""

def __init__(self, name, op_num=1, has_join_path=True):
self.op_num = op_num

class _JoinPathItem:
def __init__(self, n):
self.name = n

class _Inner:
def __init__(self, n):
self.join_path = [_JoinPathItem(n)]

class _NoJoinPath:
join_path = []

self.fieldspec = _Inner(name) if has_join_path else _NoJoinPath()


def _make_field_specs():
"""Return field_specs where catalogNumber is at result index 1.

The id field has no join_path (it's the implicit row-id column at index 0).
catalogNumber is the first field with a join_path, so
catalog_number_field_index = 0 + 1 = 1, matching our (id, catnum) tuples.
"""
return [
_FakeFieldSpec('id', has_join_path=False),
_FakeFieldSpec('catalogNumber', op_num=1),
]


class TestParentInheritanceNPlusOne(TestCogInteractions):
"""Verify parent_inheritance_post_query_processing query count is O(1), not O(N)."""

@patch(
'specifyweb.backend.inheritance.api.get_parent_cat_num_inheritance_setting',
return_value=True,
)
def test_query_count_scales_constantly(self, _mock_setting):
"""With N rows that need catalog-number lookup, the number of DB
queries should be constant (bulk prefetch), not proportional to N."""

parent_co = self.collectionobjects[0]
parent_co.catalognumber = 'PARENT-001'
parent_co.save()

# Create 20 Components with null catalogNumber pointing to parent_co
n = 20
components = []
for i in range(n):
comp = Component.objects.create(
collectionobject=parent_co,
)
components.append(comp)

# Build fake query results: (component_id, None) — catalogNumber is null
# tableid 1029 = Component
fake_results = [(comp.id, None) for comp in components]

field_specs = _make_field_specs()

with CaptureQueriesContext(connection) as ctx:
result = parent_inheritance_post_query_processing(
fake_results, 1029, field_specs, self.collection, self.specifyuser,
)

# Every row should have inherited the parent catalog number
for row in result:
self.assertEqual(row[1], 'PARENT-001')

# With the N+1 bug, we'd see >= N queries (one per row).
# After the fix, we expect a small constant number (at most ~3).
query_count = len(ctx.captured_queries)
self.assertLessEqual(
query_count,
5,
f"Expected O(1) queries but got {query_count} for {n} rows — "
f"N+1 pattern detected.",
)


class TestCogInheritanceNPlusOne(TestCogInteractions):
"""Verify cog_inheritance_post_query_processing query count is O(1), not O(N)."""

@patch(
'specifyweb.backend.inheritance.api.get_cat_num_inheritance_setting',
return_value=True,
)
def test_query_count_scales_constantly(self, _mock_setting):
"""With N rows needing COG primary lookup, DB queries should be constant."""

primary_co = self.collectionobjects[0]
primary_co.catalognumber = 'PRIMARY-001'
primary_co.save()

cog = self.test_cog_discrete

# Link primary_co as primary member of the COG
self._link_co_cog(primary_co, cog, isprimary=True, issubstrate=False)

# Create 20 child COs with null catalog numbers, each linked as non-primary
n = 20
child_cos = []
for i in range(n):
co = Collectionobject.objects.create(
collection=self.collection,
catalognumber=None,
collectionobjecttype=self.collectionobjecttype,
)
self._link_co_cog(co, cog, isprimary=False, issubstrate=False)
child_cos.append(co)

# Build fake query results: (child_co_id, None)
# tableid 1 = CollectionObject
fake_results = [(co.id, None) for co in child_cos]

field_specs = _make_field_specs()

with CaptureQueriesContext(connection) as ctx:
result = cog_inheritance_post_query_processing(
fake_results, 1, field_specs, self.collection, self.specifyuser,
)

# Every row should have inherited the primary's catalog number
for row in result:
self.assertEqual(row[1], 'PRIMARY-001')

# With the N+1 bug we'd see >= 2*N queries (two per row).
# After the fix, expect a small constant number (at most ~5).
query_count = len(ctx.captured_queries)
self.assertLessEqual(
query_count,
5,
f"Expected O(1) queries but got {query_count} for {n} rows — "
f"N+1 pattern detected.",
)
4 changes: 2 additions & 2 deletions specifyweb/backend/stored_queries/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ def query_to_csv(
]
csv_writer.writerow(encoded)
else:
for row in query.yield_per(1):
for row in query.yield_per(2000):
if row_filter is not None and not row_filter(row):
continue
encoded = [
Expand Down Expand Up @@ -438,7 +438,7 @@ def query_to_kml(
)
documentElement.appendChild(placemarkElement)
else:
for row in query.yield_per(1):
for row in query.yield_per(2000):
if row_has_geocoords(coord_cols, row):
placemarkElement = createPlacemark(
kmlDoc, row, coord_cols, table, captions, host
Expand Down
Loading