Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Empty file.
186 changes: 186 additions & 0 deletions specifyweb/backend/delete_blockers/tests/test_pagination.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
"""
Tests for paginated delete blockers (#7515).

The old implementation used Django's Collector.collect() which loads ALL
related objects into memory. For a Taxon like "Life" with 900K+
determinations this causes OOM kills.

The fix replaces Collector with targeted COUNT + limited-ID queries per
PROTECT relationship, returning total_count alongside a capped ids list.
"""

from django.test import TestCase

from specifyweb.specify.models import (
Collection,
Collectionobject,
Datatype,
Determination,
Discipline,
Division,
Geographytreedef,
Geographytreedefitem,
Geologictimeperiodtreedef,
Institution,
Taxon,
Taxontreedef,
Taxontreedefitem,
)
from specifyweb.backend.delete_blockers.views import _collect_delete_blockers


class TestDeleteBlockerPagination(TestCase):
    """Exercise the memory-bounded delete-blocker collection (#7515).

    setUpTestData builds the minimal Specify scaffolding plus a Taxon
    referenced by 150 Determination rows -- more than the 100-id cap --
    so the tests can observe both the capped ``ids`` list and the full
    ``total_count`` reported by _collect_delete_blockers.
    """

    @classmethod
    def setUpTestData(cls):
        """Create institution/discipline scaffolding and a heavily
        referenced taxon shared by all tests in this class."""
        cls.institution = Institution.objects.create(
            name='Test Institution',
            isaccessionsglobal=True,
            issecurityon=False,
            isserverbased=False,
            issharinglocalities=True,
            issinglegeographytree=True,
        )
        cls.division = Division.objects.create(
            institution=cls.institution,
            name='Test Division',
        )
        cls.geographytreedef = Geographytreedef.objects.create(name='Test gtd')
        cls.geographytreedefitem = Geographytreedefitem.objects.create(
            treedef=cls.geographytreedef,
            name='Planet',
            rankid=0,
        )
        cls.geologictimeperiodtreedef = Geologictimeperiodtreedef.objects.create(
            name='Test gtptd',
        )
        cls.taxontreedef = Taxontreedef.objects.create(name='Test ttd')
        cls.taxontreedefitem = Taxontreedefitem.objects.create(
            treedef=cls.taxontreedef,
            name='Life',
            rankid=0,
        )
        cls.datatype = Datatype.objects.create(name='Test datatype')
        cls.discipline = Discipline.objects.create(
            division=cls.division,
            datatype=cls.datatype,
            geologictimeperiodtreedef=cls.geologictimeperiodtreedef,
            geographytreedef=cls.geographytreedef,
            taxontreedef=cls.taxontreedef,
            type='fish',
        )
        cls.collection = Collection.objects.create(
            catalognumformatname='test',
            collectionname='TestCollection',
            discipline=cls.discipline,
            isembeddedcollectingevent=False,
        )

        # The deletion target: every Determination below points at it.
        cls.taxon = Taxon.objects.create(
            definition=cls.taxontreedef,
            definitionitem=cls.taxontreedefitem,
            name='Life',
            rankid=0,
        )

        # 150 collection objects, each determined as cls.taxon; 150 > 100
        # so the id cap is exercised.
        # NOTE(review): the Determination rows rely on bulk_create
        # populating each Collectionobject pk -- confirm the test database
        # backend supports returning inserted ids.
        catalog_objects = Collectionobject.objects.bulk_create(
            Collectionobject(
                catalognumber=f'{number:09d}',
                collection=cls.collection,
                collectionmemberid=cls.collection.id,
            )
            for number in range(150)
        )
        Determination.objects.bulk_create(
            Determination(
                collectionobject=catalog_object,
                collectionmemberid=cls.collection.id,
                taxon=cls.taxon,
                iscurrent=True,
            )
            for catalog_object in catalog_objects
        )

    def _find_blocker(self, blockers, table, field):
        """Return the first blocker dict matching (table, field), else None."""
        for entry in blockers:
            if entry['table'] == table and entry['field'] == field:
                return entry
        return None

    def test_total_count_present(self):
        """Each blocker entry carries a total_count key."""
        blockers = _collect_delete_blockers(self.taxon, 'default')
        for entry in blockers:
            self.assertIn(
                'total_count', entry,
                f"Missing total_count in blocker for "
                f"{entry.get('table')}.{entry.get('field')}",
            )

    def test_total_count_reflects_true_count(self):
        """total_count reports all 150 blocking determinations."""
        blockers = _collect_delete_blockers(self.taxon, 'default')
        det_blocker = self._find_blocker(blockers, 'Determination', 'taxon')
        self.assertIsNotNone(det_blocker, 'Expected a Determination/taxon blocker')
        self.assertEqual(det_blocker['total_count'], 150)

    def test_ids_capped_at_limit(self):
        """The ids list never grows beyond the default limit of 100."""
        blockers = _collect_delete_blockers(self.taxon, 'default')
        det_blocker = self._find_blocker(blockers, 'Determination', 'taxon')
        self.assertIsNotNone(det_blocker)
        self.assertLessEqual(len(det_blocker['ids']), 100)

    def test_small_blocker_has_exact_ids(self):
        """A blocker below the cap lists every id, and total_count equals
        len(ids)."""
        small_taxon = Taxon.objects.create(
            definition=self.taxontreedef,
            definitionitem=self.taxontreedefitem,
            name='SmallTaxon',
            rankid=0,
        )
        catalog_objects = Collectionobject.objects.bulk_create(
            Collectionobject(
                catalognumber=f'{number + 9000:09d}',
                collection=self.collection,
                collectionmemberid=self.collection.id,
            )
            for number in range(3)
        )
        Determination.objects.bulk_create(
            Determination(
                collectionobject=catalog_object,
                collectionmemberid=self.collection.id,
                taxon=small_taxon,
                iscurrent=True,
            )
            for catalog_object in catalog_objects
        )

        blockers = _collect_delete_blockers(small_taxon, 'default')
        det_blocker = self._find_blocker(blockers, 'Determination', 'taxon')
        self.assertIsNotNone(det_blocker)
        self.assertEqual(det_blocker['total_count'], 3)
        self.assertEqual(len(det_blocker['ids']), 3)

    def test_backward_compatible_keys(self):
        """Entries keep the legacy table, field, and ids keys."""
        blockers = _collect_delete_blockers(self.taxon, 'default')
        self.assertGreater(len(blockers), 0, 'Expected at least one blocker')
        for entry in blockers:
            for key in ('table', 'field', 'ids'):
                self.assertIn(key, entry)
45 changes: 29 additions & 16 deletions specifyweb/backend/delete_blockers/views.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from django import http
from django.db import router, transaction
from django.db.models.deletion import Collector

from specifyweb.middleware.general import require_http_methods
from specifyweb.specify.api.crud import (
Expand All @@ -9,6 +8,7 @@
prepare_discipline_for_delete,
)
from specifyweb.specify.api.serializers import toJson
from specifyweb.specify.models import protect_with_blockers
from specifyweb.specify.views import login_maybe_required

@login_maybe_required
Expand Down Expand Up @@ -37,19 +37,32 @@ def delete_blockers(request, model, id):

return http.HttpResponse(toJson(result), content_type='application/json')

def _collect_delete_blockers(obj, using) -> list[dict]:
collector = Collector(using=using)
collector.delete_blockers = []
collector.collect([obj])
return flatten([
[
{
'table': sub_objs[0].__class__.__name__,
'field': field.name,
'ids': [sub_obj.id for sub_obj in sub_objs]
}
] for field, sub_objs in collector.delete_blockers
])
def _collect_delete_blockers(obj, using, id_limit=100) -> list[dict]:
    """Return PROTECT blockers for *obj* with bounded memory use.

    Walks the model's reverse relations directly instead of running
    Django's Collector (which materialises every related row in memory).
    For each relation guarded by ``protect_with_blockers`` it issues one
    COUNT query plus one query for at most *id_limit* primary keys, so
    memory stays constant however many rows block the delete (#7515).

    Each entry keeps the legacy ``table``/``field``/``ids`` keys and adds
    ``total_count`` (the real number of blocking rows).

    NOTE(review): unlike Collector.collect(), this inspects only *obj*'s
    own relations -- blockers reachable through CASCADE children are not
    reported; confirm no caller depends on that nested behaviour.
    """
    blockers = []
    for rel in obj._meta.get_fields(include_hidden=True):
        # Keep only auto-created reverse FK / one-to-one relations whose
        # delete behaviour is the Specify PROTECT handler.
        is_protect_reverse = (
            rel.auto_created
            and not rel.concrete
            and (rel.one_to_many or rel.one_to_one)
            and rel.on_delete is protect_with_blockers
        )
        if not is_protect_reverse:
            continue
        manager = rel.related_model._base_manager.db_manager(using)
        blocking_rows = manager.filter(**{rel.field.name: obj.pk})
        total = blocking_rows.count()
        if total:
            capped_ids = list(
                blocking_rows.order_by('pk')
                .values_list('pk', flat=True)[:id_limit]
            )
            blockers.append({
                'table': rel.related_model.__name__,
                'field': rel.field.name,
                'ids': capped_ids,
                'total_count': total,
            })
    return blockers
9 changes: 8 additions & 1 deletion specifyweb/specify/tests/test_delete_blockers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,14 @@ def setUp(self):
def _assertSame(self, base, other):
key = lambda obj: (obj['table'], obj['field'])

sort_by_ids = lambda _blockers: [{**obj, 'ids': sorted(obj['ids'])} for obj in _blockers]
sort_by_ids = lambda _blockers: [
{
**obj,
'ids': sorted(obj['ids']),
'total_count': obj.get('total_count', len(obj['ids'])),
}
for obj in _blockers
]
base = sorted(base, key=key)
other = sorted(other, key=key)

Expand Down
Loading