diff --git a/specifyweb/backend/stored_queries/execution.py b/specifyweb/backend/stored_queries/execution.py
index e20f4f8b61d..2f666659b1d 100644
--- a/specifyweb/backend/stored_queries/execution.py
+++ b/specifyweb/backend/stored_queries/execution.py
@@ -3,23 +3,30 @@
import logging
import os
import re
+import uuid
+from io import StringIO
+from xml.sax.saxutils import escape
+from zipfile import ZIP_DEFLATED, ZipFile
-from typing import Literal, NamedTuple
+from typing import Any, Literal, NamedTuple
import xml.dom.minidom
from collections import namedtuple, defaultdict
from functools import reduce
from django.conf import settings
+from django.apps import apps
from django.db import transaction
from django.utils import timezone
from specifyweb.backend.inheritance.api import cog_inheritance_post_query_processing, parent_inheritance_post_query_processing
from specifyweb.backend.inheritance.utils import get_cat_num_inheritance_setting, get_parent_cat_num_inheritance_setting
+from specifyweb.backend.context.schema_localization import get_schema_localization
from specifyweb.backend.stored_queries.utils import log_sqlalchemy_query
from specifyweb.specify.utils.field_change_info import FieldChangeInfo
+from specifyweb.specify.utils.uiformatters import CNNField, get_catalognumber_format, get_uiformatter
from sqlalchemy import sql, orm, func, text
from sqlalchemy.sql.expression import asc, desc, insert, literal
-from specifyweb.specify.models_utils.models_by_table_id import get_table_id_by_model_name
+from specifyweb.specify.models_utils.models_by_table_id import get_model_by_table_id, get_table_id_by_model_name
from specifyweb.backend.stored_queries.group_concat import group_by_displayed_fields
from specifyweb.backend.trees.utils import get_search_filters
@@ -28,13 +35,15 @@
from .query_construct import QueryConstruct
from .relative_date_utils import apply_absolute_date
from .field_spec_maps import apply_specify_user_name
+from .web_portal_export import query_to_web_portal_zip as _query_to_web_portal_zip
from specifyweb.backend.notifications.models import Message
from specifyweb.backend.permissions.permissions import check_table_permissions
from specifyweb.specify.models import Loan, Loanpreparation, Loanreturnpreparation, Taxontreedef
from specifyweb.backend.workbench.upload.auditlog import auditlog
from specifyweb.backend.stored_queries.group_concat import group_by_displayed_fields
-from specifyweb.backend.stored_queries.queryfield import fields_from_json, QUREYFIELD_SORT_T
+from specifyweb.backend.stored_queries.queryfield import QueryField, fields_from_json, QUREYFIELD_SORT_T
from specifyweb.backend.stored_queries.synonomy import synonymize_tree_query
+
from specifyweb.specify.datamodel import datamodel, is_tree_table
logger = logging.getLogger(__name__)
@@ -268,12 +277,53 @@ def do_export(spquery, collection, user, filename, exporttype, host):
query_to_kml(session, collection, user, tableid, field_specs, path, spquery['captions'], host,
recordsetid=recordsetid, strip_id=False, selected_rows=spquery.get('selectedrows', None))
message_type = 'query-export-to-kml-complete'
+ elif exporttype == 'webportal':
+ query_to_web_portal_zip(
+ session,
+ collection,
+ user,
+ tableid,
+ field_specs,
+ path,
+ spquery['captions'],
+ recordsetid=recordsetid,
+ distinct=spquery['selectdistinct'],
+ )
+ message_type = 'query-export-to-web-portal-complete'
Message.objects.create(user=user, content=json.dumps({
'type': message_type,
'file': filename,
}))
+
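+# Thin wrapper over web_portal_export.query_to_web_portal_zip. The query-building
+# helpers are passed in as callables so that module does not have to import them
+# from execution, keeping the two modules free of a circular import.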
+def query_to_web_portal_zip(
+ session,
+ collection,
+ user,
+ tableid,
+ field_specs,
+ path,
+ captions,
+ recordsetid=None,
+ distinct=False,
+):
+ return _query_to_web_portal_zip(
+ session,
+ collection,
+ user,
+ tableid,
+ field_specs,
+ path,
+ captions,
+ build_query_fn=build_query,
+ build_query_props_cls=BuildQueryProps,
+ apply_special_post_query_processing_fn=apply_special_post_query_processing,
+ set_group_concat_max_len_fn=set_group_concat_max_len,
+ recordsetid=recordsetid,
+ distinct=distinct,
+ )
+
# def stored_query_to_csv(query_id, collection, user, path):
# """Executes a query from the Spquery table with the given id and send
# the results to a CSV file at path.
diff --git a/specifyweb/backend/stored_queries/queryfieldspec.py b/specifyweb/backend/stored_queries/queryfieldspec.py
index ceec7f6c8bf..e56d12e1f05 100644
--- a/specifyweb/backend/stored_queries/queryfieldspec.py
+++ b/specifyweb/backend/stored_queries/queryfieldspec.py
@@ -18,6 +18,7 @@
from . import models
from .query_ops import QueryOps
from specifyweb.specify.models_utils.load_datamodel import Table, Field, Relationship
+from specifyweb.specify.datamodel import is_tree_table
logger = logging.getLogger(__name__)
@@ -259,7 +260,19 @@ def from_stringid(cls, stringid: str, is_relation: bool):
field = node.get_field(extracted_fieldname, strict=False)
tree_rank_name = None
- if field is None: # try finding tree
+ if (
+ field is None
+ and is_relation
+ and not is_tree_table(node)
+ and extracted_fieldname.lower() == table_name.lower() == node.name.lower()
+ ):
+ # Legacy relation stringids like "locality.locality" serialize the current related table as a formatted
+ # step, not as an actual field on that table.
+ # Preserve that sentinel so nested formatted relations keep the same row plan shape, without treating
+ # arbitrary unknown fields on non-tree tables as tree ranks.
+ tree_rank_name = extracted_fieldname
+ join_path.append(TreeRankQuery.create(tree_rank_name, node.name))
+ elif field is None and is_tree_table(node): # try finding tree only on tree tables
tree_rank_name, field = find_tree_and_field(node, extracted_fieldname)
if tree_rank_name:
tree_rank = TreeRankQuery.create(
diff --git a/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py b/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py
index c317fa53cf1..52932c5fff9 100644
--- a/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py
+++ b/specifyweb/backend/stored_queries/tests/test_execution/test_field_specs_from_json.py
@@ -108,3 +108,24 @@ def test_static_field_specs(self): # pragma: no cover
# generate_fields_test_str(query_fields, "static_simple_field_spec")
self.assertEqual(static_simple_field_spec, query_fields)
+
+ def test_non_tree_table_does_not_parse_tree_rank(self):
+ table = datamodel.get_table_strict("CollectionObject")
+ stringid = f"{table.tableId}.collectionobject.NotARealField"
+
+ fieldspec = QueryFieldSpec.from_stringid(stringid, False)
+
+ self.assertFalse(fieldspec.contains_tree_rank())
+ self.assertIsNone(fieldspec.tree_rank)
+ self.assertIsNone(fieldspec.get_field())
+
+ def test_nested_formatted_relation_keeps_legacy_sentinel(self):
+ fieldspec = QueryFieldSpec.from_stringid("1,10,2.locality.locality", True)
+
+ self.assertTrue(fieldspec.contains_tree_rank())
+ self.assertEqual(fieldspec.tree_rank, "locality")
+ self.assertEqual(
+ [node.name for node in fieldspec.join_path],
+ ["collectingEvent", "locality", "locality"],
+ )
+ self.assertIsInstance(fieldspec.get_field(), TreeRankQuery)
diff --git a/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py b/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
new file mode 100644
index 00000000000..d4d5b2a1bf7
--- /dev/null
+++ b/specifyweb/backend/stored_queries/tests/test_views/test_export_web_portal.py
@@ -0,0 +1,70 @@
+from unittest.mock import MagicMock, Mock, patch
+
+from django.test import Client
+
+from specifyweb.backend.stored_queries.tests.tests import SQLAlchemySetup
+
+from .raw_query import get_simple_query
+
+
+class TestExportWebPortal(SQLAlchemySetup):
+ @patch("specifyweb.backend.stored_queries.views.Thread")
+ def test_export(self, thread: Mock):
+ c = Client()
+ c.force_login(self.specifyuser)
+
+ response = c.post(
+ "/stored_query/exportwebportal/",
+ get_simple_query(self.specifyuser),
+ content_type="application/json",
+ )
+
+ self._assertStatusCodeEqual(response, 200)
+ thread.assert_called_once()
+ self.assertTrue(thread.return_value.daemon)
+ thread.return_value.start.assert_called_once()
+ self._assertContentEqual(response, "OK")
+
+ def test_portal_attachment_map(self):
+        from specifyweb.backend.stored_queries import web_portal_export
+
+ class FakeAttachment:
+ id = 5291
+ attachmentlocation = "sp6896513492722436219.att.JPG"
+ origfilename = "29432.JPG"
+ title = "Figure 1"
+
+ class FakeJoinRecord:
+ collectionobject_id = 123
+ attachment = FakeAttachment()
+
+ class FakeJoinQuery:
+ def select_related(self, *_args, **_kwargs):
+ return [FakeJoinRecord()]
+
+ class FakeJoinManager:
+ def __init__(self):
+ self.filter_kwargs = None
+
+ def filter(self, **kwargs):
+ self.filter_kwargs = kwargs
+ return FakeJoinQuery()
+
+ fake_join_manager = FakeJoinManager()
+ fake_base_model = type("Collectionobject", (), {"_meta": MagicMock(app_label="specifyweb")})
+ fake_table = MagicMock()
+ fake_table.attachments_field = MagicMock()
+
+        # Patch the module that defines _portal_attachment_map so its
+        # module-level lookups (datamodel, get_model_by_table_id, apps)
+        # are the ones intercepted.
+        with patch.object(web_portal_export.datamodel, "get_table_by_id", return_value=fake_table), patch.object(
+            web_portal_export, "get_model_by_table_id", return_value=fake_base_model
+        ), patch.object(web_portal_export.apps, "get_model", return_value=type("Collectionobjectattachment", (), {"objects": fake_join_manager})):
+            result = web_portal_export._portal_attachment_map(1, [123])
+
+ self.assertEqual(
+ fake_join_manager.filter_kwargs,
+ {"collectionobject_id__in": [123], "attachment__ispublic": True},
+ )
+ self.assertEqual(
+ result["123"],
+ '[{AttachmentID:5291,AttachmentLocation:"sp6896513492722436219.att.JPG",Title:"Figure 1"}]',
+ )
diff --git a/specifyweb/backend/stored_queries/urls.py b/specifyweb/backend/stored_queries/urls.py
index ff5b0aee3fb..38dfc774607 100644
--- a/specifyweb/backend/stored_queries/urls.py
+++ b/specifyweb/backend/stored_queries/urls.py
@@ -7,6 +7,7 @@
path('ephemeral/', views.ephemeral),
path('exportcsv/', views.export_csv),
path('exportkml/', views.export_kml),
+ path('exportwebportal/', views.export_to_web_portal),
path('make_recordset/', views.make_recordset),
path('merge_recordsets/', views.merge_recordsets),
path('return_loan_preps/', views.return_loan_preps),
diff --git a/specifyweb/backend/stored_queries/views.py b/specifyweb/backend/stored_queries/views.py
index 7efb9c596c3..8fee235a908 100644
--- a/specifyweb/backend/stored_queries/views.py
+++ b/specifyweb/backend/stored_queries/views.py
@@ -34,6 +34,7 @@ class QueryBuilderPt(PermissionTarget):
execute = PermissionTargetAction()
export_csv = PermissionTargetAction()
export_kml = PermissionTargetAction()
+ export_to_web_portal = PermissionTargetAction()
create_recordset = PermissionTargetAction()
def value_from_request(field, get):
@@ -202,6 +203,39 @@ def export_kml(request):
thread.start()
return HttpResponse('OK', content_type='text/plain')
+
+@require_POST
+@login_maybe_required
+@never_cache
+def export_to_web_portal(request):
+    """Builds the web portal export ZIP for the query provided as JSON in the
+    POST body. The work runs in a background thread; completion is announced
+    via a notification.
+    """
+ check_permission_targets(request.specify_collection.id, request.specify_user.id, [
+ QueryBuilderPt.execute,
+ QueryBuilderPt.export_to_web_portal,
+ ])
+ try:
+ spquery = json.load(request)
+ except ValueError as e:
+ return HttpResponseBadRequest(e)
+
+ logger.info('export web portal query: %s', spquery)
+
+ if 'collectionid' in spquery:
+ collection = Collection.objects.get(pk=spquery['collectionid'])
+ logger.debug('forcing collection to %s', collection.collectionname)
+ else:
+ collection = request.specify_collection
+
+ file_name = format_export_file_name(spquery, 'zip')
+
+ thread = Thread(
+ target=do_export,
+ args=(spquery, collection, request.specify_user, file_name, 'webportal', None),
+ )
+ thread.daemon = True
+ thread.start()
+ return HttpResponse('OK', content_type='text/plain')
+
@require_POST
@login_maybe_required
@never_cache
diff --git a/specifyweb/backend/stored_queries/web_portal_export.py b/specifyweb/backend/stored_queries/web_portal_export.py
new file mode 100644
index 00000000000..43234003d3d
--- /dev/null
+++ b/specifyweb/backend/stored_queries/web_portal_export.py
@@ -0,0 +1,603 @@
+import csv
+import json
+import logging
+import os
+import re
+import uuid
+from collections import defaultdict
+from io import StringIO
+from itertools import zip_longest
+from typing import Any, Callable
+from urllib.parse import urlsplit, urlunsplit
+from xml.sax.saxutils import escape
+from zipfile import ZIP_DEFLATED, ZipFile
+
+from django.apps import apps
+from django.conf import settings
+
+from specifyweb.backend.context.schema_localization import get_schema_localization
+from specifyweb.specify.datamodel import datamodel
+from specifyweb.specify.models_utils.models_by_table_id import get_model_by_table_id
+from specifyweb.specify.utils.uiformatters import CNNField, get_catalognumber_format
+
+
+logger = logging.getLogger(__name__)
+# Filename at the end of asset-server URLs; stripped to obtain the portal asset base path.
+_ASSET_STORE_FILENAME = 'web_asset_store.xml'
+
+
+def _build_portal_collection_name(collection) -> str:
+    """Return the collection name to publish in the portal instance settings.
+
+    Prefers the WEB_ATTACHMENT_COLLECTION environment variable, then the
+    Django setting of the same name, then the collection's actual name.
+    """
+
+ return (
+ os.getenv('WEB_ATTACHMENT_COLLECTION')
+ or settings.WEB_ATTACHMENT_COLLECTION
+ or collection.collectionname
+ )
+
+
+def _strip_asset_store_xml(url: str) -> str:
+ """Remove the asset store file path from attachment URLs.
+
+ The portal export should expose a clean base asset path, not the
+ internal web asset store XML filename itself.
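+
+    For example (illustrative URL)::
+
+        >>> _strip_asset_store_xml('https://assets.example.org/web_asset_store.xml')
+        'https://assets.example.org'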
+ """
+ if not url:
+ return ''
+
+ parsed = urlsplit(url)
+ path = parsed.path or ''
+ trimmed_path = path.rstrip('/')
+ path_parts = trimmed_path.split('/') if trimmed_path else []
+
+ if not path_parts or path_parts[-1].lower() != _ASSET_STORE_FILENAME:
+ return url
+
+ base_path = '/'.join(path_parts[:-1]).rstrip('/')
+ return urlunsplit((parsed.scheme, parsed.netloc, base_path, parsed.query, parsed.fragment))
+
+
+def _build_portal_image_base_url() -> str:
+ """Build the base URL that portal image attachments should use."""
+
+ raw_url = (os.getenv('ASSET_SERVER_URL') or settings.WEB_ATTACHMENT_URL or '').strip()
+ return _strip_asset_store_xml(raw_url)
+
+
+def _schema_localization_or_empty(collection) -> dict[str, Any]:
+ try:
+ return get_schema_localization(collection, 0, 'en-us')
+ except Exception:
+ logger.exception('Failed loading schema localization for web portal export')
+ return {}
+
+
+def _clean_cell(value: Any) -> str:
+    return re.sub(r'\r|\n', ' ', str(value if value is not None else ''))
+
+
+def _clean_portal_attachment_text(value: Any) -> str:
+ return re.sub(r'\r|\n|"', ' ', str(value if value is not None else '')).strip()
+
+
+def _portal_attachment_entry(attachment) -> str:
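+    """Render one attachment in the portal's bracketed key:value format, e.g.
+    {AttachmentID:5291,AttachmentLocation:"sp...att.JPG",Title:"Figure 1"}.
+    Keys are deliberately unquoted, so the result is not strict JSON.
+    """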
+ attachment_location = _clean_portal_attachment_text(attachment.attachmentlocation)
+ title = _clean_portal_attachment_text(
+ os.path.basename(
+ attachment.title or attachment.origfilename or attachment.attachmentlocation or ''
+ )
+ )
+ return (
+ '{'
+ f'AttachmentID:{attachment.id},'
+ f'AttachmentLocation:"{attachment_location}",'
+ f'Title:"{title}"'
+ '}'
+ )
+
+
+def _portal_attachment_map(tableid: int, record_ids: list[Any]) -> dict[str, str]:
+ """Collect attachment metadata for portal rows by record id.
+
+ The portal CSV stores image attachments as JSON strings in the "img"
+    column, so we prebuild a mapping from record IDs to attachment entries.
+ """
+ if not record_ids:
+ return {}
+
+ table = datamodel.get_table_by_id(tableid, strict=True)
+ if table.attachments_field is None:
+ return {}
+
+ base_model = get_model_by_table_id(tableid)
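+    # Specify names attachment join models "<base model>attachment",
+    # e.g. Collectionobject -> Collectionobjectattachment.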
+ join_model_name = base_model.__name__ + 'attachment'
+ join_model = apps.get_model(base_model._meta.app_label, join_model_name)
+ record_id_field = f'{base_model.__name__.lower()}_id'
+
+ join_records = join_model.objects.filter(
+ **{
+ f'{record_id_field}__in': record_ids,
+ 'attachment__ispublic': True,
+ }
+ ).select_related('attachment')
+ attachment_entries_by_record_id: dict[str, list[str]] = defaultdict(list)
+
+ for join_record in join_records:
+ attachment = join_record.attachment
+ if attachment.attachmentlocation is None:
+ continue
+ record_key = str(getattr(join_record, record_id_field))
+ attachment_entries_by_record_id[record_key].append(_portal_attachment_entry(attachment))
+
+ return {
+ record_id: '[' + ', '.join(entries) + ']'
+ for record_id, entries in attachment_entries_by_record_id.items()
+ }
+
+
+def _dedupe_name(name: str, used_names: set[str]) -> str:
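+    """Return `name`, suffixed with `_2`, `_3`, ... if it is already taken.
+
+    The chosen name is added to `used_names` as a side effect.
+    """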
+ candidate = name
+ suffix = 2
+ while candidate in used_names:
+ candidate = f"{name}_{suffix}"
+ suffix += 1
+ used_names.add(candidate)
+ return candidate
+
+
+def _normalize_portal_column_name(name: Any, query_field=None) -> str:
+ """Normalize portal column captions for export.
+
+    For tree-rank columns, strips the trailing " - name" / " - fullName"
+    label (e.g. a caption like "Genus - Full Name" becomes "Genus"), which
+    adds nothing in the portal.
+ """
+ normalized = str(name if name is not None else '').strip()
+
+ if query_field is not None:
+ fieldspec = query_field.fieldspec
+ field = fieldspec.get_field()
+ if (
+ fieldspec.tree_rank is not None
+ and field is not None
+ and field.name in {'name', 'fullName'}
+ and ' - ' in normalized
+ ):
+ return normalized.rsplit(' - ', 1)[0].rstrip()
+
+ return normalized
+
+
+def _portal_solr_type(query_field, collection, user) -> str:
+ """Map a query field to the Solr field type used in portal metadata."""
+ fieldspec = query_field.fieldspec
+ field = fieldspec.get_field()
+
+ # Relationship fields are always emitted as strings for portal search.
+ if field is None or field.is_relationship:
+ return 'string'
+
+ if fieldspec.table.name == 'CollectionObject' and field.name == 'catalogNumber':
+ formatter = get_catalognumber_format(collection, query_field.format_name, user)
+ if (
+ formatter is not None
+ and len(formatter.fields) == 1
+ and isinstance(formatter.fields[0], CNNField)
+ ):
+ return 'pint'
+ return 'string'
+
+ if field.type in ('java.lang.String', 'text'):
+ return 'string'
+ if field.type in ('java.util.Date', 'java.sql.Timestamp'):
+ return 'string'
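+    # A date queried as a Day/Month/Year part comes back as a plain integer.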
+ if field.type == 'java.util.Calendar':
+ return 'pint' if fieldspec.date_part in {'Day', 'Month', 'Year'} else 'string'
+ if field.type in ('java.lang.Integer', 'java.lang.Byte', 'java.lang.Short'):
+ return 'pint'
+ if field.type == 'java.lang.Long':
+ return 'plong'
+ if field.type == 'java.lang.Float':
+ return 'pfloat'
+ if field.type in ('java.lang.Double', 'java.math.BigDecimal'):
+ return 'pdouble'
+ if field.type == 'java.lang.Boolean':
+ return 'string'
+ return 'string'
+
+
+def _portal_field_metadata(
+ query_field,
+ caption: str,
+ colname: str,
+ index: int,
+ schema_localization: dict[str, Any],
+ collection,
+ user,
+) -> dict[str, Any]:
+ """Build the metadata row for one exported portal field.
+
+ This metadata is written to flds.json and is consumed by the portal
+ frontend to build field definitions, sorting, display labels, and linkification.
+ """
+ fieldspec = query_field.fieldspec
+ table = fieldspec.table
+ field = fieldspec.get_field()
+
+ table_key = table.name.lower()
+ table_localization = schema_localization.get(table_key, {})
+ item_localization = (
+ table_localization.get('items', {}).get(field.name.lower(), {})
+ if field is not None
+ else {}
+ )
+
+ spfld = field.name if field is not None else table.idFieldName
+ if field is not None and field.is_relationship:
+ field_type = 'java.lang.String'
+ else:
+ field_type = field.type if field is not None else 'java.lang.String'
+ field_length = field.length if field is not None and field.length is not None else 255
+ solr_type = _portal_solr_type(query_field, collection, user)
+ is_linkified = solr_type == 'string' and field_type in ('java.lang.String', 'text')
+
+ return {
+ 'colname': colname,
+ 'solrname': spfld,
+ 'solrtype': solr_type,
+ 'title': caption,
+ 'type': field_type,
+ 'width': field_length,
+ 'concept': colname,
+ 'concepturl': 'http://rs.tdwg.org/dwc/terms/',
+ 'sptable': table_key,
+ 'sptabletitle': table_localization.get('name', table.name),
+ 'spfld': spfld,
+ 'spfldtitle': item_localization.get('name', spfld),
+ 'spdescription': item_localization.get('desc', spfld),
+ 'colidx': index,
+ 'linkify': is_linkified,
+ 'advancedsearch': True,
+ 'displaycolidx': index,
+ }
+
+
+def _simplify_portal_field_metadata(field_meta: dict[str, Any]) -> dict[str, Any]:
+    """Reduce a field's full metadata to the subset of keys the portal's flds.json consumes."""
+ simplified = {
+ 'colname': field_meta['colname'],
+ 'solrname': field_meta['solrname'],
+ 'solrtype': field_meta['solrtype'],
+ }
+
+ for key in (
+ 'title',
+ 'type',
+ 'width',
+ 'concept',
+ 'sptable',
+ 'sptabletitle',
+ 'spfld',
+ 'spfldtitle',
+ 'colidx',
+ 'linkify',
+ 'advancedsearch',
+ 'displaycolidx',
+ 'treeid',
+ 'treerank',
+ ):
+ if key in field_meta:
+ simplified[key] = field_meta[key]
+
+ return simplified
+
+
+def _make_solr_schema_xml(fields: list[dict[str, Any]]) -> str:
+ """Create a minimal Solr schema for exported portal fields.
+
+ It defines the fields that the portal will index and search.
+ """
+    lines = [
+        '<?xml version="1.0" encoding="UTF-8"?>',
+        '<!-- Solr field definitions for the Specify web portal index. -->',
+    ]
+
+    # Fixed portal columns: contents is the catch-all search blob, geoc the
+    # spatial field, img the attachment metadata string.
+    lines.append(
+        '<field name="contents" type="text_general" indexed="true" stored="false" multiValued="true"/>'
+    )
+    lines.append(
+        '<field name="geoc" type="location" indexed="true" stored="true" required="false"/>'
+    )
+    lines.append(
+        '<field name="img" type="string" indexed="false" stored="true" required="false"/>'
+    )
+
+ emitted: set[str] = {'contents', 'geoc', 'img'}
+ for field in fields:
+ name = str(field['solrname'])
+ if name in emitted:
+ continue
+ emitted.add(name)
+
+ escaped_name = escape(name)
+ solr_type = 'string' if name == 'spid' else escape(str(field['solrtype']))
+ required = 'true' if name == 'spid' else 'false'
+        lines.append(
+            f'<field name="{escaped_name}" type="{solr_type}" indexed="true" stored="true" required="{required}"/>'
+        )
+ return "\n".join(lines) + "\n"
+
+
+def _image_info_fields_from_column_defs(
+ column_defs: list[tuple[str, str, str, dict[str, Any]]],
+) -> list[str]:
+    """Return solr names of the columns best suited to labeling images:
+    catalog number, field number, or station field number.
+    """
+ image_info_fields: list[str] = []
+ target_spflds = {
+ 'catalognumber',
+ 'fieldnumber',
+ 'stationfieldnumber',
+ }
+ for _, solrname, _, metadata in column_defs:
+ if str(metadata.get('spfld', '')).lower() in target_spflds:
+ image_info_fields.append(solrname)
+ return image_info_fields
+
+
+def _serialize_portal_data(
+ rows: list[list[str]],
+ header: list[str],
+) -> str:
+ """Output rows to CSV.
+
+ The web portal expects a standard CSV file with a header row followed by
+ one row per portal record.
+ """
+ output = StringIO()
+ writer = csv.writer(output)
+ writer.writerow(header)
+ writer.writerows(rows)
+ return output.getvalue()
+
+
+def _find_geoc_field_indexes(
+ column_defs: list[tuple[str, str, str, dict[str, Any]]],
+) -> tuple[int | None, int | None, int | None, int | None]:
+ """Locate latitude/longitude columns for geocoding the portal row."""
+ lat1_idx = None
+ lon1_idx = None
+ lat2_idx = None
+ lon2_idx = None
+
+ for index, (_, __, ___, metadata) in enumerate(column_defs):
+ # Only locality fields are relevant for geocoding.
+ if str(metadata.get('sptable', '')).lower() != 'locality':
+ continue
+
+ spfld = str(metadata.get('spfld', '')).lower()
+ if spfld == 'latitude1' and lat1_idx is None:
+ lat1_idx = index
+ elif spfld == 'longitude1' and lon1_idx is None:
+ lon1_idx = index
+ elif spfld == 'latitude2' and lat2_idx is None:
+ lat2_idx = index
+ elif spfld == 'longitude2' and lon2_idx is None:
+ lon2_idx = index
+
+ return lat1_idx, lon1_idx, lat2_idx, lon2_idx
+
+
+def _build_geoc_value(
+ cleaned_values: list[str],
+ lat1_idx: int | None,
+ lon1_idx: int | None,
+ lat2_idx: int | None,
+ lon2_idx: int | None,
+) -> str:
+ """Build the 'geoc' column value for portal mapping.
+
+ The portal uses the first valid latitude/longitude pair it finds.
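+
+    For example, cleaned_values of ['12.5', '-70.1'] with lat1_idx=0 and
+    lon1_idx=1 yields '12.5 -70.1'; if either half is blank, the
+    latitude2/longitude2 pair is tried instead.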
+ """
+ def _pair_value(lat_idx: int | None, lon_idx: int | None) -> str:
+ if lat_idx is None or lon_idx is None:
+ return ''
+ if lat_idx >= len(cleaned_values) or lon_idx >= len(cleaned_values):
+ return ''
+
+ latitude = cleaned_values[lat_idx].strip()
+ longitude = cleaned_values[lon_idx].strip()
+ if not latitude or not longitude:
+ return ''
+ return f'{latitude} {longitude}'
+
+ primary = _pair_value(lat1_idx, lon1_idx)
+ if primary:
+ return primary
+ return _pair_value(lat2_idx, lon2_idx)
+
+
+def query_to_web_portal_zip(
+ session,
+ collection,
+ user,
+ tableid,
+ field_specs,
+ path,
+ captions,
+ build_query_fn: Callable[..., tuple[Any, Any]],
+ build_query_props_cls,
+ apply_special_post_query_processing_fn: Callable[..., Any],
+ set_group_concat_max_len_fn: Callable[[Any], None],
+ recordsetid=None,
+ distinct=False,
+):
+ """Export a stored query as a web portal ZIP package.
+
+ This writes PortalData.csv, flds.json, PortalInstanceSetting.json, and
+ SolrFldSchema.xml into the destination ZIP file.
+ """
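+    # Raise the DB's GROUP_CONCAT length limit up front so aggregated
+    # display values are not silently truncated mid-export.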
+ set_group_concat_max_len_fn(session.connection())
+ query, __ = build_query_fn(
+ session,
+ collection,
+ user,
+ tableid,
+ field_specs,
+ build_query_props_cls(recordsetid=recordsetid, replace_nulls=True, distinct=distinct),
+ )
+ query = apply_special_post_query_processing_fn(
+ query,
+ tableid,
+ field_specs,
+ collection,
+ user,
+ should_list_query=False,
+ )
+
+ display_fields = [field_spec for field_spec in field_specs if field_spec.display]
+
+ # Match the exported captions to the actual displayed fields. Captions may
+ # be provided for all query fields, but only display fields are exported.
+ if captions and isinstance(captions, list):
+ if len(captions) == len(display_fields):
+ effective_captions = captions
+ elif len(captions) == len(field_specs):
+ effective_captions = [
+ caption
+ for field_spec, caption in zip(field_specs, captions)
+ if field_spec.display
+ ]
+ else:
+ effective_captions = captions[: len(display_fields)]
+ else:
+ effective_captions = []
+
+ if len(effective_captions) != len(display_fields):
+ effective_captions = [
+ (
+ field_spec.fieldspec.get_field().name
+ if field_spec.fieldspec.get_field() is not None
+ else field_spec.fieldspec.table.name
+ )
+ for field_spec in display_fields
+ ]
+
+ schema_localization = _schema_localization_or_empty(collection)
+
+ used_colnames: set[str] = {'spid'}
+ used_solrnames: set[str] = {'spid'}
+ column_defs: list[tuple[str, str, str, dict[str, Any]]] = []
+ for index, (field_spec, caption) in enumerate(
+ zip_longest(display_fields, effective_captions, fillvalue=''),
+ start=0,
+ ):
+ trimmed_caption = _normalize_portal_column_name(caption, field_spec)
+ base_name = trimmed_caption if trimmed_caption else f'column_{index + 1}'
+ colname = _dedupe_name(base_name, used_colnames)
+
+        field = field_spec.fieldspec.get_field()
+        table_prefix = field_spec.fieldspec.table.name.lower()
+        base_solrname = (
+            field.name if field is not None else field_spec.fieldspec.table.idFieldName
+        )
+
+ if base_solrname in used_solrnames:
+ solrname = _dedupe_name(f'{table_prefix}_{base_solrname}', used_solrnames)
+ else:
+ solrname = _dedupe_name(base_solrname, used_solrnames)
+
+ metadata = _portal_field_metadata(
+ field_spec,
+ trimmed_caption if trimmed_caption else colname,
+ colname,
+ index,
+ schema_localization,
+ collection,
+ user,
+ )
+ metadata['solrname'] = solrname
+ column_defs.append((colname, solrname, metadata['title'], metadata))
+
+ # Build the JSON metadata rows used by the portal's field definition store.
+ # The fixed fields spid and img are always included, plus one row per display field.
+ metadata_rows: list[dict[str, Any]] = [
+ {
+ 'colname': 'spid',
+ 'solrname': 'spid',
+ 'solrtype': 'int',
+ 'title': 'spid',
+ 'linkify': False,
+ 'colidx': 0,
+ 'displaycolidx': 0,
+ },
+ *[
+ _simplify_portal_field_metadata(column_def[3])
+ for column_def in column_defs
+ ],
+ {
+ 'colname': 'img',
+ 'solrname': 'img',
+ 'solrtype': 'string',
+ 'title': 'image',
+ },
+ ]
+
+ output_rows: list[list[str]] = []
+ geoc_lat1_idx, geoc_lon1_idx, geoc_lat2_idx, geoc_lon2_idx = _find_geoc_field_indexes(column_defs)
+ data_rows = query if isinstance(query, list) else list(query.yield_per(1))
+ portal_attachments = _portal_attachment_map(tableid, [row[0] for row in data_rows])
+ # The portal frontend expects each row to have the same number of values as the field metadata.
+ # If the query returns too few values, pad with empty strings; if it returns too many,
+ # truncate extras so the CSV header and row data remain aligned.
+ expected_values = len(column_defs)
+ for row in data_rows:
+ raw_id = row[0] if len(row) > 0 else ''
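+        # uuid5 is deterministic, so re-exporting the same query yields
+        # stable spids for the same records.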
+ spid = str(uuid.uuid5(uuid.NAMESPACE_URL, f'{tableid}:{raw_id}'))
+ display_values = list(row[1:] if len(row) > 1 else [])
+ if len(display_values) < expected_values:
+ display_values.extend([''] * (expected_values - len(display_values)))
+ elif len(display_values) > expected_values:
+ display_values = display_values[:expected_values]
+ cleaned_values = [_clean_cell(value) for value in display_values]
+ contents = '\t'.join(cleaned_values)
+ img = portal_attachments.get(str(raw_id), '')
+ geoc = _build_geoc_value(
+ cleaned_values,
+ geoc_lat1_idx,
+ geoc_lon1_idx,
+ geoc_lat2_idx,
+ geoc_lon2_idx,
+ )
+ output_rows.append([spid, contents, img, geoc, *cleaned_values])
+
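+    # Fixed portal columns lead the CSV: spid (stable row id), contents
+    # (tab-joined search blob), img (attachment metadata), and geoc
+    # (lat/long), followed by one column per display field.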
+ header = ['spid', 'contents', 'img', 'geoc', *[column_def[1] for column_def in column_defs]]
+ portal_data = _serialize_portal_data(output_rows, header)
+ # flds.json drives the portal's field definitions and display metadata.
+ flds_json = json.dumps(metadata_rows, indent=2)
+ # SolrFldSchema.xml is a minimal schema fragment for the portal's Solr index.
+ solr_schema = _make_solr_schema_xml(metadata_rows)
+
+ image_info_fields = _image_info_fields_from_column_defs(column_defs)
+ portal_instance_settings = json.dumps(
+ {
+ 'portalInstance': str(uuid.uuid4()),
+ 'collectionName': _build_portal_collection_name(collection),
+ 'imageBaseUrl': _build_portal_image_base_url(),
+ 'imageInfoFlds': ' '.join(image_info_fields),
+ },
+ indent=2,
+ )
+
+ with ZipFile(path, 'w', compression=ZIP_DEFLATED) as archive:
+ archive.writestr('PortalFiles/PortalData.csv', portal_data)
+ archive.writestr('PortalFiles/flds.json', flds_json)
+ archive.writestr(
+ 'PortalFiles/PortalInstanceSetting.json',
+ portal_instance_settings,
+ )
+ archive.writestr('PortalFiles/SolrFldSchema.xml', solr_schema)
diff --git a/specifyweb/backend/trees/utils.py b/specifyweb/backend/trees/utils.py
index 58863726dac..a8603cc6942 100644
--- a/specifyweb/backend/trees/utils.py
+++ b/specifyweb/backend/trees/utils.py
@@ -38,6 +38,8 @@
def get_search_filters(collection: spmodels.Collection, tree: str):
tree_name = tree.lower()
+ if tree_name not in SPECIFY_TREES:
+ raise ValueError(f"unexpected tree type: {tree}")
if tree_name == 'storage':
return Q(institution=collection.discipline.division.institution)
discipline_query = Q(discipline=collection.discipline)
@@ -53,6 +55,8 @@ def get_search_filters(collection: spmodels.Collection, tree: str):
def get_treedefs(collection: spmodels.Collection, tree_name: str) -> list[tuple[int, int]]:
# Get the appropriate TreeDef based on the Collection and tree_name
+ if tree_name.lower() not in SPECIFY_TREES:
+ raise ValueError(f"unexpected tree type: {tree_name}")
# Mimic the old behavior of limiting the query to the first item for trees other than taxon.
# Even though the queryconstruct can handle trees with multiple types.
diff --git a/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx b/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx
index 26f4696c407..5d4d5a3b97e 100644
--- a/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx
+++ b/specifyweb/frontend/js_src/lib/components/Notifications/NotificationRenderers.tsx
@@ -131,6 +131,22 @@ export const notificationRenderers: IR<
      </>
    );
  },
+  'query-export-to-web-portal-complete'(notification) {
+    return (
+      <>
+        {notificationsText.queryExportToWebPortalCompleted()}
+        <Link.Green
+          className="w-fit normal-case"
+          download
+          href={`/static/depository/${notification.payload.file as string}`}
+        >
+          {notificationsText.download()}
+        </Link.Green>
+      </>
+    );
+  },
'dataset-ownership-transferred'(notification) {
return (
diff --git a/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx b/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx
--- a/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx
+++ b/specifyweb/frontend/js_src/lib/components/QueryBuilder/Export.tsx
@@ ... @@ export function QueryExportButtons({
    fields.some(({ mappingPath }) => !mappingPathIsComplete(mappingPath));
- const [state, setState] = React.useState<'creating' | 'warning' | undefined>(
- undefined
- );
+ const [state, setState] = React.useState<
+ 'creating' | 'warning' | 'duplicateWarning' | undefined
+ >(undefined);
+
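+  // Column 0 of each result row is the record ID. The portal needs exactly one
+  // row per record, so duplicates (e.g. from one-to-many joins such as
+  // preparations or collectors) must block the export.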
+ const hasDuplicateRecordIds = (): boolean => {
+ const seenIds = new Set();
+ return (
+ results.current?.some((row) => {
+ if (row === undefined) return false;
+ const id = row[0];
+ if (id === undefined || id === null) return false;
+ if (seenIds.has(id)) return true;
+ seenIds.add(id);
+ return false;
+ }) ?? false
+ );
+ };
function doQueryExport(
url: string,
@@ -155,6 +170,17 @@ export function QueryExportButtons({
        >
          {queryText.missingCoordinatesForKmlDescription()}
        </Dialog>
+      ) : state === 'duplicateWarning' ? (
+        <Dialog
+          buttons={commonText.close()}
+          header={queryText.webPortalExportDuplicateRecordIds()}
+          onClose={(): void => setState(undefined)}
+        >
+          {queryText.webPortalExportDuplicateRecordIdsDescription()}
+          <br />
+          {queryText.webPortalExportDuplicateHint()}
+        </Dialog>
      ) : undefined}
{containsResults &&
hasPermission('/querybuilder/query', 'export_csv') && (
@@ -193,6 +219,28 @@ export function QueryExportButtons({
          {queryText.createKml()}
        </Button.Small>
      )}
+      {containsResults &&
+        hasPermission('/querybuilder/query', 'export_to_web_portal') && (
+          <Button.Small
+            onClick={(): void => {
+              if (hasDuplicateRecordIds()) {
+                setState('duplicateWarning');
+                return;
+              }
+
+              doQueryExport(
+                '/stored_query/exportwebportal/',
+                undefined,
+                undefined,
+                undefined
+              );
+            }}
+          >
+            {queryText.exportToWebPortal()}
+          </Button.Small>
+        )}
    </>
  );
}
diff --git a/specifyweb/frontend/js_src/lib/localization/notifications.ts b/specifyweb/frontend/js_src/lib/localization/notifications.ts
index a9764aef182..07d052bee9c 100644
--- a/specifyweb/frontend/js_src/lib/localization/notifications.ts
+++ b/specifyweb/frontend/js_src/lib/localization/notifications.ts
@@ -134,6 +134,16 @@ export const notificationsText = createDictionary({
'pt-br': 'Exportação da consulta para KML concluída.',
'hr-hr': 'Izvoz upita u KML je završen.',
},
+ queryExportToWebPortalCompleted: {
+ 'en-us': 'Query export to Web Portal completed.',
+ 'ru-ru': 'Экспорт запроса в веб-портал завершен.',
+ 'es-es': 'La exportación de la consulta al Portal Web se completó.',
+ 'fr-fr': 'Exportation de la requête vers le portail Web terminée.',
+ 'uk-ua': 'Експорт запиту до веб-порталу завершено.',
+ 'de-ch': 'Der Abfrageexport zum Webportal wurde abgeschlossen.',
+ 'pt-br': 'A exportação da consulta para o portal web foi concluída.',
+ 'hr-hr': 'Izvoz upita na web portal je dovršen.',
+ },
dataSetOwnershipTransferred: {
'en-us':
' transferred the ownership of the dataset to you.',
diff --git a/specifyweb/frontend/js_src/lib/localization/query.ts b/specifyweb/frontend/js_src/lib/localization/query.ts
index 4336e5e40a2..51c7947d981 100644
--- a/specifyweb/frontend/js_src/lib/localization/query.ts
+++ b/specifyweb/frontend/js_src/lib/localization/query.ts
@@ -363,6 +363,20 @@ export const queryText = createDictionary({
'pt-br': 'Criar KML',
'hr-hr': 'Izradi KML',
},
+ exportToWebPortal: {
+ 'en-us': 'Export to Web Portal',
+ },
+ webPortalExportDuplicateRecordIds: {
+ 'en-us': 'Query contains duplicate record IDs',
+ },
+ webPortalExportDuplicateRecordIdsDescription: {
+ 'en-us':
+ 'Please modify the query so that it returns unique records for each returned row.',
+ },
+ webPortalExportDuplicateHint: {
+ 'en-us':
+ '(Hint: You may need to add a condition for current determination or use an aggregator for preparations or collectors.)',
+ },
createRecordSet: {
'en-us': 'Create {recordSetTable:string}',
'ru-ru': 'Создать {recordSetTable:string}',