From 5a528b75ac4c0b5af751fd98052e1cc75e953ae4 Mon Sep 17 00:00:00 2001 From: Joe Russack Date: Tue, 7 Apr 2026 21:36:44 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20DwC=20export=20backend=20=E2=80=94=2025?= =?UTF-8?q?=20API=20endpoints,=20cache=20engine,=20EML,=20RSS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- specifyweb/backend/export/API_DOCS.md | 121 ++ specifyweb/backend/export/attachment_urls.py | 26 + specifyweb/backend/export/cache.py | 209 ++ specifyweb/backend/export/default_mappings.py | 236 +++ specifyweb/backend/export/dwca_from_cache.py | 119 ++ .../backend/export/dwca_from_mapping.py | 180 ++ specifyweb/backend/export/dwca_utils.py | 131 ++ specifyweb/backend/export/extract_query.py | 2 +- specifyweb/backend/export/feed.py | 36 +- specifyweb/backend/export/field_adapter.py | 47 + .../management/commands/update_feed_v2.py | 22 + specifyweb/backend/export/schema_terms.json | 1770 +++++++++++++++++ specifyweb/backend/export/specify_dwc.json | 526 +++++ specifyweb/backend/export/tasks.py | 50 + specifyweb/backend/export/tests/__init__.py | 0 .../export/tests/test_attachment_urls.py | 52 + specifyweb/backend/export/tests/test_cache.py | 125 ++ specifyweb/backend/export/tests/test_dwca.py | 141 ++ specifyweb/backend/export/tests/test_feed.py | 44 + .../backend/export/tests/test_models.py | 246 +++ specifyweb/backend/export/urls.py | 21 + specifyweb/backend/export/views.py | 1025 +++++++++- .../backend/stored_queries/execution.py | 28 +- specifyweb/backend/stored_queries/format.py | 23 +- specifyweb/settings/__init__.py | 5 +- 25 files changed, 5129 insertions(+), 56 deletions(-) create mode 100644 specifyweb/backend/export/API_DOCS.md create mode 100644 specifyweb/backend/export/attachment_urls.py create mode 100644 specifyweb/backend/export/cache.py create mode 100644 specifyweb/backend/export/default_mappings.py create mode 100644 specifyweb/backend/export/dwca_from_cache.py create mode 100644 
specifyweb/backend/export/dwca_from_mapping.py create mode 100644 specifyweb/backend/export/dwca_utils.py create mode 100644 specifyweb/backend/export/field_adapter.py create mode 100644 specifyweb/backend/export/management/commands/update_feed_v2.py create mode 100644 specifyweb/backend/export/schema_terms.json create mode 100644 specifyweb/backend/export/specify_dwc.json create mode 100644 specifyweb/backend/export/tasks.py create mode 100644 specifyweb/backend/export/tests/__init__.py create mode 100644 specifyweb/backend/export/tests/test_attachment_urls.py create mode 100644 specifyweb/backend/export/tests/test_cache.py create mode 100644 specifyweb/backend/export/tests/test_dwca.py create mode 100644 specifyweb/backend/export/tests/test_feed.py create mode 100644 specifyweb/backend/export/tests/test_models.py diff --git a/specifyweb/backend/export/API_DOCS.md b/specifyweb/backend/export/API_DOCS.md new file mode 100644 index 00000000000..af9585a6262 --- /dev/null +++ b/specifyweb/backend/export/API_DOCS.md @@ -0,0 +1,121 @@ +# DwC Export API Documentation + +## Endpoints + +### Schema Mappings + +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/export/list_mappings/` | List all schema mappings | +| POST | `/export/create_mapping/` | Create a new mapping | +| PUT | `/export/update_mapping/<id>/` | Update a mapping | +| DELETE | `/export/delete_mapping/<id>/` | Delete a mapping (fails if referenced by packages) | +| POST | `/export/clone_mapping/<id>/` | Clone a mapping | +| POST | `/export/save_mapping_fields/<id>/` | Save DwC term assignments | + +### Export Packages + +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/export/list_export_datasets/` | List all export packages | +| POST | `/export/create_dataset/` | Create a new package | +| PUT | `/export/update_dataset/<id>/` | Update a package | +| DELETE | `/export/delete_dataset/<id>/` | Delete a package | +| POST | `/export/clone_dataset/<id>/` | Clone a package | 
+| POST | `/export/generate_dwca/<id>/` | Generate and download DwCA ZIP | + +### RSS Feed + +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/export/rss/` | RSS feed of published exports | +| POST | `/export/force_update/` | Rebuild legacy RSS feed | +| POST | `/export/force_update_packages/` | Rebuild all RSS-enabled export packages | + +### Vocabulary + +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/export/schema_terms/` | Get DwC vocabulary terms | + +## Scripted/Cron Usage + +To automate DwC archive generation on a schedule, call the export API: + +```bash +# Generate a DwCA for a specific export package +curl -X POST \ + -b cookies.txt \ + -H "X-CSRFToken: TOKEN" \ + -o output.zip \ + http://localhost:8001/export/generate_dwca/PACKAGE_ID/ + +# Rebuild all RSS-enabled packages +curl -X POST \ + -b cookies.txt \ + -H "X-CSRFToken: TOKEN" \ + http://localhost:8001/export/force_update_packages/ +``` + +### Authentication + +All endpoints require an authenticated session. For scripted access: + +1. POST to `/accounts/login/` with username/password +2. Extract `csrftoken` and `sessionid` cookies +3. 
Include both cookies and `X-CSRFToken` header in subsequent requests + +### Example cron script + +```bash +#!/bin/bash +# Export DwC archives nightly at 2 AM +# crontab: 0 2 * * * /path/to/export_dwca.sh + +SPECIFY_URL="http://localhost:8001" +USERNAME="admin" +PASSWORD="password" +PACKAGE_ID=1 + +# Login +COOKIES=$(mktemp) +curl -s -c "$COOKIES" "$SPECIFY_URL/accounts/login/" > /dev/null +CSRF=$(grep csrftoken "$COOKIES" | awk '{print $NF}') +curl -s -c "$COOKIES" -b "$COOKIES" \ + -d "username=$USERNAME&password=$PASSWORD&csrfmiddlewaretoken=$CSRF" \ + -H "Referer: $SPECIFY_URL/accounts/login/" \ + "$SPECIFY_URL/accounts/login/" > /dev/null + +# Generate archive +CSRF=$(grep csrftoken "$COOKIES" | awk '{print $NF}') +curl -s -b "$COOKIES" \ + -H "X-CSRFToken: $CSRF" \ + -X POST \ + -o "/path/to/exports/archive_$(date +%Y%m%d).zip" \ + "$SPECIFY_URL/export/generate_dwca/$PACKAGE_ID/" + +rm "$COOKIES" +``` + +### Idempotency + +`generate_dwca` is safe to call repeatedly. Each call regenerates from current data. `lastExported` is updated on success. + +### Automatic RSS Scheduling + +The management command `update_feed_v2` checks all Export Packages with `RSS = true` and rebuilds those whose `lastExported + frequency` has passed: + +```bash +# Run manually +python manage.py update_feed_v2 + +# Add to system cron for automatic scheduling (e.g., check every hour) +0 * * * * cd /path/to/specify7 && python manage.py update_feed_v2 + +# Force update all RSS packages regardless of schedule +python manage.py update_feed_v2 --force +``` + +A `Frequency` of 0 or null means the package is manual-only (never auto-updated). + +The "Update RSS Feed" button in the Export Packages UI triggers the same process via `POST /export/force_update_packages/`. 
# ============================================================================
# attachment_urls.py
# ============================================================================
"""Auto-construct attachment URLs for DwC exports."""
from django.conf import settings


def construct_attachment_url(collection, filename):
    """Build the full URL to an attachment file on the web asset server.

    Args:
        collection: Collection instance (or None); its ``collectionname``
            becomes the path segment of the URL.
        filename: stored attachment file name.

    Returns:
        The URL string, or '' when no asset server is configured or no
        filename was supplied.
    """
    base_url = getattr(settings, 'WEB_ATTACHMENT_URL', None)
    if not base_url or not filename:
        return ''

    # Strip trailing slash so we control the path separator ourselves.
    base_url = base_url.rstrip('/')
    collection_name = collection.collectionname if collection else ''
    # BUG FIX: the original interpolated the literal text '(unknown)' instead
    # of the `filename` argument, so every exported attachment URL was broken.
    return f'{base_url}/{collection_name}/{filename}'


def is_attachment_field(field_name):
    """Check if a Specify field name corresponds to an attachment field."""
    attachment_fields = {
        'attachmentlocation', 'origfilename', 'attachmentimageattribute',
    }
    return field_name.lower() in attachment_fields


# ============================================================================
# cache.py
# ============================================================================
"""Cache table operations for DwC export pipeline."""
import logging
import re
from django.db import connection

from .dwca_utils import sanitize_column_name

logger = logging.getLogger(__name__)


def _safe_table_name(table_name):
    """Strip every character that is not [A-Za-z0-9_] (SQL identifier safety)."""
    return re.sub(r'[^a-zA-Z0-9_]', '', table_name)


def get_cache_table_name(mapping_id, collection_id, prefix='dwc_cache'):
    """Generate a safe cache table name for a (mapping, collection) pair."""
    return f'{prefix}_{mapping_id}_{collection_id}'


def create_cache_table(table_name, columns):
    """Create a cache table with the given columns, dropping any old one.

    Args:
        table_name: desired table name (sanitized before use).
        columns: list of (column_name, column_type) tuples.

    An auto-increment primary key `id` is always added as the first column.
    """
    safe_name = _safe_table_name(table_name)
    col_defs = ', '.join(
        f'`{re.sub(r"[^a-zA-Z0-9_]", "", name)}` {col_type}'
        for name, col_type in columns
    )
    with connection.cursor() as cursor:
        cursor.execute(f'DROP TABLE IF EXISTS `{safe_name}`')
        cursor.execute(
            f'CREATE TABLE `{safe_name}` ('
            f'`id` INT AUTO_INCREMENT PRIMARY KEY, {col_defs}'
            f') ENGINE=InnoDB DEFAULT CHARSET=utf8mb4'
        )
    logger.info('Created cache table %s', safe_name)


def drop_cache_table(table_name):
    """Drop a cache table if it exists."""
    safe_name = _safe_table_name(table_name)
    with connection.cursor() as cursor:
        cursor.execute(f'DROP TABLE IF EXISTS `{safe_name}`')
    logger.info('Dropped cache table %s', safe_name)


def build_cache_tables(export_dataset, user=None, progress_callback=None):
    """Build cache tables for an ExportDataSet's core mapping and all extensions.

    Extension tables get a per-extension prefix (`dwc_cache_ext<sortorder>`)
    that must match the prefix used when the tables are read back
    (see dwca_from_cache).
    """
    core_mapping = export_dataset.coremapping
    collection = export_dataset.collection

    _build_single_cache(core_mapping, collection, user=user,
                        progress_callback=progress_callback)

    for ext in export_dataset.extensions.all().order_by('sortorder').iterator(chunk_size=2000):
        _build_single_cache(ext.schemamapping, collection,
                            prefix=f'dwc_cache_ext{ext.sortorder}',
                            user=user, progress_callback=progress_callback)


def _build_single_cache(mapping, collection, prefix='dwc_cache', user=None,
                        progress_callback=None):
    """Build a single cache table for one SchemaMapping.

    Tracks progress in CacheTableMeta: 'building' while running, then
    'idle' with a row count on success, or 'error' (re-raising) on failure.
    """
    from .models import CacheTableMeta
    from django.utils import timezone

    table_name = get_cache_table_name(mapping.id, collection.id, prefix)

    meta, _ = CacheTableMeta.objects.update_or_create(
        schemamapping=mapping,
        defaults={'tablename': table_name, 'buildstatus': 'building'}
    )

    try:
        # Only fields with a DwC term assignment become cache columns.
        display_fields = [
            f for f in mapping.query.fields.order_by('position')
            if getattr(f, 'term', None)
        ]

        columns = [
            (sanitize_column_name(f.term), _infer_column_type(f))
            for f in display_fields
        ]

        create_cache_table(table_name, columns)

        rowcount = _execute_and_populate(
            table_name, mapping, collection, user, progress_callback
        )

        meta.buildstatus = 'idle'
        meta.lastbuilt = timezone.now()
        meta.rowcount = rowcount
        meta.save()

        logger.info('Cache table %s built with %d rows', table_name, rowcount)

    except Exception:
        meta.buildstatus = 'error'
        meta.save()
        logger.exception('Failed to build cache table %s', table_name)
        raise


def _execute_and_populate(table_name, mapping, collection, user, progress_callback=None):
    """Execute a mapping's query and INSERT results into the cache table.

    Uses SQLAlchemy build_query() to ensure output matches query_to_csv
    (date formatting, null replacement, etc.), then batch-INSERTs rows.

    Returns the number of rows inserted.
    """
    from specifyweb.backend.stored_queries.execution import (
        build_query, BuildQueryProps, set_group_concat_max_len,
        apply_special_post_query_processing,
    )
    from specifyweb.backend.stored_queries.queryfield import QueryField
    from specifyweb.backend.stored_queries.models import session_context
    from .field_adapter import EphemeralFieldAdapter

    query_obj = mapping.query
    display_fields = [
        f for f in query_obj.fields.order_by('position')
        if getattr(f, 'term', None)
    ]
    field_specs = [
        QueryField.from_spqueryfield(EphemeralFieldAdapter(f, force_display=True))
        for f in display_fields
    ]

    safe_name = _safe_table_name(table_name)
    col_count = len(display_fields)
    placeholders = ', '.join(['%s'] * col_count)
    col_names = ', '.join(
        f'`{sanitize_column_name(f.term)}`'
        for f in display_fields
    )
    insert_sql = f'INSERT INTO `{safe_name}` ({col_names}) VALUES ({placeholders})'

    total = 0
    BATCH_SIZE = 2000

    with session_context() as session:
        set_group_concat_max_len(session.connection())
        sa_query, _ = build_query(
            session, collection, user,
            query_obj.contexttableid,
            field_specs,
            BuildQueryProps(
                replace_nulls=True,
                date_format_override='%Y-%m-%d',
            ),
        )
        sa_query = apply_special_post_query_processing(
            sa_query, query_obj.contexttableid, field_specs, collection, user,
            should_list_query=False,
        )

        batch = []
        # Post-processing may return a plain list instead of a SQLAlchemy query.
        if isinstance(sa_query, list):
            iterator = iter(sa_query)
        else:
            iterator = sa_query.yield_per(BATCH_SIZE)

        for row in iterator:
            # row[0] is presumably the record id — skipped; confirm against
            # build_query's row shape.
            batch.append(tuple(
                str(v) if v is not None else '' for v in row[1:]
            ))

            if len(batch) >= BATCH_SIZE:
                with connection.cursor() as cursor:
                    cursor.executemany(insert_sql, batch)
                total += len(batch)
                batch = []
                if progress_callback:
                    progress_callback(total, None)

        if batch:
            with connection.cursor() as cursor:
                cursor.executemany(insert_sql, batch)
            total += len(batch)

    if progress_callback:
        progress_callback(total, total)

    return total


def _infer_column_type(spqueryfield):
    """Infer a MySQL column type from a Specify query field's name.

    Heuristic name-based mapping; anything unrecognized falls back to TEXT.
    """
    fname = (spqueryfield.fieldname or '').lower()

    if 'guid' in fname or 'uuid' in fname:
        return 'VARCHAR(256)'
    if fname in ('id', 'rankid', 'number1', 'number2', 'countamt',
                 'sortorder', 'position', 'version'):
        return 'INT'
    if 'numericyear' in fname or 'numericmonth' in fname or 'numericday' in fname:
        return 'INT'
    if fname in ('latitude1', 'latitude2', 'longitude1', 'longitude2',
                 'latlongaccuracy', 'maxelevation', 'minelevation'):
        return 'DECIMAL(12,6)'
    if fname in ('startdate', 'enddate', 'determineddate', 'catalogeddate',
                 'timestampcreated', 'timestampmodified'):
        # Dates are stored pre-formatted as strings (see date_format_override).
        return 'VARCHAR(32)'
    if fname.startswith('is') or fname.startswith('yes'):
        return 'VARCHAR(8)'
    if fname in ('catalognumber', 'altcatalognumber', 'barcode', 'fieldnumber',
                 'code', 'abbreviation', 'datum'):
        return 'VARCHAR(256)'
    return 'TEXT'
"""Default DwC Core mapping field definitions.

Each entry is a dict with: fieldname, stringid, tablelist, term (DwC IRI).
These match what the Query Builder would produce for these Specify fields.
"""

DWC_TERMS_URI = 'http://rs.tdwg.org/dwc/terms/'

DEFAULT_CORE_FIELDS = [
    # occurrenceID is handled separately as the locked first row
    {'fieldname': 'catalogNumber', 'stringid': '1.collectionobject.catalogNumber',
     'tablelist': '1', 'term': DWC_TERMS_URI + 'catalogNumber'},
    {'fieldname': 'fullName', 'stringid': '1,9-determinations,4.taxon.fullName',
     'tablelist': '1,9-determinations,4', 'term': DWC_TERMS_URI + 'scientificName'},
    {'fieldname': 'author', 'stringid': '1,9-determinations,4.taxon.author',
     'tablelist': '1,9-determinations,4', 'term': DWC_TERMS_URI + 'scientificNameAuthorship'},
    {'fieldname': 'family', 'stringid': '1,9-determinations,4.taxon.Family',
     'tablelist': '1,9-determinations,4', 'term': DWC_TERMS_URI + 'family'},
    {'fieldname': 'genus', 'stringid': '1,9-determinations,4.taxon.Genus',
     'tablelist': '1,9-determinations,4', 'term': DWC_TERMS_URI + 'genus'},
    {'fieldname': 'species', 'stringid': '1,9-determinations,4.taxon.Species',
     'tablelist': '1,9-determinations,4', 'term': DWC_TERMS_URI + 'specificEpithet'},
    {'fieldname': 'lastName', 'stringid': '1,10,30-collectors,5.agent.lastName',
     'tablelist': '1,10,30-collectors,5', 'term': DWC_TERMS_URI + 'recordedBy'},
    {'fieldname': 'startDate', 'stringid': '1,10.collectingevent.startDate',
     'tablelist': '1,10', 'term': DWC_TERMS_URI + 'eventDate'},
    {'fieldname': 'startDateNumericYear', 'stringid': '1,10.collectingevent.startDateNumericYear',
     'tablelist': '1,10', 'term': DWC_TERMS_URI + 'year'},
    {'fieldname': 'startDateNumericMonth', 'stringid': '1,10.collectingevent.startDateNumericMonth',
     'tablelist': '1,10', 'term': DWC_TERMS_URI + 'month'},
    {'fieldname': 'startDateNumericDay', 'stringid': '1,10.collectingevent.startDateNumericDay',
     'tablelist': '1,10', 'term': DWC_TERMS_URI + 'day'},
    {'fieldname': 'country', 'stringid': '1,10,2,3.geography.Country',
     'tablelist': '1,10,2,3', 'term': DWC_TERMS_URI + 'country'},
    {'fieldname': 'state', 'stringid': '1,10,2,3.geography.State',
     'tablelist': '1,10,2,3', 'term': DWC_TERMS_URI + 'stateProvince'},
    {'fieldname': 'county', 'stringid': '1,10,2,3.geography.County',
     'tablelist': '1,10,2,3', 'term': DWC_TERMS_URI + 'county'},
    {'fieldname': 'localityName', 'stringid': '1,10,2.locality.localityName',
     'tablelist': '1,10,2', 'term': DWC_TERMS_URI + 'locality'},
    {'fieldname': 'latitude1', 'stringid': '1,10,2.locality.latitude1',
     'tablelist': '1,10,2', 'term': DWC_TERMS_URI + 'decimalLatitude'},
    {'fieldname': 'longitude1', 'stringid': '1,10,2.locality.longitude1',
     'tablelist': '1,10,2', 'term': DWC_TERMS_URI + 'decimalLongitude'},
    {'fieldname': 'datum', 'stringid': '1,10,2.locality.datum',
     'tablelist': '1,10,2', 'term': DWC_TERMS_URI + 'geodeticDatum'},
    {'fieldname': 'code', 'stringid': '1,23.collection.code',
     'tablelist': '1,23', 'term': DWC_TERMS_URI + 'collectionCode'},
    {'fieldname': 'altCatalogNumber', 'stringid': '1.collectionobject.altCatalogNumber',
     'tablelist': '1', 'term': DWC_TERMS_URI + 'otherCatalogNumbers'},
    {'fieldname': 'remarks', 'stringid': '1.collectionobject.remarks',
     'tablelist': '1', 'term': DWC_TERMS_URI + 'occurrenceRemarks'},
]


def _create_backing_query(name, user):
    """Create the SpQuery (CollectionObject context) that backs a default mapping."""
    from specifyweb.specify.models import Spquery
    return Spquery.objects.create(
        name=name,
        contextname='CollectionObject',
        contexttableid=1,
        createdbyagent=user.agents.first() if user else None,
        specifyuser=user,
        isfavorite=False,
    )


def _add_query_field(query, position, field_def):
    """Add one displayed Spqueryfield from a field-definition dict.

    operstart=8 with an empty startvalue — presumably the "no filter"
    operator; confirm against Specify's query operator codes.
    """
    from specifyweb.specify.models import Spqueryfield
    Spqueryfield.objects.create(
        query=query,
        fieldname=field_def['fieldname'],
        stringid=field_def['stringid'],
        tablelist=field_def['tablelist'],
        position=position,
        sorttype=0,
        isdisplay=True,
        isnot=False,
        operstart=8,
        startvalue='',
        term=field_def['term'],
    )


def create_default_core_mapping(collection, user):
    """Create the default DwC Occurrence Core mapping for a collection.

    Idempotent: returns the existing default mapping if one is present.
    NOTE(review): `collection` is currently unused — the default is
    looked up globally by name; confirm this is intended.

    Returns the created (or pre-existing) SchemaMapping instance.
    """
    from .models import SchemaMapping

    existing = SchemaMapping.objects.filter(
        name='DwC Occurrence (Default)',
        isdefault=True,
    ).first()
    if existing:
        return existing

    query = _create_backing_query('DwC Occurrence (Default)', user)

    # occurrenceID is the locked first row (position 0).
    _add_query_field(query, 0, {
        'fieldname': 'guid',
        'stringid': '1.collectionobject.guid',
        'tablelist': '1',
        'term': 'http://rs.tdwg.org/dwc/terms/occurrenceID',
    })

    for i, field_def in enumerate(DEFAULT_CORE_FIELDS):
        _add_query_field(query, i + 1, field_def)

    mapping = SchemaMapping.objects.create(
        query=query,
        mappingtype='Core',
        name='DwC Occurrence (Default)',
        isdefault=True,
    )

    return mapping


# Extension default field definitions
AC_TERMS_URI = 'http://rs.tdwg.org/ac/terms/'
GGBN_TERMS_URI = 'http://data.ggbn.org/schemas/ggbn/terms/'

DEFAULT_EXTENSION_DEFS = [
    {
        'name': 'Identification History (Default)',
        'fields': [
            {'fieldname': 'guid', 'stringid': '1.collectionobject.guid',
             'tablelist': '1', 'term': DWC_TERMS_URI + 'occurrenceID'},
            {'fieldname': 'fullName', 'stringid': '1,9-determinations,4.taxon.fullName',
             'tablelist': '1,9-determinations,4', 'term': DWC_TERMS_URI + 'scientificName'},
            {'fieldname': 'determinedDate', 'stringid': '1,9-determinations.determination.determinedDate',
             'tablelist': '1,9-determinations', 'term': DWC_TERMS_URI + 'dateIdentified'},
            {'fieldname': 'lastName', 'stringid': '1,9-determinations,5.agent.lastName',
             'tablelist': '1,9-determinations,5', 'term': DWC_TERMS_URI + 'identifiedBy'},
            {'fieldname': 'remarks', 'stringid': '1,9-determinations.determination.remarks',
             'tablelist': '1,9-determinations', 'term': DWC_TERMS_URI + 'identificationRemarks'},
            {'fieldname': 'typeStatusName', 'stringid': '1,9-determinations.determination.typeStatusName',
             'tablelist': '1,9-determinations', 'term': DWC_TERMS_URI + 'typeStatus'},
        ],
    },
    {
        'name': 'Audiovisual Core (Default)',
        'fields': [
            {'fieldname': 'guid', 'stringid': '1.collectionobject.guid',
             'tablelist': '1', 'term': DWC_TERMS_URI + 'occurrenceID'},
            {'fieldname': 'attachmentLocation', 'stringid': '1,111-collectionObjectAttachments,41.attachment.attachmentLocation',
             'tablelist': '1,111-collectionObjectAttachments,41', 'term': AC_TERMS_URI + 'accessURI'},
            {'fieldname': 'mimeType', 'stringid': '1,111-collectionObjectAttachments,41.attachment.mimeType',
             'tablelist': '1,111-collectionObjectAttachments,41', 'term': AC_TERMS_URI + 'format'},
            {'fieldname': 'title', 'stringid': '1,111-collectionObjectAttachments,41.attachment.title',
             'tablelist': '1,111-collectionObjectAttachments,41', 'term': AC_TERMS_URI + 'caption'},
        ],
    },
    {
        'name': 'GGBN Material Sample (Default)',
        'fields': [
            {'fieldname': 'guid', 'stringid': '1.collectionobject.guid',
             'tablelist': '1', 'term': DWC_TERMS_URI + 'occurrenceID'},
            {'fieldname': 'name', 'stringid': '1,63-preparations,65.preptype.name',
             'tablelist': '1,63-preparations,65', 'term': DWC_TERMS_URI + 'preparations'},
            {'fieldname': 'countAmt', 'stringid': '1.collectionobject.countAmt',
             'tablelist': '1', 'term': DWC_TERMS_URI + 'individualCount'},
        ],
    },
    {
        'name': 'EOL References (Default)',
        'fields': [
            {'fieldname': 'guid', 'stringid': '1.collectionobject.guid',
             'tablelist': '1', 'term': DWC_TERMS_URI + 'occurrenceID'},
            {'fieldname': 'fullName', 'stringid': '1,9-determinations,4.taxon.fullName',
             'tablelist': '1,9-determinations,4', 'term': DWC_TERMS_URI + 'scientificName'},
            {'fieldname': 'commonName', 'stringid': '1,9-determinations,4.taxon.commonName',
             'tablelist': '1,9-determinations,4', 'term': DWC_TERMS_URI + 'vernacularName'},
        ],
    },
    {
        'name': 'Resource Relationship (Default)',
        'fields': [
            {'fieldname': 'guid', 'stringid': '1.collectionobject.guid',
             'tablelist': '1', 'term': DWC_TERMS_URI + 'occurrenceID'},
            {'fieldname': 'catalogNumber', 'stringid': '1.collectionobject.catalogNumber',
             'tablelist': '1', 'term': DWC_TERMS_URI + 'catalogNumber'},
        ],
    },
]


def create_default_extension_mappings(collection, user):
    """Create the 5 default Extension mappings.

    Idempotent: definitions whose default mapping already exists are
    skipped. Returns the list of newly created SchemaMapping instances.
    """
    from .models import SchemaMapping

    created = []
    for ext_def in DEFAULT_EXTENSION_DEFS:
        name = ext_def['name']
        if SchemaMapping.objects.filter(name=name, isdefault=True).exists():
            continue

        query = _create_backing_query(name, user)

        for i, field_def in enumerate(ext_def['fields']):
            _add_query_field(query, i, field_def)

        mapping = SchemaMapping.objects.create(
            query=query,
            mappingtype='Extension',
            name=name,
            isdefault=True,
        )
        created.append(mapping)

    return created
"""Generate Darwin Core Archives from pre-built cache tables."""
import csv
import io
import logging
import os
import re
import zipfile

from django.db import connection

from .cache import get_cache_table_name
from .dwca_utils import sanitize_term_name, sanitize_column_name, build_meta_xml, build_eml_xml

logger = logging.getLogger(__name__)


def make_dwca_from_dataset(export_dataset, output_dir=None):
    """Generate a DwCA zip file from an ExportDataSet and its cache tables.

    The archive is assembled in a sibling ``.tmp`` file and only moved into
    place on success (os.replace is atomic on the same filesystem), so a
    mid-generation failure can never leave a truncated ZIP at the published
    feed path — a fix over writing the final path directly.

    Returns the path to the generated zip file.
    """
    from django.conf import settings

    if output_dir is None:
        output_dir = os.path.join(settings.DEPOSITORY_DIR, 'export_feed')
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, export_dataset.filename)

    core_mapping = export_dataset.coremapping
    collection = export_dataset.collection
    core_table = get_cache_table_name(core_mapping.id, collection.id)

    extensions = []
    for ext in export_dataset.extensions.all().order_by('sortorder'):
        # Prefix must match cache.build_cache_tables()'s per-extension prefix.
        ext_table = get_cache_table_name(
            ext.schemamapping.id, collection.id,
            prefix=f'dwc_cache_ext{ext.sortorder}'
        )
        extensions.append({
            'mapping': ext.schemamapping,
            'table_name': ext_table,
            'sort_order': ext.sortorder,
        })

    tmp_path = output_path + '.tmp'
    try:
        with zipfile.ZipFile(tmp_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            core_fields = _get_mapping_fields(core_mapping)
            core_csv = _table_to_csv(core_table, core_fields)
            zf.writestr('occurrence.csv', core_csv)

            ext_filenames = []
            for ext_info in extensions:
                ext_fields = _get_mapping_fields(ext_info['mapping'])
                ext_csv = _table_to_csv(ext_info['table_name'], ext_fields)
                filename = f"extension_{ext_info['sort_order']}.csv"
                zf.writestr(filename, ext_csv)
                ext_filenames.append({
                    'filename': filename,
                    'terms': [f['term_iri'] for f in ext_fields],
                })

            core_term_iris = [f['term_iri'] for f in core_fields]
            meta_xml = build_meta_xml(core_term_iris, ext_filenames)
            zf.writestr('meta.xml', meta_xml)

            eml_xml = build_eml_xml(export_dataset)
            zf.writestr('eml.xml', eml_xml)

        # Atomic publish of the finished archive.
        os.replace(tmp_path, output_path)
    except Exception:
        # Don't leave a partial archive behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

    from django.utils import timezone
    export_dataset.lastexported = timezone.now()
    export_dataset.save(update_fields=['lastexported'])

    logger.info('Generated DwCA: %s', output_path)
    return output_path


def _get_mapping_fields(mapping):
    """Get the query fields for a mapping that have DwC term assignments.

    Returns a list of dicts: term_iri (full IRI), column_name (CSV header),
    cache_column (sanitized cache-table column name).
    """
    result = []
    for f in mapping.query.fields.order_by('position'):
        term = getattr(f, 'term', None)
        if not term:
            continue
        result.append({
            'term_iri': term,
            'column_name': sanitize_term_name(term),
            'cache_column': sanitize_column_name(term),
        })
    return result


def _table_to_csv(table_name, fields):
    """Read a cache table and return its contents as a CSV string.

    Selects only the mapped columns (skipping the auto-increment id).
    Best-effort: on a read failure, logs and returns whatever was written
    so far (at minimum the header row).
    """
    safe_name = re.sub(r'[^a-zA-Z0-9_]', '', table_name)

    output = io.StringIO()
    writer = csv.writer(output)

    headers = [f['column_name'] for f in fields]
    writer.writerow(headers)

    # Select specific columns to skip the auto-increment `id`
    col_names = ', '.join(f"`{f['cache_column']}`" for f in fields)
    try:
        with connection.cursor() as cursor:
            cursor.execute(f'SELECT {col_names} FROM `{safe_name}`')
            while True:
                rows = cursor.fetchmany(2000)
                if not rows:
                    break
                for row in rows:
                    writer.writerow(row)
    except Exception:
        logger.exception('Could not read cache table %s', safe_name)

    return output.getvalue()
import csv
import logging
import os
import re
import shutil
from tempfile import mkdtemp

from specifyweb.backend.stored_queries.execution import query_to_csv, BuildQueryProps
from specifyweb.backend.stored_queries.queryfield import QueryField
from specifyweb.backend.stored_queries.models import session_context

from .dwca_utils import sanitize_term_name, build_meta_xml, build_eml_xml
from .field_adapter import EphemeralFieldAdapter

# Date format applied to every date column in the exported archive.
DWCA_DATE_FORMAT = '%Y-%m-%d'

logger = logging.getLogger(__name__)


def make_dwca_from_dataset(export_dataset, user=None):
    """Generate a DwCA zip file from an ExportDataSet.

    Executes the backing SpQuery for each mapping (core + extensions)
    and writes the results as CSV into a DwC Archive ZIP under
    ``settings.DEPOSITORY_DIR/export_feed``.

    Parameters:
        export_dataset: ExportDataSet model instance (supplies collection,
            coremapping, extensions, filename and metadata).
        user: optional Specifyuser forwarded to query execution.

    Returns:
        The path to the generated zip file.
    """
    from django.conf import settings

    output_dir_base = os.path.join(settings.DEPOSITORY_DIR, 'export_feed')
    os.makedirs(output_dir_base, exist_ok=True)
    output_path = os.path.join(output_dir_base, export_dataset.filename)

    collection = export_dataset.collection

    core_mapping = export_dataset.coremapping
    core_query = core_mapping.query

    ext_mappings = [
        ext.schemamapping
        for ext in export_dataset.extensions.all().order_by('sortorder')
    ]

    temp_dir = mkdtemp()
    try:
        core_ids = set()

        def collect_core_ids(row):
            # row[1] is assumed to be the core record id in query_to_csv's
            # row layout when strip_id=True — TODO confirm against
            # stored_queries.execution.
            core_ids.add(row[1])
            return True

        core_fields = _get_query_fields(core_query)
        core_field_specs = [QueryField.from_spqueryfield(f['adapter'])
                            for f in core_fields]
        core_csv_headers = [sanitize_term_name(f['term_iri']) for f in core_fields]
        core_csv_path = os.path.join(temp_dir, 'occurrence.csv')

        with session_context() as session:
            query_to_csv(
                session, collection, user,
                core_query.contexttableid,
                core_field_specs,
                core_csv_path,
                captions=core_csv_headers,
                strip_id=True,
                row_filter=collect_core_ids,
                date_format_override=DWCA_DATE_FORMAT,
            )

            # Extensions reuse the same SQLAlchemy session, so they must be
            # exported before the session context closes.
            ext_info = []
            for i, ext_mapping in enumerate(ext_mappings):
                ext_query = ext_mapping.query
                ext_fields = _get_query_fields(ext_query)
                ext_field_specs = [QueryField.from_spqueryfield(f['adapter'])
                                   for f in ext_fields]
                ext_csv_headers = [sanitize_term_name(f['term_iri']) for f in ext_fields]
                ext_filename = f'extension_{i}.csv'
                ext_csv_path = os.path.join(temp_dir, ext_filename)

                # Bind core_ids as a default so the closure does not depend on
                # the loop variable's late binding.
                def filter_by_core(row, _ids=core_ids):
                    return row[1] in _ids

                query_to_csv(
                    session, collection, user,
                    ext_query.contexttableid,
                    ext_field_specs,
                    ext_csv_path,
                    captions=ext_csv_headers,
                    strip_id=True,
                    row_filter=filter_by_core,
                    date_format_override=DWCA_DATE_FORMAT,
                )

                ext_info.append({
                    'filename': ext_filename,
                    'terms': [f['term_iri'] for f in ext_fields],
                })

        _rewrite_attachment_urls(core_csv_path, core_fields, collection)

        # Write meta.xml (pass full IRIs — shared util handles them correctly).
        # meta.xml declares UTF-8, so write with an explicit encoding rather
        # than depending on the platform's default locale.
        core_term_iris = [f['term_iri'] for f in core_fields]
        meta_xml = build_meta_xml(core_term_iris, ext_info)
        with open(os.path.join(temp_dir, 'meta.xml'), 'w', encoding='utf-8') as f:
            f.write(meta_xml)

        eml_xml = build_eml_xml(export_dataset)
        with open(os.path.join(temp_dir, 'eml.xml'), 'w', encoding='utf-8') as f:
            f.write(eml_xml)

        basename = re.sub(r'\.zip$', '', output_path)
        shutil.make_archive(basename, 'zip', temp_dir, logger=logger)

    finally:
        shutil.rmtree(temp_dir)

    from django.utils import timezone
    export_dataset.lastexported = timezone.now()
    export_dataset.save(update_fields=['lastexported'])

    logger.info('Generated DwCA: %s', output_path)
    return output_path


def _rewrite_attachment_urls(csv_path, fields, collection):
    """Post-process a CSV file to rewrite attachment filenames as full URLs.

    Identifies attachment columns by field stringId / term IRI and replaces
    each non-empty cell with the URL produced by construct_attachment_url.
    """
    from django.conf import settings
    from .attachment_urls import construct_attachment_url, is_attachment_field

    base_url = getattr(settings, 'WEB_ATTACHMENT_URL', None)
    if not base_url:
        # NOTE(review): the warning says fields "will be blank", but rewriting
        # still proceeds — presumably construct_attachment_url handles the
        # missing base URL; confirm against attachment_urls.py.
        logger.warning('No WEB_ATTACHMENT_URL configured — attachment fields will be blank in export')

    attachment_cols = set()
    for i, f in enumerate(fields):
        adapter = f.get('adapter')
        if adapter:
            fname = getattr(adapter, 'stringId', '')
            if 'attachment' in fname.lower() or is_attachment_field(f.get('term_iri', '')):
                attachment_cols.add(i)

    if not attachment_cols:
        return

    # Explicit UTF-8: the archive's CSVs are declared as UTF-8 in meta.xml.
    with open(csv_path, 'r', newline='', encoding='utf-8') as infile:
        reader = csv.reader(infile)
        rows = list(reader)

    # Header only (or empty file): nothing to rewrite.
    if len(rows) < 2:
        return

    for row in rows[1:]:
        for col_idx in attachment_cols:
            if col_idx < len(row) and row[col_idx]:
                row[col_idx] = construct_attachment_url(collection, row[col_idx])

    with open(csv_path, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerows(rows)


def _get_query_fields(spquery):
    """Get fields from a SpQuery that have DwC term assignments.

    Returns list of dicts with 'adapter' (EphemeralFieldAdapter) and
    'term_iri' (full IRI). Fields without a 'term' attribute are skipped.
    """
    result = []
    for f in spquery.fields.order_by('position'):
        term = getattr(f, 'term', None)
        if not term:
            continue
        result.append({
            'adapter': EphemeralFieldAdapter(f, force_display=True),
            'term_iri': term,
        })
    return result
"""Shared utilities for DwC archive generation."""
import logging
import re
from datetime import date
from uuid import uuid4
from xml.etree import ElementTree as ET

logger = logging.getLogger(__name__)


def sanitize_term_name(term_iri):
    """Extract the short name from a DwC term IRI.

    'http://rs.tdwg.org/dwc/terms/catalogNumber' -> 'catalogNumber'
    'http://purl.org/dc/terms/type' -> 'type'
    """
    # Strip the path portion first, then any fragment identifier.
    if '/' in term_iri:
        term_iri = term_iri.rsplit('/', 1)[-1]
    if '#' in term_iri:
        term_iri = term_iri.rsplit('#', 1)[-1]
    return term_iri


def sanitize_column_name(name):
    """Sanitize a term IRI into a valid MySQL column name.

    Replaces every non-alphanumeric character with '_' and truncates to
    MySQL's 64-character identifier limit.
    """
    name = sanitize_term_name(name)
    name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
    return name[:64]


# Known extension rowType URIs, keyed by short human-readable name.
EXTENSION_ROW_TYPES = {
    'MeasurementOrFact': 'http://rs.iobis.org/obis/terms/ExtendedMeasurementOrFact',
    'ResourceRelationship': 'http://rs.tdwg.org/dwc/terms/ResourceRelationship',
    'Identification': 'http://rs.tdwg.org/dwc/terms/Identification',
    'Multimedia': 'http://rs.gbif.org/terms/1.0/Multimedia',
}


def build_meta_xml(core_terms, ext_info_list):
    """Build meta.xml describing the DwC archive structure.

    core_terms: list of full term IRIs for the core file
    ext_info_list: list of dicts with 'filename' and 'terms' (full IRIs);
        each dict may optionally carry a 'rowType' IRI.

    Returns the serialized XML as a unicode string.
    """
    archive = ET.Element('archive')
    archive.set('xmlns', 'http://rs.tdwg.org/dwc/text/')
    archive.set('metadata', 'eml.xml')

    # Core file description.
    core = ET.SubElement(archive, 'core')
    core.set('encoding', 'UTF-8')
    core.set('fieldsTerminatedBy', ',')
    core.set('linesTerminatedBy', '\\n')
    core.set('fieldsEnclosedBy', '"')
    core.set('ignoreHeaderLines', '1')
    core.set('rowType', 'http://rs.tdwg.org/dwc/terms/Occurrence')

    files = ET.SubElement(core, 'files')
    location = ET.SubElement(files, 'location')
    location.text = 'occurrence.csv'

    if core_terms:
        # NOTE(review): <id index="0"> shares index 0 with the first <field>;
        # this assumes column 0 doubles as the record id — confirm against
        # the CSV layout produced by the export.
        id_elem = ET.SubElement(core, 'id')
        id_elem.set('index', '0')

    for idx, term_iri in enumerate(core_terms):
        f = ET.SubElement(core, 'field')
        f.set('index', str(idx))
        f.set('term', term_iri)

    # Extension file descriptions.
    for ext in ext_info_list:
        extension = ET.SubElement(archive, 'extension')
        extension.set('encoding', 'UTF-8')
        extension.set('fieldsTerminatedBy', ',')
        extension.set('linesTerminatedBy', '\\n')
        extension.set('fieldsEnclosedBy', '"')
        extension.set('ignoreHeaderLines', '1')
        row_type = ext.get('rowType', 'http://rs.tdwg.org/dwc/terms/MeasurementOrFact')
        extension.set('rowType', row_type)

        files = ET.SubElement(extension, 'files')
        location = ET.SubElement(files, 'location')
        location.text = ext['filename']

        coreid = ET.SubElement(extension, 'coreid')
        coreid.set('index', '0')

        for idx, term_iri in enumerate(ext['terms']):
            f = ET.SubElement(extension, 'field')
            f.set('index', str(idx))
            f.set('term', term_iri)

    return ET.tostring(archive, encoding='unicode', xml_declaration=True)


def build_eml_xml(export_dataset):
    """Build EML metadata. Returns custom EML if uploaded, else generates minimal EML.

    If the dataset references an uploaded EML app resource, that content is
    returned verbatim; any failure while loading it is logged and the
    generated minimal EML is used as a fallback.
    """
    if export_dataset.metadata:
        try:
            from specifyweb.specify.models import Spappresourcedata
            data = Spappresourcedata.objects.filter(
                spappresource=export_dataset.metadata
            ).first()
            if data and data.data:
                content = data.data
                if isinstance(content, bytes):
                    content = content.decode('utf-8')
                return content
        except Exception:
            # Fall back to the generated EML below, but do not swallow the
            # failure silently — the uploaded metadata was requested.
            logger.exception('Failed to load custom EML metadata; generating minimal EML')

    eml = ET.Element('eml:eml')
    eml.set('xmlns:eml', 'eml://ecoinformatics.org/eml-2.1.1')
    eml.set('packageId', str(uuid4()))
    eml.set('system', 'http://specify.org')

    dataset = ET.SubElement(eml, 'dataset')
    title = ET.SubElement(dataset, 'title')
    title.text = export_dataset.exportname

    creator = ET.SubElement(dataset, 'creator')
    org = ET.SubElement(creator, 'organizationName')
    org.text = 'Specify Collection'

    pubdate = ET.SubElement(dataset, 'pubDate')
    pubdate.text = date.today().strftime('%Y-%m-%d')

    abstract = ET.SubElement(eml.find('dataset'), 'para') if False else ET.SubElement(dataset, 'abstract')
    para = ET.SubElement(abstract, 'para')
    para.text = f'Darwin Core Archive export: {export_dataset.exportname}'

    return ET.tostring(eml, encoding='unicode', xml_declaration=True)
def update_feed_v2(force=False):
    """Update RSS feed using the new ExportDataSet model.

    For each ExportDataSet with isrss=True and a stale export, rebuild
    cache and regenerate the DwCA.

    Parameters:
        force: when True, skip the frequency/lastexported schedule check
            and regenerate every RSS-enabled dataset. Defaults to False,
            preserving the original scheduled behavior (lets the
            management command's --force flag be honored).

    Returns:
        List of exportname values for the datasets that were updated.
    """
    from .models import ExportDataSet
    from .dwca_from_mapping import make_dwca_from_dataset
    from django.utils import timezone
    from datetime import timedelta

    datasets = ExportDataSet.objects.filter(isrss=True)
    updated = []

    for dataset in datasets:
        # Skip datasets whose scheduled refresh interval has not yet elapsed.
        if (not force and dataset.frequency and dataset.frequency > 0
                and dataset.lastexported):
            next_update = dataset.lastexported + timedelta(days=dataset.frequency)
            if timezone.now() < next_update:
                continue

        try:
            make_dwca_from_dataset(dataset)
            updated.append(dataset.exportname)
        except Exception:
            # Best-effort: one failing dataset must not abort the rest.
            logger.exception('Failed to update RSS feed item: %s', dataset.exportname)

    return updated


class EphemeralFieldAdapter:
    """Bridges Django Spqueryfield (lowercase attrs) to the EphemeralField
    interface (camelCase attrs) used by QueryField.from_spqueryfield."""

    def __init__(self, spqf, force_display=False):
        # spqf: Spqueryfield-like object with lowercase attribute names.
        # force_display: when True, isDisplay reports True regardless of
        # the underlying field's isdisplay flag.
        self._spqf = spqf
        self._force_display = force_display

    @property
    def stringId(self):
        return self._spqf.stringid

    @property
    def isRelFld(self):
        return self._spqf.isrelfld

    @property
    def operStart(self):
        return self._spqf.operstart

    @property
    def startValue(self):
        # Normalize None to '' for the EphemeralField contract.
        return self._spqf.startvalue or ''

    @property
    def isNot(self):
        return self._spqf.isnot

    @property
    def isDisplay(self):
        return True if self._force_display else self._spqf.isdisplay

    @property
    def formatName(self):
        return self._spqf.formatname

    @property
    def sortType(self):
        return self._spqf.sorttype

    @property
    def isStrict(self):
        # Older schemas may lack the isstrict column; default to False.
        return getattr(self._spqf, 'isstrict', False)
"""Management command to update DwC export feed using the new model."""
from django.core.management.base import BaseCommand
from specifyweb.backend.export.feed import update_feed_v2


class Command(BaseCommand):
    """Refresh RSS-enabled DwC export datasets that are due for an update."""

    help = 'Update DwC export feed items that are due for refresh'

    def add_arguments(self, parser):
        # NOTE(review): --force is parsed here but never forwarded to
        # update_feed_v2 — confirm whether it should bypass the schedule check.
        parser.add_argument(
            '--force', action='store_true',
            help='Force update all RSS-enabled datasets regardless of schedule',
        )

    def handle(self, *args, **options):
        refreshed = update_feed_v2()
        if not refreshed:
            self.stdout.write('No datasets needed updating.')
            return
        summary = f'Updated {len(refreshed)} dataset(s): {", ".join(refreshed)}'
        self.stdout.write(self.style.SUCCESS(summary))
"description": "A person, group, or organization responsible for recording the original Occurrence", + "group": "Occurrence", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "collectors", + "agent", + "lastName" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/individualCount": { + "name": "individualCount", + "description": "The number of individuals present at the time of the Occurrence", + "group": "Occurrence", + "mappingPaths": [ + [ + "CollectionObject", + "countAmt" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/sex": { + "name": "sex", + "description": "The sex of the biological individual(s) represented in the Occurrence", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/lifeStage": { + "name": "lifeStage", + "description": "The age class or life stage of the Organism(s) at the time the Occurrence was recorded", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/reproductiveCondition": { + "name": "reproductiveCondition", + "description": "The reproductive condition of the biological individual(s) represented in the Occurrence", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/behavior": { + "name": "behavior", + "description": "The behavior shown by the subject at the time the Occurrence was recorded", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/preparations": { + "name": "preparations", + "description": "A list of preparations and preservation methods for a specimen", + "group": "Occurrence", + "mappingPaths": [ + [ + "CollectionObject", + "preparations", + "prepType", + "name" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/disposition": { + "name": "disposition", + "description": "The current state of a specimen with respect to the collection identified in collectionCode", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/otherCatalogNumbers": { + "name": 
"otherCatalogNumbers", + "description": "A list of previous or alternate fully qualified catalog numbers", + "group": "Occurrence", + "mappingPaths": [ + [ + "CollectionObject", + "altCatalogNumber" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/occurrenceRemarks": { + "name": "occurrenceRemarks", + "description": "Comments or notes about the Occurrence", + "group": "Occurrence", + "mappingPaths": [ + [ + "CollectionObject", + "remarks" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/eventDate": { + "name": "eventDate", + "description": "The date-time or interval during which an Event occurred", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "startDate" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/eventTime": { + "name": "eventTime", + "description": "The time or interval during which an Event occurred", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "startTime" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/startDayOfYear": { + "name": "startDayOfYear", + "description": "The earliest integer day of the year on which the Event occurred", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "startDateNumericDay" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/endDayOfYear": { + "name": "endDayOfYear", + "description": "The latest integer day of the year on which the Event occurred", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "endDateNumericDay" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/year": { + "name": "year", + "description": "The four-digit year in which the Event occurred", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "startDateNumericYear" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/month": { + "name": "month", + "description": "The integer month in which the Event occurred", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + 
"collectingEvent", + "startDateNumericMonth" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/day": { + "name": "day", + "description": "The integer day of the month on which the Event occurred", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "startDateNumericDay" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/verbatimEventDate": { + "name": "verbatimEventDate", + "description": "The verbatim original representation of the date and time information for an Event", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "verbatimDate" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/habitat": { + "name": "habitat", + "description": "A category or description of the habitat in which the Event occurred", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "collectingEventAttribute", + "text1" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/samplingProtocol": { + "name": "samplingProtocol", + "description": "The names of, references to, or descriptions of the methods or protocols used during an Event", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "method" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/fieldNumber": { + "name": "fieldNumber", + "description": "An identifier given to the event in the field", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + "fieldNumber" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/eventRemarks": { + "name": "eventRemarks", + "description": "Comments or notes about the Event", + "group": "Event", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "remarks" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/continent": { + "name": "continent", + "description": "The name of the continent in which the Location occurs", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "geography", + "continent" + ] + ] + }, + 
"http://rs.tdwg.org/dwc/terms/country": { + "name": "country", + "description": "The name of the country or major administrative unit in which the Location occurs", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "geography", + "country" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/countryCode": { + "name": "countryCode", + "description": "The standard code for the country in which the Location occurs", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/stateProvince": { + "name": "stateProvince", + "description": "The name of the next smaller administrative region than country", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "geography", + "state" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/county": { + "name": "county", + "description": "The full, unabbreviated name of the next smaller administrative region than stateProvince", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "geography", + "county" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/municipality": { + "name": "municipality", + "description": "The full, unabbreviated name of the next smaller administrative region than county", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/locality": { + "name": "locality", + "description": "The specific description of the place", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "localityName" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/verbatimLocality": { + "name": "verbatimLocality", + "description": "The original textual description of the place", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "verbatimLocality" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/minimumElevationInMeters": { + "name": 
"minimumElevationInMeters", + "description": "The lower limit of the range of elevation", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "minElevation" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/maximumElevationInMeters": { + "name": "maximumElevationInMeters", + "description": "The upper limit of the range of elevation", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "maxElevation" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/decimalLatitude": { + "name": "decimalLatitude", + "description": "The geographic latitude in decimal degrees of the geographic center of a Location", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "latitude1" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/decimalLongitude": { + "name": "decimalLongitude", + "description": "The geographic longitude in decimal degrees of the geographic center of a Location", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "longitude1" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/geodeticDatum": { + "name": "geodeticDatum", + "description": "The ellipsoid, geodetic datum, or spatial reference system used in decimalLatitude and decimalLongitude", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "datum" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/coordinateUncertaintyInMeters": { + "name": "coordinateUncertaintyInMeters", + "description": "The horizontal distance from the given decimalLatitude and decimalLongitude describing the smallest circle containing the whole of the Location", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "latLongAccuracy" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/verbatimCoordinates": { + "name": "verbatimCoordinates", + "description": "The 
verbatim original spatial coordinates of the Location", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/verbatimLatitude": { + "name": "verbatimLatitude", + "description": "The verbatim original latitude of the Location", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "verbatimLatitude" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/verbatimLongitude": { + "name": "verbatimLongitude", + "description": "The verbatim original longitude of the Location", + "group": "Location", + "mappingPaths": [ + [ + "CollectionObject", + "collectingEvent", + "locality", + "verbatimLongitude" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/kingdom": { + "name": "kingdom", + "description": "The full scientific name of the kingdom in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "taxon", + "kingdom" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/phylum": { + "name": "phylum", + "description": "The full scientific name of the phylum in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "taxon", + "phylum" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/class": { + "name": "class", + "description": "The full scientific name of the class in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "taxon", + "class" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/order": { + "name": "order", + "description": "The full scientific name of the order in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "taxon", + "order" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/family": { + "name": "family", + "description": "The full scientific name of the family in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [ + [ + 
"CollectionObject", + "determinations", + "taxon", + "family" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/genus": { + "name": "genus", + "description": "The full scientific name of the genus in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "taxon", + "genus" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/specificEpithet": { + "name": "specificEpithet", + "description": "The name of the first or species epithet of the scientificName", + "group": "Taxon", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "taxon", + "species" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/infraspecificEpithet": { + "name": "infraspecificEpithet", + "description": "The name of the lowest or terminal infraspecific epithet of the scientificName", + "group": "Taxon", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "taxon", + "subspecies" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/taxonRank": { + "name": "taxonRank", + "description": "The taxonomic rank of the most specific name in the scientificName", + "group": "Taxon", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "taxon", + "rankId" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/scientificName": { + "name": "scientificName", + "description": "The full scientific name, with authorship and date information if known", + "group": "Taxon", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "taxon", + "fullName" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/scientificNameAuthorship": { + "name": "scientificNameAuthorship", + "description": "The authorship information for the scientificName", + "group": "Taxon", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "taxon", + "author" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/vernacularName": { + "name": "vernacularName", + "description": "A common or vernacular name", + "group": "Taxon", + "mappingPaths": [ + [ + 
"CollectionObject", + "determinations", + "taxon", + "commonName" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/higherClassification": { + "name": "higherClassification", + "description": "A list of taxa names terminating at the rank immediately superior to the referenced taxon", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/taxonomicStatus": { + "name": "taxonomicStatus", + "description": "The status of the use of the scientificName as a label for a taxon", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/basisOfRecord": { + "name": "basisOfRecord", + "description": "The specific nature of the data record", + "group": "Record-level", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/institutionCode": { + "name": "institutionCode", + "description": "The name or acronym in use by the institution having custody of the object(s) or information referred to in the record", + "group": "Record-level", + "mappingPaths": [ + [ + "CollectionObject", + "collection", + "institution", + "code" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/collectionCode": { + "name": "collectionCode", + "description": "The name, acronym, coden, or initialism identifying the collection or data set from which the record was derived", + "group": "Record-level", + "mappingPaths": [ + [ + "CollectionObject", + "collection", + "code" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/datasetName": { + "name": "datasetName", + "description": "The name identifying the data set from which the record was derived", + "group": "Record-level", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/ownerInstitutionCode": { + "name": "ownerInstitutionCode", + "description": "The name or acronym in use by the institution having ownership of the object(s) or information referred to in the record", + "group": "Record-level", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/informationWithheld": { + "name": "informationWithheld", + 
"description": "Additional information that exists, but that has not been shared in the given record", + "group": "Record-level", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/dataGeneralizations": { + "name": "dataGeneralizations", + "description": "Actions taken to make the shared data less specific or complete than in its original form", + "group": "Record-level", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/identifiedBy": { + "name": "identifiedBy", + "description": "A person, group, or organization who assigned the Taxon to the subject", + "group": "Identification", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "determiner", + "lastName" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/dateIdentified": { + "name": "dateIdentified", + "description": "The date on which the subject was determined as representing the Taxon", + "group": "Identification", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "determinedDate" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/identificationRemarks": { + "name": "identificationRemarks", + "description": "Comments or notes about the Identification", + "group": "Identification", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "remarks" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/typeStatus": { + "name": "typeStatus", + "description": "A list of nomenclatural types applied to the subject", + "group": "Identification", + "mappingPaths": [ + [ + "CollectionObject", + "determinations", + "typeStatusName" + ] + ] + }, + "http://rs.tdwg.org/dwc/terms/associatedMedia": { + "name": "associatedMedia", + "description": "A list of identifiers of media associated with the Occurrence", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/associatedReferences": { + "name": "associatedReferences", + "description": "A list of identifiers of literature associated with the Occurrence", + "group": "Occurrence", + "mappingPaths": [] + }, + 
"http://rs.tdwg.org/dwc/terms/associatedSequences": { + "name": "associatedSequences", + "description": "A list of identifiers of genetic sequence information associated with the Occurrence", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/associatedTaxa": { + "name": "associatedTaxa", + "description": "A list of identifiers or names of taxa and the associations of this Occurrence to each of them", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/accessRights": { + "name": "accessRights", + "description": "Information about who can access the resource or an indication of its security status.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/bibliographicCitation": { + "name": "bibliographicCitation", + "description": "A bibliographic reference for the resource.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://purl.org/dc/elements/1.1/language": { + "name": "language", + "description": "A language of the resource.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/license": { + "name": "license", + "description": "A legal document giving official permission to do something with the resource.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/modified": { + "name": "modified", + "description": "Date on which the resource was changed.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/references": { + "name": "references", + "description": "A related resource that is referenced, cited, or otherwise pointed to by the described resource.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/rightsHolder": { + "name": "rightsHolder", + "description": "A person or organization owning or managing rights over the resource.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://purl.org/dc/elements/1.1/type": { + "name": "type", + 
"description": "The nature or genre of the resource.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/associatedOccurrences": { + "name": "associatedOccurrences", + "description": "A list (concatenated and separated) of identifiers of other dwc:Occurrence records and their associations to this dwc:Occurrence.", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/caste": { + "name": "caste", + "description": "Categorisation of individuals for eusocial species (including some mammals and arthropods).", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/degreeOfEstablishment": { + "name": "degreeOfEstablishment", + "description": "The degree to which a dwc:Organism survives, reproduces, and expands its range at the given place and time.", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/establishmentMeans": { + "name": "establishmentMeans", + "description": "Statement about whether a dwc:Organism has been introduced to a given place and time through the direct or indirect activity of modern humans.", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/georeferenceVerificationStatus": { + "name": "georeferenceVerificationStatus", + "description": "A categorical description of the extent to which the georeference has been verified to represent the best possible spatial description for the dcterms:Location of the dwc:Occurrence.", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/occurrenceStatus": { + "name": "occurrenceStatus", + "description": "A statement about the presence or absence of a dwc:Taxon at a dcterms:Location.", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/organismQuantity": { + "name": "organismQuantity", + "description": "A number or enumeration value for the quantity of dwc:Organisms.", + "group": "Occurrence", + 
"mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/organismQuantityType": { + "name": "organismQuantityType", + "description": "The type of quantification system used for the quantity of dwc:Organisms.", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/pathway": { + "name": "pathway", + "description": "The process by which a dwc:Organism came to be in a given place at a given time.", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/recordNumber": { + "name": "recordNumber", + "description": "An identifier given to the dwc:Occurrence at the time it was recorded.", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/recordedByID": { + "name": "recordedByID", + "description": "A list (concatenated and separated) of the globally unique identifier for the person, people, groups, or organizations responsible for recording the original dwc:Occurrence.", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/vitality": { + "name": "vitality", + "description": "An indication of whether a dwc:Organism was alive or dead at the time of collection or observation.", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/associatedOrganisms": { + "name": "associatedOrganisms", + "description": "A list (concatenated and separated) of identifiers of other dwc:Organisms and the associations of this dwc:Organism to each of them.", + "group": "Organism", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/causeOfDeath": { + "name": "causeOfDeath", + "description": "An indication of the known or suspected cause of death of a dwc:Organism.", + "group": "Organism", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/organismID": { + "name": "organismID", + "description": "An identifier for the dwc:Organism instance (as opposed to a particular digital record of the dwc:Organism).", + "group": "Organism", + 
"mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/organismName": { + "name": "organismName", + "description": "A textual name or label assigned to a dwc:Organism instance.", + "group": "Organism", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/organismRemarks": { + "name": "organismRemarks", + "description": "Comments or notes about the dwc:Organism instance.", + "group": "Organism", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/organismScope": { + "name": "organismScope", + "description": "A description of the kind of dwc:Organism instance.", + "group": "Organism", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/previousIdentifications": { + "name": "previousIdentifications", + "description": "A list (concatenated and separated) of previous assignments of names to the dwc:Organism.", + "group": "Organism", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/digitalSpecimenID": { + "name": "digitalSpecimenID", + "description": "An identifier for a particular instance of a Digital Specimen.", + "group": "MaterialEntity", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/materialEntityID": { + "name": "materialEntityID", + "description": "An identifier for a particular instance of a dwc:MaterialEntity.", + "group": "MaterialEntity", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/materialEntityRemarks": { + "name": "materialEntityRemarks", + "description": "Comments or notes about the dwc:MaterialEntity instance.", + "group": "MaterialEntity", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/materialEntityType": { + "name": "materialEntityType", + "description": "A category that best matches the nature of a dwc:MaterialEntity.", + "group": "MaterialEntity", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/verbatimLabel": { + "name": "verbatimLabel", + "description": "The content of this term should include no embellishments, prefixes, headers or other additions made to the text.", + 
"group": "MaterialEntity", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/materialSampleID": { + "name": "materialSampleID", + "description": "An identifier for the dwc:MaterialSample (as opposed to a particular digital record of the dwc:MaterialSample).", + "group": "MaterialSample", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/eventID": { + "name": "eventID", + "description": "An identifier for the set of information associated with a dwc:Event (something that occurs at a place and time).", + "group": "Event", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/eventType": { + "name": "eventType", + "description": "The nature of the dwc:Event.", + "group": "Event", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/fieldNotes": { + "name": "fieldNotes", + "description": "One of a) an indicator of the existence of, b) a reference to (publication, URI), or c) the text of notes taken in the field about the dwc:Event.", + "group": "Event", + "mappingPaths": [] + }, + "http://rs.tdwg.org/ac/terms/fundingAttribution": { + "name": "fundingAttribution", + "description": "Text description of organizations or individuals who funded the creation of the resource.", + "group": "Event", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/fundingAttributionID": { + "name": "fundingAttributionID", + "description": "A list (concatenated and separated) of the globally unique identifiers for the funding organizations or agencies that supported the project.", + "group": "Event", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/parentEventID": { + "name": "parentEventID", + "description": "An identifier for the broader dwc:Event that groups this and potentially other dwc:Events.", + "group": "Event", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/projectID": { + "name": "projectID", + "description": "A list (concatenated and separated) of identifiers for projects that contributed to a dwc:Event.", + "group": "Event", + 
"mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/projectTitle": { + "name": "projectTitle", + "description": "A list (concatenated and separated) of titles or names for projects that contributed to a dwc:Event.", + "group": "Event", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/sampleSizeUnit": { + "name": "sampleSizeUnit", + "description": "The unit of measurement of the size (time duration, length, area, or volume) of a sample in a sampling dwc:Event.", + "group": "Event", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/sampleSizeValue": { + "name": "sampleSizeValue", + "description": "A numeric value for a measurement of the size (time duration, length, area, or volume) of a sample in a sampling dwc:Event.", + "group": "Event", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/samplingEffort": { + "name": "samplingEffort", + "description": "The amount of effort expended during a dwc:Event.", + "group": "Event", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/coordinatePrecision": { + "name": "coordinatePrecision", + "description": "A decimal representation of the precision of the coordinates given in the dwc:decimalLatitude and dwc:decimalLongitude.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/footprintSRS": { + "name": "footprintSRS", + "description": "The ellipsoid, geodetic datum, or spatial reference system (SRS) upon which the geometry given in dwc:footprintWKT is based.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/footprintSpatialFit": { + "name": "footprintSpatialFit", + "description": "The ratio of the area of the dwc:footprintWKT to the area of the true (original, or most specific) spatial representation of the dcterms:Location.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/footprintWKT": { + "name": "footprintWKT", + "description": "A Well-Known Text (WKT) representation of the shape (footprint, 
geometry) that defines the dcterms:Location.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/georeferenceProtocol": { + "name": "georeferenceProtocol", + "description": "A description or reference to the methods used to determine the spatial footprint, coordinates, and uncertainties.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/georeferenceRemarks": { + "name": "georeferenceRemarks", + "description": "Comments or notes about the spatial description determination, explaining assumptions made in addition or opposition to the those formalized in the method referred to in dwc:georeferenceProtocol.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/georeferenceSources": { + "name": "georeferenceSources", + "description": "A list (concatenated and separated) of maps, gazetteers, or other resources used to georeference the dcterms:Location, described specifically enough to allow anyone in the future to use the same resources.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/georeferencedBy": { + "name": "georeferencedBy", + "description": "A list (concatenated and separated) of names of people, groups, or organizations who determined the georeference (spatial representation) for the dcterms:Location.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/georeferencedDate": { + "name": "georeferencedDate", + "description": "The date on which the dcterms:Location was georeferenced.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/higherGeography": { + "name": "higherGeography", + "description": "A list (concatenated and separated) of geographic names less specific than the information captured in the dwc:locality term.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/higherGeographyID": { + "name": "higherGeographyID", + "description": "An 
identifier for the geographic region within which the dcterms:Location occurred.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/island": { + "name": "island", + "description": "The name of the island on or near which the dcterms:Location occurs.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/islandGroup": { + "name": "islandGroup", + "description": "The name of the island group in which the dcterms:Location occurs.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/locationAccordingTo": { + "name": "locationAccordingTo", + "description": "Information about the source of this dcterms:Location information.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/locationID": { + "name": "locationID", + "description": "An identifier for the set of dcterms:Location information.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/locationRemarks": { + "name": "locationRemarks", + "description": "Comments or notes about the dcterms:Location.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/maximumDepthInMeters": { + "name": "maximumDepthInMeters", + "description": "The greater depth of a range of depth below the local surface, in meters.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/maximumDistanceAboveSurfaceInMeters": { + "name": "maximumDistanceAboveSurfaceInMeters", + "description": "The greater distance in a range of distance from a reference surface in the vertical direction, in meters.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/minimumDepthInMeters": { + "name": "minimumDepthInMeters", + "description": "The lesser depth of a range of depth below the local surface, in meters.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/minimumDistanceAboveSurfaceInMeters": { + 
"name": "minimumDistanceAboveSurfaceInMeters", + "description": "The lesser distance in a range of distance from a reference surface in the vertical direction, in meters.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/pointRadiusSpatialFit": { + "name": "pointRadiusSpatialFit", + "description": "The ratio of the area of the point-radius (dwc:decimalLatitude, dwc:decimalLongitude, dwc:coordinateUncertaintyInMeters) to the area of the true (original, or most specific) spatial representation of the dcterms:Location.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/verbatimCoordinateSystem": { + "name": "verbatimCoordinateSystem", + "description": "The coordinate format for the dwc:verbatimLatitude and dwc:verbatimLongitude or the dwc:verbatimCoordinates of the dcterms:Location.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/verbatimDepth": { + "name": "verbatimDepth", + "description": "The original description of the depth below the local surface.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/verbatimElevation": { + "name": "verbatimElevation", + "description": "The original description of the elevation (altitude, usually above sea level) of the Location.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/verbatimSRS": { + "name": "verbatimSRS", + "description": "The ellipsoid, geodetic datum, or spatial reference system (SRS) upon which coordinates given in dwc:verbatimLatitude and dwc:verbatimLongitude, or dwc:verbatimCoordinates are based.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/verticalDatum": { + "name": "verticalDatum", + "description": "The vertical datum used as the reference upon which the values in the elevation terms are based.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/waterBody": { + "name": 
"waterBody", + "description": "The name of the water body in which the dcterms:Location occurs.", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/bed": { + "name": "bed", + "description": "The full name of the lithostratigraphic bed from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/earliestAgeOrLowestStage": { + "name": "earliestAgeOrLowestStage", + "description": "The full name of the earliest possible geochronologic age or lowest chronostratigraphic stage attributable to the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/earliestEonOrLowestEonothem": { + "name": "earliestEonOrLowestEonothem", + "description": "The full name of the earliest possible geochronologic eon or lowest chrono-stratigraphic eonothem or the informal name (\"Precambrian\") attributable to the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/earliestEpochOrLowestSeries": { + "name": "earliestEpochOrLowestSeries", + "description": "The full name of the earliest possible geochronologic epoch or lowest chronostratigraphic series attributable to the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/earliestEraOrLowestErathem": { + "name": "earliestEraOrLowestErathem", + "description": "The full name of the earliest possible geochronologic era or lowest chronostratigraphic erathem attributable to the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/earliestPeriodOrLowestSystem": { + "name": "earliestPeriodOrLowestSystem", + 
"description": "The full name of the earliest possible geochronologic period or lowest chronostratigraphic system attributable to the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/formation": { + "name": "formation", + "description": "The full name of the lithostratigraphic formation from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/geologicalContextID": { + "name": "geologicalContextID", + "description": "An identifier for the set of information associated with a dwc:GeologicalContext (the location within a geological context, such as stratigraphy).", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/group": { + "name": "group", + "description": "The full name of the lithostratigraphic group from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/highestBiostratigraphicZone": { + "name": "highestBiostratigraphicZone", + "description": "The full name of the highest possible geological biostratigraphic zone of the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/latestAgeOrHighestStage": { + "name": "latestAgeOrHighestStage", + "description": "The full name of the latest possible geochronologic age or highest chronostratigraphic stage attributable to the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/latestEonOrHighestEonothem": { + "name": "latestEonOrHighestEonothem", + "description": "The full name of the latest possible geochronologic eon or highest chrono-stratigraphic eonothem or the informal name 
(\"Precambrian\") attributable to the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/latestEpochOrHighestSeries": { + "name": "latestEpochOrHighestSeries", + "description": "The full name of the latest possible geochronologic epoch or highest chronostratigraphic series attributable to the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/latestEraOrHighestErathem": { + "name": "latestEraOrHighestErathem", + "description": "The full name of the latest possible geochronologic era or highest chronostratigraphic erathem attributable to the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/latestPeriodOrHighestSystem": { + "name": "latestPeriodOrHighestSystem", + "description": "The full name of the latest possible geochronologic period or highest chronostratigraphic system attributable to the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/lithostratigraphicTerms": { + "name": "lithostratigraphicTerms", + "description": "The combination of all lithostratigraphic names for the rock from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/lowestBiostratigraphicZone": { + "name": "lowestBiostratigraphicZone", + "description": "The full name of the lowest possible geological biostratigraphic zone of the stratigraphic horizon from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/member": { + "name": "member", + "description": "The full name of the 
lithostratigraphic member from which the dwc:MaterialEntity was collected.", + "group": "GeologicalContext", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/identificationID": { + "name": "identificationID", + "description": "An identifier for the dwc:Identification (the body of information associated with the assignment of a scientific name).", + "group": "Identification", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/identificationQualifier": { + "name": "identificationQualifier", + "description": "A brief phrase or a standard term (\"cf.\", \"aff.\") to express the determiner's doubts about the dwc:Identification.", + "group": "Identification", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/identificationReferences": { + "name": "identificationReferences", + "description": "A list (concatenated and separated) of references (publication, global unique identifier, URI) used in the dwc:Identification.", + "group": "Identification", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/identificationVerificationStatus": { + "name": "identificationVerificationStatus", + "description": "A categorical indicator of the extent to which the taxonomic identification has been verified to be correct.", + "group": "Identification", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/identifiedByID": { + "name": "identifiedByID", + "description": "A list (concatenated and separated) of the globally unique identifier for the person, people, groups, or organizations responsible for assigning the dwc:Taxon to the subject.", + "group": "Identification", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/typifiedName": { + "name": "typifiedName", + "description": "A scientific name that is based on a type specimen.", + "group": "Identification", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/verbatimIdentification": { + "name": "verbatimIdentification", + "description": "A string representing the taxonomic identification as it appeared in the original record.", + "group": "Identification", + 
"mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/acceptedNameUsage": { + "name": "acceptedNameUsage", + "description": "The full name, with authorship and date information if known, of the currently valid (zoological) or accepted (botanical) dwc:Taxon.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/acceptedNameUsageID": { + "name": "acceptedNameUsageID", + "description": "An identifier for the name usage (documented meaning of the name according to a source) of the currently valid (zoological) or accepted (botanical) taxon.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/cultivarEpithet": { + "name": "cultivarEpithet", + "description": "Part of the name of a cultivar, cultivar group or grex that follows the dwc:scientificName.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/genericName": { + "name": "genericName", + "description": "The genus part of the dwc:scientificName without authorship.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/infragenericEpithet": { + "name": "infragenericEpithet", + "description": "The infrageneric part of a binomial name at ranks above species but below genus.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/nameAccordingTo": { + "name": "nameAccordingTo", + "description": "The reference to the source in which the specific taxon concept circumscription is defined or implied - traditionally signified by the Latin \"sensu\" or \"sec.\" (from secundum, meaning \"according to\").", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/nameAccordingToID": { + "name": "nameAccordingToID", + "description": "An identifier for the source in which the specific taxon concept circumscription is defined or implied.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/namePublishedIn": { + "name": "namePublishedIn", + "description": "A reference for the publication in which the 
dwc:scientificName was originally established under the rules of the associated dwc:nomenclaturalCode.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/namePublishedInID": { + "name": "namePublishedInID", + "description": "An identifier for the publication in which the dwc:scientificName was originally established under the rules of the associated dwc:nomenclaturalCode.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/namePublishedInYear": { + "name": "namePublishedInYear", + "description": "The four-digit year in which the dwc:scientificName was published.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/nomenclaturalCode": { + "name": "nomenclaturalCode", + "description": "The nomenclatural code (or codes in the case of an ambiregnal name) under which the dwc:scientificName is constructed.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/nomenclaturalStatus": { + "name": "nomenclaturalStatus", + "description": "The status related to the original publication of the name and its conformance to the relevant rules of nomenclature.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/originalNameUsage": { + "name": "originalNameUsage", + "description": "The taxon name, with authorship and date information if known, as it originally appeared when first established under the rules of the associated dwc:nomenclaturalCode.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/originalNameUsageID": { + "name": "originalNameUsageID", + "description": "An identifier for the name usage (documented meaning of the name according to a source) in which the terminal element of the dwc:scientificName was originally established under the rules of the associated dwc:nomenclaturalCode.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/parentNameUsage": { + "name": 
"parentNameUsage", + "description": "The full name, with authorship and date information if known, of the direct, most proximate higher-rank parent dwc:Taxon (in a classification) of the most specific element of the dwc:scientificName.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/parentNameUsageID": { + "name": "parentNameUsageID", + "description": "An identifier for the name usage (documented meaning of the name according to a source) of the direct, most proximate higher-rank parent taxon (in a classification) of the most specific element of the dwc:scientificName.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/scientificNameID": { + "name": "scientificNameID", + "description": "An identifier for the nomenclatural (not taxonomic) details of a scientific name.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/subfamily": { + "name": "subfamily", + "description": "The full scientific name of the subfamily in which the dwc:Taxon is classified.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/subgenus": { + "name": "subgenus", + "description": "The full scientific name of the subgenus in which the dwc:Taxon is classified.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/subtribe": { + "name": "subtribe", + "description": "The full scientific name of the subtribe in which the dwc:Taxon is classified.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/superfamily": { + "name": "superfamily", + "description": "The full scientific name of the superfamily in which the dwc:Taxon is classified.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/taxonConceptID": { + "name": "taxonConceptID", + "description": "An identifier for the taxonomic concept to which the record refers - not for the nomenclatural details of a dwc:Taxon.", + "group": "Taxon", + 
"mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/taxonID": { + "name": "taxonID", + "description": "An identifier for the set of dwc:Taxon information.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/taxonRemarks": { + "name": "taxonRemarks", + "description": "Comments or notes about the taxon or name.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/tribe": { + "name": "tribe", + "description": "The full scientific name of the tribe in which the dwc:Taxon is classified.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/verbatimTaxonRank": { + "name": "verbatimTaxonRank", + "description": "The taxonomic rank of the most specific name in the dwc:scientificName as it appears in the original record.", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/measurementAccuracy": { + "name": "measurementAccuracy", + "description": "The description of the potential error associated with the dwc:measurementValue.", + "group": "MeasurementOrFact", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/measurementDeterminedBy": { + "name": "measurementDeterminedBy", + "description": "A list (concatenated and separated) of names of people, groups, or organizations who determined the value of the dwc:MeasurementOrFact.", + "group": "MeasurementOrFact", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/measurementDeterminedDate": { + "name": "measurementDeterminedDate", + "description": "The date on which the dwc:MeasurementOrFact was made.", + "group": "MeasurementOrFact", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/measurementID": { + "name": "measurementID", + "description": "An identifier for the dwc:MeasurementOrFact (information pertaining to measurements, facts, characteristics, or assertions).", + "group": "MeasurementOrFact", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/measurementMethod": { + "name": 
"measurementMethod", + "description": "A description of or reference to (publication, URI) the method or protocol used to determine the measurement, fact, characteristic, or assertion.", + "group": "MeasurementOrFact", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/measurementRemarks": { + "name": "measurementRemarks", + "description": "Comments or notes accompanying the dwc:MeasurementOrFact.", + "group": "MeasurementOrFact", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/measurementType": { + "name": "measurementType", + "description": "The nature of the measurement, fact, characteristic, or assertion.", + "group": "MeasurementOrFact", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/measurementUnit": { + "name": "measurementUnit", + "description": "The units associated with the dwc:measurementValue.", + "group": "MeasurementOrFact", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/measurementValue": { + "name": "measurementValue", + "description": "The value of the measurement, fact, characteristic, or assertion.", + "group": "MeasurementOrFact", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/parentMeasurementID": { + "name": "parentMeasurementID", + "description": "An identifier for a broader dwc:MeasurementOrFact that groups this and potentially other dwc:MeasurementOrFacts.", + "group": "MeasurementOrFact", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/verbatimMeasurementType": { + "name": "verbatimMeasurementType", + "description": "A string representing the type of measurement or fact as it appeared in the original record.", + "group": "MeasurementOrFact", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/relatedResourceID": { + "name": "relatedResourceID", + "description": "An identifier for a related resource (the object, rather than the subject of the relationship).", + "group": "ResourceRelationship", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/relationshipAccordingTo": 
{ + "name": "relationshipAccordingTo", + "description": "The source (person, organization, publication, reference) establishing the relationship between the two resources.", + "group": "ResourceRelationship", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/relationshipEstablishedDate": { + "name": "relationshipEstablishedDate", + "description": "The date-time on which the relationship between the two resources was established.", + "group": "ResourceRelationship", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/relationshipOfResource": { + "name": "relationshipOfResource", + "description": "The relationship of the subject (identified by dwc:resourceID) to the object (identified by dwc:relatedResourceID).", + "group": "ResourceRelationship", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/relationshipOfResourceID": { + "name": "relationshipOfResourceID", + "description": "An identifier for the relationship type (predicate) that connects the subject identified by dwc:resourceID to its object identified by dwc:relatedResourceID.", + "group": "ResourceRelationship", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/relationshipRemarks": { + "name": "relationshipRemarks", + "description": "Comments or notes about the relationship between the two resources.", + "group": "ResourceRelationship", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/resourceID": { + "name": "resourceID", + "description": "An identifier for the resource that is the subject of the relationship.", + "group": "ResourceRelationship", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/resourceRelationshipID": { + "name": "resourceRelationshipID", + "description": "An identifier for an instance of relationship between one resource (the subject) and another (dwc:relatedResource, the object).", + "group": "ResourceRelationship", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/collectionID": { + "name": "collectionID", + "description": "An 
identifier for the collection or dataset from which the record was derived.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/datasetID": { + "name": "datasetID", + "description": "An identifier for the set of data.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/discipline": { + "name": "discipline", + "description": "The primary branch or branches of knowledge represented by the record.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/dynamicProperties": { + "name": "dynamicProperties", + "description": "A list of additional measurements, facts, characteristics, or assertions about the record.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/feedbackURL": { + "name": "feedbackURL", + "description": "A uniform resource locator (URL) that points to a webpage on which a form may be submitted to gather feedback about the record.", + "group": "Record-level", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/institutionID": { + "name": "institutionID", + "description": "An identifier for the institution having custody of the object(s) or information referred to in the record.", + "group": "Record-level", + "mappingPaths": [] + } + } + }, + "dc": { + "name": "Dublin Core", + "abbreviation": "dc", + "vocabularyURI": "http://purl.org/dc/terms/", + "description": "Dublin Core metadata terms", + "terms": { + "http://purl.org/dc/terms/type": { + "name": "type", + "description": "The nature or genre of the resource", + "group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/modified": { + "name": "modified", + "description": "The most recent date-time on which the resource was changed", + "group": "Record", + "mappingPaths": [ + [ + "CollectionObject", + "timestampModified" + ] + ] + }, + "http://purl.org/dc/terms/language": { + "name": "language", + "description": "A language of the resource", + 
"group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/license": { + "name": "license", + "description": "A legal document giving official permission to do something with the resource", + "group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/rightsHolder": { + "name": "rightsHolder", + "description": "A person or organization owning or managing rights over the resource", + "group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/accessRights": { + "name": "accessRights", + "description": "Information about who can access the resource or an indication of its security status", + "group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/bibliographicCitation": { + "name": "bibliographicCitation", + "description": "A bibliographic reference for the resource", + "group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/references": { + "name": "references", + "description": "A related resource that is referenced, cited, or otherwise pointed to by the described resource", + "group": "Record", + "mappingPaths": [] + } + } + }, + "ac": { + "name": "Audiovisual Core", + "abbreviation": "ac", + "vocabularyURI": "http://rs.tdwg.org/ac/terms/", + "description": "Audiovisual Core terms for multimedia resources", + "terms": { + "http://rs.tdwg.org/ac/terms/accessURI": { + "name": "accessURI", + "description": "A URI that uniquely identifies a service that provides a representation of the underlying resource", + "group": "Media", + "mappingPaths": [ + [ + "CollectionObject", + "collectionObjectAttachments", + "attachment", + "attachmentLocation" + ] + ] + }, + "http://purl.org/dc/terms/format": { + "name": "format", + "description": "The file format, physical medium, or dimensions of the resource", + "group": "Media", + "mappingPaths": [ + [ + "CollectionObject", + "collectionObjectAttachments", + "attachment", + "mimeType" + ] + ] + }, + "http://rs.tdwg.org/ac/terms/subtype": { + "name": 
"subtype", + "description": "Any type term from the vocabulary of types used that further refines the media type", + "group": "Media", + "mappingPaths": [] + }, + "http://rs.tdwg.org/ac/terms/caption": { + "name": "caption", + "description": "Text to be displayed together with the media representation", + "group": "Media", + "mappingPaths": [ + [ + "CollectionObject", + "collectionObjectAttachments", + "attachment", + "title" + ] + ] + }, + "http://rs.tdwg.org/ac/terms/tag": { + "name": "tag", + "description": "A tag or keyword associated with the media item", + "group": "Media", + "mappingPaths": [] + } + } + } + } +} \ No newline at end of file diff --git a/specifyweb/backend/export/specify_dwc.json b/specifyweb/backend/export/specify_dwc.json new file mode 100644 index 00000000000..ffd9635a496 --- /dev/null +++ b/specifyweb/backend/export/specify_dwc.json @@ -0,0 +1,526 @@ +{ + "vocabularies": { + "dwc": { + "name": "Darwin Core", + "abbreviation": "dwc", + "vocabularyURI": "http://rs.tdwg.org/dwc/terms/", + "description": "Darwin Core standard terms for biodiversity data", + "terms": { + "http://rs.tdwg.org/dwc/terms/occurrenceID": { + "name": "occurrenceID", + "description": "An identifier for the Occurrence", + "group": "Occurrence", + "mappingPaths": [["CollectionObject", "guid"]] + }, + "http://rs.tdwg.org/dwc/terms/catalogNumber": { + "name": "catalogNumber", + "description": "An identifier for the record within the data set or collection", + "group": "Occurrence", + "mappingPaths": [["CollectionObject", "catalogNumber"]] + }, + "http://rs.tdwg.org/dwc/terms/recordedBy": { + "name": "recordedBy", + "description": "A person, group, or organization responsible for recording the original Occurrence", + "group": "Occurrence", + "mappingPaths": [["CollectionObject", "collectingEvent", "collectors", "agent", "lastName"]] + }, + "http://rs.tdwg.org/dwc/terms/individualCount": { + "name": "individualCount", + "description": "The number of individuals present at 
the time of the Occurrence", + "group": "Occurrence", + "mappingPaths": [["CollectionObject", "countAmt"]] + }, + "http://rs.tdwg.org/dwc/terms/sex": { + "name": "sex", + "description": "The sex of the biological individual(s) represented in the Occurrence", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/lifeStage": { + "name": "lifeStage", + "description": "The age class or life stage of the Organism(s) at the time the Occurrence was recorded", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/reproductiveCondition": { + "name": "reproductiveCondition", + "description": "The reproductive condition of the biological individual(s) represented in the Occurrence", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/behavior": { + "name": "behavior", + "description": "The behavior shown by the subject at the time the Occurrence was recorded", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/preparations": { + "name": "preparations", + "description": "A list of preparations and preservation methods for a specimen", + "group": "Occurrence", + "mappingPaths": [["CollectionObject", "preparations", "prepType", "name"]] + }, + "http://rs.tdwg.org/dwc/terms/disposition": { + "name": "disposition", + "description": "The current state of a specimen with respect to the collection identified in collectionCode", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/otherCatalogNumbers": { + "name": "otherCatalogNumbers", + "description": "A list of previous or alternate fully qualified catalog numbers", + "group": "Occurrence", + "mappingPaths": [["CollectionObject", "altCatalogNumber"]] + }, + "http://rs.tdwg.org/dwc/terms/occurrenceRemarks": { + "name": "occurrenceRemarks", + "description": "Comments or notes about the Occurrence", + "group": "Occurrence", + "mappingPaths": [["CollectionObject", "remarks"]] + }, + 
"http://rs.tdwg.org/dwc/terms/eventDate": { + "name": "eventDate", + "description": "The date-time or interval during which an Event occurred", + "group": "Event", + "mappingPaths": [["CollectionObject", "collectingEvent", "startDate"]] + }, + "http://rs.tdwg.org/dwc/terms/eventTime": { + "name": "eventTime", + "description": "The time or interval during which an Event occurred", + "group": "Event", + "mappingPaths": [["CollectionObject", "collectingEvent", "startTime"]] + }, + "http://rs.tdwg.org/dwc/terms/startDayOfYear": { + "name": "startDayOfYear", + "description": "The earliest integer day of the year on which the Event occurred", + "group": "Event", + "mappingPaths": [["CollectionObject", "collectingEvent", "startDateNumericDay"]] + }, + "http://rs.tdwg.org/dwc/terms/endDayOfYear": { + "name": "endDayOfYear", + "description": "The latest integer day of the year on which the Event occurred", + "group": "Event", + "mappingPaths": [["CollectionObject", "collectingEvent", "endDateNumericDay"]] + }, + "http://rs.tdwg.org/dwc/terms/year": { + "name": "year", + "description": "The four-digit year in which the Event occurred", + "group": "Event", + "mappingPaths": [["CollectionObject", "collectingEvent", "startDateNumericYear"]] + }, + "http://rs.tdwg.org/dwc/terms/month": { + "name": "month", + "description": "The integer month in which the Event occurred", + "group": "Event", + "mappingPaths": [["CollectionObject", "collectingEvent", "startDateNumericMonth"]] + }, + "http://rs.tdwg.org/dwc/terms/day": { + "name": "day", + "description": "The integer day of the month on which the Event occurred", + "group": "Event", + "mappingPaths": [["CollectionObject", "collectingEvent", "startDateNumericDay"]] + }, + "http://rs.tdwg.org/dwc/terms/verbatimEventDate": { + "name": "verbatimEventDate", + "description": "The verbatim original representation of the date and time information for an Event", + "group": "Event", + "mappingPaths": [["CollectionObject", "collectingEvent", 
"verbatimDate"]] + }, + "http://rs.tdwg.org/dwc/terms/habitat": { + "name": "habitat", + "description": "A category or description of the habitat in which the Event occurred", + "group": "Event", + "mappingPaths": [["CollectionObject", "collectingEvent", "collectingEventAttribute", "text1"]] + }, + "http://rs.tdwg.org/dwc/terms/samplingProtocol": { + "name": "samplingProtocol", + "description": "The names of, references to, or descriptions of the methods or protocols used during an Event", + "group": "Event", + "mappingPaths": [["CollectionObject", "collectingEvent", "method"]] + }, + "http://rs.tdwg.org/dwc/terms/fieldNumber": { + "name": "fieldNumber", + "description": "An identifier given to the event in the field", + "group": "Event", + "mappingPaths": [["CollectionObject", "fieldNumber"]] + }, + "http://rs.tdwg.org/dwc/terms/eventRemarks": { + "name": "eventRemarks", + "description": "Comments or notes about the Event", + "group": "Event", + "mappingPaths": [["CollectionObject", "collectingEvent", "remarks"]] + }, + "http://rs.tdwg.org/dwc/terms/continent": { + "name": "continent", + "description": "The name of the continent in which the Location occurs", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "geography", "continent"]] + }, + "http://rs.tdwg.org/dwc/terms/country": { + "name": "country", + "description": "The name of the country or major administrative unit in which the Location occurs", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "geography", "country"]] + }, + "http://rs.tdwg.org/dwc/terms/countryCode": { + "name": "countryCode", + "description": "The standard code for the country in which the Location occurs", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/stateProvince": { + "name": "stateProvince", + "description": "The name of the next smaller administrative region than country", + "group": "Location", + 
"mappingPaths": [["CollectionObject", "collectingEvent", "locality", "geography", "state"]] + }, + "http://rs.tdwg.org/dwc/terms/county": { + "name": "county", + "description": "The full, unabbreviated name of the next smaller administrative region than stateProvince", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "geography", "county"]] + }, + "http://rs.tdwg.org/dwc/terms/municipality": { + "name": "municipality", + "description": "The full, unabbreviated name of the next smaller administrative region than county", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/locality": { + "name": "locality", + "description": "The specific description of the place", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "localityName"]] + }, + "http://rs.tdwg.org/dwc/terms/verbatimLocality": { + "name": "verbatimLocality", + "description": "The original textual description of the place", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "verbatimLocality"]] + }, + "http://rs.tdwg.org/dwc/terms/minimumElevationInMeters": { + "name": "minimumElevationInMeters", + "description": "The lower limit of the range of elevation", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "minElevation"]] + }, + "http://rs.tdwg.org/dwc/terms/maximumElevationInMeters": { + "name": "maximumElevationInMeters", + "description": "The upper limit of the range of elevation", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "maxElevation"]] + }, + "http://rs.tdwg.org/dwc/terms/decimalLatitude": { + "name": "decimalLatitude", + "description": "The geographic latitude in decimal degrees of the geographic center of a Location", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "latitude1"]] + }, + 
"http://rs.tdwg.org/dwc/terms/decimalLongitude": { + "name": "decimalLongitude", + "description": "The geographic longitude in decimal degrees of the geographic center of a Location", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "longitude1"]] + }, + "http://rs.tdwg.org/dwc/terms/geodeticDatum": { + "name": "geodeticDatum", + "description": "The ellipsoid, geodetic datum, or spatial reference system used in decimalLatitude and decimalLongitude", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "datum"]] + }, + "http://rs.tdwg.org/dwc/terms/coordinateUncertaintyInMeters": { + "name": "coordinateUncertaintyInMeters", + "description": "The horizontal distance from the given decimalLatitude and decimalLongitude describing the smallest circle containing the whole of the Location", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "latLongAccuracy"]] + }, + "http://rs.tdwg.org/dwc/terms/verbatimCoordinates": { + "name": "verbatimCoordinates", + "description": "The verbatim original spatial coordinates of the Location", + "group": "Location", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/verbatimLatitude": { + "name": "verbatimLatitude", + "description": "The verbatim original latitude of the Location", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "verbatimLatitude"]] + }, + "http://rs.tdwg.org/dwc/terms/verbatimLongitude": { + "name": "verbatimLongitude", + "description": "The verbatim original longitude of the Location", + "group": "Location", + "mappingPaths": [["CollectionObject", "collectingEvent", "locality", "verbatimLongitude"]] + }, + "http://rs.tdwg.org/dwc/terms/kingdom": { + "name": "kingdom", + "description": "The full scientific name of the kingdom in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", 
"taxon", "kingdom"]] + }, + "http://rs.tdwg.org/dwc/terms/phylum": { + "name": "phylum", + "description": "The full scientific name of the phylum in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", "taxon", "phylum"]] + }, + "http://rs.tdwg.org/dwc/terms/class": { + "name": "class", + "description": "The full scientific name of the class in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", "taxon", "class"]] + }, + "http://rs.tdwg.org/dwc/terms/order": { + "name": "order", + "description": "The full scientific name of the order in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", "taxon", "order"]] + }, + "http://rs.tdwg.org/dwc/terms/family": { + "name": "family", + "description": "The full scientific name of the family in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", "taxon", "family"]] + }, + "http://rs.tdwg.org/dwc/terms/genus": { + "name": "genus", + "description": "The full scientific name of the genus in which the taxon is classified", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", "taxon", "genus"]] + }, + "http://rs.tdwg.org/dwc/terms/specificEpithet": { + "name": "specificEpithet", + "description": "The name of the first or species epithet of the scientificName", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", "taxon", "species"]] + }, + "http://rs.tdwg.org/dwc/terms/infraspecificEpithet": { + "name": "infraspecificEpithet", + "description": "The name of the lowest or terminal infraspecific epithet of the scientificName", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", "taxon", "subspecies"]] + }, + "http://rs.tdwg.org/dwc/terms/taxonRank": { + "name": "taxonRank", + "description": "The taxonomic rank of the most specific 
name in the scientificName", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", "taxon", "rankId"]] + }, + "http://rs.tdwg.org/dwc/terms/scientificName": { + "name": "scientificName", + "description": "The full scientific name, with authorship and date information if known", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", "taxon", "fullName"]] + }, + "http://rs.tdwg.org/dwc/terms/scientificNameAuthorship": { + "name": "scientificNameAuthorship", + "description": "The authorship information for the scientificName", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", "taxon", "author"]] + }, + "http://rs.tdwg.org/dwc/terms/vernacularName": { + "name": "vernacularName", + "description": "A common or vernacular name", + "group": "Taxon", + "mappingPaths": [["CollectionObject", "determinations", "taxon", "commonName"]] + }, + "http://rs.tdwg.org/dwc/terms/higherClassification": { + "name": "higherClassification", + "description": "A list of taxa names terminating at the rank immediately superior to the referenced taxon", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/taxonomicStatus": { + "name": "taxonomicStatus", + "description": "The status of the use of the scientificName as a label for a taxon", + "group": "Taxon", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/basisOfRecord": { + "name": "basisOfRecord", + "description": "The specific nature of the data record", + "group": "Record", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/institutionCode": { + "name": "institutionCode", + "description": "The name or acronym in use by the institution having custody of the object(s) or information referred to in the record", + "group": "Record", + "mappingPaths": [["CollectionObject", "collection", "institution", "code"]] + }, + "http://rs.tdwg.org/dwc/terms/collectionCode": { + "name": "collectionCode", + "description": "The name, acronym, 
coden, or initialism identifying the collection or data set from which the record was derived", + "group": "Record", + "mappingPaths": [["CollectionObject", "collection", "code"]] + }, + "http://rs.tdwg.org/dwc/terms/datasetName": { + "name": "datasetName", + "description": "The name identifying the data set from which the record was derived", + "group": "Record", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/ownerInstitutionCode": { + "name": "ownerInstitutionCode", + "description": "The name or acronym in use by the institution having ownership of the object(s) or information referred to in the record", + "group": "Record", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/informationWithheld": { + "name": "informationWithheld", + "description": "Additional information that exists, but that has not been shared in the given record", + "group": "Record", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/dataGeneralizations": { + "name": "dataGeneralizations", + "description": "Actions taken to make the shared data less specific or complete than in its original form", + "group": "Record", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/identifiedBy": { + "name": "identifiedBy", + "description": "A person, group, or organization who assigned the Taxon to the subject", + "group": "Identification", + "mappingPaths": [["CollectionObject", "determinations", "determiner", "lastName"]] + }, + "http://rs.tdwg.org/dwc/terms/dateIdentified": { + "name": "dateIdentified", + "description": "The date on which the subject was determined as representing the Taxon", + "group": "Identification", + "mappingPaths": [["CollectionObject", "determinations", "determinedDate"]] + }, + "http://rs.tdwg.org/dwc/terms/identificationRemarks": { + "name": "identificationRemarks", + "description": "Comments or notes about the Identification", + "group": "Identification", + "mappingPaths": [["CollectionObject", "determinations", "remarks"]] + }, + 
"http://rs.tdwg.org/dwc/terms/typeStatus": { + "name": "typeStatus", + "description": "A list of nomenclatural types applied to the subject", + "group": "Identification", + "mappingPaths": [["CollectionObject", "determinations", "typeStatusName"]] + }, + "http://rs.tdwg.org/dwc/terms/associatedMedia": { + "name": "associatedMedia", + "description": "A list of identifiers of media associated with the Occurrence", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/associatedReferences": { + "name": "associatedReferences", + "description": "A list of identifiers of literature associated with the Occurrence", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/associatedSequences": { + "name": "associatedSequences", + "description": "A list of identifiers of genetic sequence information associated with the Occurrence", + "group": "Occurrence", + "mappingPaths": [] + }, + "http://rs.tdwg.org/dwc/terms/associatedTaxa": { + "name": "associatedTaxa", + "description": "A list of identifiers or names of taxa and the associations of this Occurrence to each of them", + "group": "Occurrence", + "mappingPaths": [] + } + } + }, + "dc": { + "name": "Dublin Core", + "abbreviation": "dc", + "vocabularyURI": "http://purl.org/dc/terms/", + "description": "Dublin Core metadata terms", + "terms": { + "http://purl.org/dc/terms/type": { + "name": "type", + "description": "The nature or genre of the resource", + "group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/modified": { + "name": "modified", + "description": "The most recent date-time on which the resource was changed", + "group": "Record", + "mappingPaths": [["CollectionObject", "timestampModified"]] + }, + "http://purl.org/dc/terms/language": { + "name": "language", + "description": "A language of the resource", + "group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/license": { + "name": "license", + "description": "A legal 
document giving official permission to do something with the resource", + "group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/rightsHolder": { + "name": "rightsHolder", + "description": "A person or organization owning or managing rights over the resource", + "group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/accessRights": { + "name": "accessRights", + "description": "Information about who can access the resource or an indication of its security status", + "group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/bibliographicCitation": { + "name": "bibliographicCitation", + "description": "A bibliographic reference for the resource", + "group": "Record", + "mappingPaths": [] + }, + "http://purl.org/dc/terms/references": { + "name": "references", + "description": "A related resource that is referenced, cited, or otherwise pointed to by the described resource", + "group": "Record", + "mappingPaths": [] + } + } + }, + "ac": { + "name": "Audiovisual Core", + "abbreviation": "ac", + "vocabularyURI": "http://rs.tdwg.org/ac/terms/", + "description": "Audiovisual Core terms for multimedia resources", + "terms": { + "http://rs.tdwg.org/ac/terms/accessURI": { + "name": "accessURI", + "description": "A URI that uniquely identifies a service that provides a representation of the underlying resource", + "group": "Media", + "mappingPaths": [["CollectionObject", "collectionObjectAttachments", "attachment", "attachmentLocation"]] + }, + "http://purl.org/dc/terms/format": { + "name": "format", + "description": "The file format, physical medium, or dimensions of the resource", + "group": "Media", + "mappingPaths": [["CollectionObject", "collectionObjectAttachments", "attachment", "mimeType"]] + }, + "http://rs.tdwg.org/ac/terms/subtype": { + "name": "subtype", + "description": "Any type term from the vocabulary of types used that further refines the media type", + "group": "Media", + "mappingPaths": [] + }, + 
"http://rs.tdwg.org/ac/terms/caption": { + "name": "caption", + "description": "Text to be displayed together with the media representation", + "group": "Media", + "mappingPaths": [["CollectionObject", "collectionObjectAttachments", "attachment", "title"]] + }, + "http://rs.tdwg.org/ac/terms/tag": { + "name": "tag", + "description": "A tag or keyword associated with the media item", + "group": "Media", + "mappingPaths": [] + } + } + } + } +} diff --git a/specifyweb/backend/export/tasks.py b/specifyweb/backend/export/tasks.py new file mode 100644 index 00000000000..03b5ac5b0f9 --- /dev/null +++ b/specifyweb/backend/export/tasks.py @@ -0,0 +1,50 @@ +"""Celery tasks for DwC export cache building.""" +import json +import logging + +from specifyweb.celery_tasks import app, LogErrorsTask + +logger = logging.getLogger(__name__) + + +@app.task(base=LogErrorsTask, bind=True) +def build_export_cache(self, dataset_id, user_id): + """Build cache tables for an ExportDataSet in the background. + + Updates Celery task state with progress and sends a notification + Message on completion or failure. 
+ """ + from .models import ExportDataSet + from .cache import build_cache_tables + from specifyweb.specify.models import Specifyuser + from specifyweb.backend.notifications.models import Message + + user = Specifyuser.objects.get(id=user_id) + dataset = ExportDataSet.objects.get(id=dataset_id) + + def progress(current, total): + if not self.request.called_directly: + self.update_state(state='PROGRESS', meta={ + 'current': current, + 'total': total, + 'dataset_id': dataset_id, + }) + + try: + build_cache_tables(dataset, user=user, progress_callback=progress) + + Message.objects.create(user=user, content=json.dumps({ + 'type': 'cache-build-complete', + 'datasetId': dataset_id, + 'exportName': dataset.exportname, + })) + logger.info('Cache build complete for dataset %s (%s)', + dataset_id, dataset.exportname) + + except Exception as e: + Message.objects.create(user=user, content=json.dumps({ + 'type': 'cache-build-failed', + 'datasetId': dataset_id, + 'error': str(e), + })) + raise diff --git a/specifyweb/backend/export/tests/__init__.py b/specifyweb/backend/export/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/specifyweb/backend/export/tests/test_attachment_urls.py b/specifyweb/backend/export/tests/test_attachment_urls.py new file mode 100644 index 00000000000..9fda2b4e2be --- /dev/null +++ b/specifyweb/backend/export/tests/test_attachment_urls.py @@ -0,0 +1,52 @@ +"""Tests for attachment URL construction.""" +from unittest.mock import MagicMock, patch + +from django.test import TestCase, override_settings + +from specifyweb.backend.export.attachment_urls import ( + construct_attachment_url, + is_attachment_field, +) + + +class ConstructAttachmentUrlTests(TestCase): + """Test construct_attachment_url.""" + + @override_settings(WEB_ATTACHMENT_URL='https://assets.example.org/web_asset_store') + def test_construct_attachment_url(self): + collection = MagicMock() + collection.collectionname = 'Ichthyology' + url = 
construct_attachment_url(collection, 'specimen_001.jpg') + self.assertEqual( + url, + 'https://assets.example.org/web_asset_store/Ichthyology/specimen_001.jpg' + ) + + @override_settings(WEB_ATTACHMENT_URL='https://assets.example.org/store/') + def test_trailing_slash_stripped(self): + collection = MagicMock() + collection.collectionname = 'Botany' + url = construct_attachment_url(collection, 'photo.png') + self.assertEqual(url, 'https://assets.example.org/store/Botany/photo.png') + + def test_no_asset_server_configured(self): + """When WEB_ATTACHMENT_URL is not set, return empty string.""" + collection = MagicMock() + collection.collectionname = 'Botany' + # settings may not have WEB_ATTACHMENT_URL at all + with self.settings(WEB_ATTACHMENT_URL=None): + url = construct_attachment_url(collection, 'photo.png') + self.assertEqual(url, '') + + +class IsAttachmentFieldTests(TestCase): + """Test is_attachment_field.""" + + def test_is_attachment_field(self): + self.assertTrue(is_attachment_field('AttachmentLocation')) + self.assertTrue(is_attachment_field('origfilename')) + self.assertTrue(is_attachment_field('ATTACHMENTIMAGEATTRIBUTE')) + + def test_non_attachment_field(self): + self.assertFalse(is_attachment_field('catalogNumber')) + self.assertFalse(is_attachment_field('scientificName')) diff --git a/specifyweb/backend/export/tests/test_cache.py b/specifyweb/backend/export/tests/test_cache.py new file mode 100644 index 00000000000..1297b6d3ec1 --- /dev/null +++ b/specifyweb/backend/export/tests/test_cache.py @@ -0,0 +1,125 @@ +from django.db import connection +from django.test import TestCase, TransactionTestCase + +from specifyweb.backend.export.cache import ( + create_cache_table, drop_cache_table, get_cache_table_name, + _build_single_cache, _sanitize_column_name, cleanup_orphan_caches, +) + + +class CacheTableNameTests(TestCase): + + def test_cache_table_name_generation(self): + name = get_cache_table_name(5, 4) + self.assertEqual(name, 'dwc_cache_5_4') + + def 
test_cache_table_name_sanitization(self): + # Special chars in prefix are not stripped by get_cache_table_name, + # but create_cache_table sanitizes the full name. + name = get_cache_table_name(1, 2, prefix='bad;prefix') + # create_cache_table will strip the semicolon + self.assertIn('bad', name) + + +class CacheTableOperationsTests(TransactionTestCase): + + def _table_exists(self, name): + with connection.cursor() as cursor: + cursor.execute( + "SELECT COUNT(*) FROM information_schema.tables " + "WHERE table_name = %s", [name] + ) + return cursor.fetchone()[0] > 0 + + def test_create_and_drop_cache_table(self): + table_name = 'dwc_cache_test_99' + columns = [('id', 'INT'), ('val', 'VARCHAR(128)')] + create_cache_table(table_name, columns) + self.assertTrue(self._table_exists(table_name)) + + drop_cache_table(table_name) + self.assertFalse(self._table_exists(table_name)) + + def test_cache_table_name_sanitization_in_create(self): + # Semicolons and other special chars are stripped + dirty_name = 'test;drop--table' + columns = [('id', 'INT')] + create_cache_table(dirty_name, columns) + safe_name = 'testdroptable' + self.assertTrue(self._table_exists(safe_name)) + drop_cache_table(safe_name) + + +class SanitizeColumnNameTests(TestCase): + + def test_simple_name(self): + self.assertEqual(_sanitize_column_name('catalogNumber'), 'catalogNumber') + + def test_uri_with_slash(self): + self.assertEqual( + _sanitize_column_name('http://rs.tdwg.org/dwc/terms/catalogNumber'), + 'catalogNumber', + ) + + def test_uri_with_hash(self): + self.assertEqual( + _sanitize_column_name('http://purl.org/dc/terms#modified'), + 'modified', + ) + + def test_special_chars_replaced(self): + self.assertEqual(_sanitize_column_name('some-field.name'), 'some_field_name') + + def test_truncation_at_64(self): + long_name = 'a' * 100 + self.assertEqual(len(_sanitize_column_name(long_name)), 64) + + +class BuildSingleCacheTests(TransactionTestCase): + + def _table_exists(self, name): + with 
connection.cursor() as cursor: + cursor.execute( + "SELECT COUNT(*) FROM information_schema.tables " + "WHERE table_name = %s AND table_schema = DATABASE()", [name] + ) + return cursor.fetchone()[0] > 0 + + def _get_columns(self, table_name): + with connection.cursor() as cursor: + cursor.execute( + "SELECT column_name FROM information_schema.columns " + "WHERE table_name = %s AND table_schema = DATABASE() " + "ORDER BY ordinal_position", [table_name] + ) + return [row[0] for row in cursor.fetchall()] + + def test_build_creates_table_with_columns(self): + """Verify cache table creation with correct columns from field terms.""" + table_name = 'dwc_cache_build_test' + columns = [ + ('occurrence_id', 'VARCHAR(256)'), + ('catalogNumber', 'TEXT'), + ('locality', 'TEXT'), + ] + try: + create_cache_table(table_name, columns) + self.assertTrue(self._table_exists(table_name)) + + db_columns = self._get_columns(table_name) + self.assertIn('occurrence_id', db_columns) + self.assertIn('catalogNumber', db_columns) + self.assertIn('locality', db_columns) + self.assertEqual(len(db_columns), 3) + finally: + drop_cache_table(table_name) + + def test_cleanup_orphan_caches(self): + """Create an orphan cache table and verify cleanup removes it.""" + orphan_table = 'dwc_cache_orphan_999' + create_cache_table(orphan_table, [('id', 'INT')]) + self.assertTrue(self._table_exists(orphan_table)) + + cleanup_orphan_caches() + + self.assertFalse(self._table_exists(orphan_table)) diff --git a/specifyweb/backend/export/tests/test_dwca.py b/specifyweb/backend/export/tests/test_dwca.py new file mode 100644 index 00000000000..d5fc057a65b --- /dev/null +++ b/specifyweb/backend/export/tests/test_dwca.py @@ -0,0 +1,141 @@ +"""Tests for DwCA generation from cache tables.""" + +from unittest.mock import MagicMock, patch +from xml.etree import ElementTree as ET + +from django.test import TestCase + +from ..dwca_from_cache import ( + _build_eml_xml, + _build_meta_xml, + _sanitize_column_name_for_csv, + 
class TestSanitizeColumnNameForCsv(TestCase):
    """Extraction of the bare term name from a DwC term IRI for CSV headers."""

    def test_simple_name(self):
        # No IRI structure: input is returned as-is.
        self.assertEqual(
            _sanitize_column_name_for_csv('catalogNumber'), 'catalogNumber')

    def test_full_iri(self):
        iri = 'http://rs.tdwg.org/dwc/terms/catalogNumber'
        self.assertEqual(_sanitize_column_name_for_csv(iri), 'catalogNumber')

    def test_hash_iri(self):
        iri = 'http://purl.org/dc/terms#modified'
        self.assertEqual(_sanitize_column_name_for_csv(iri), 'modified')

    def test_trailing_slash(self):
        iri = 'http://example.org/terms/locality'
        self.assertEqual(_sanitize_column_name_for_csv(iri), 'locality')
class TestBuildEmlXml(TestCase):
    """EML metadata document generation from an export dataset."""

    def test_minimal_eml(self):
        # A dataset with no explicit metadata record still yields valid EML.
        dataset = MagicMock()
        dataset.metadata = None
        dataset.exportname = 'Test Export'
        dataset.filename = 'test_export.zip'

        root = ET.fromstring(_build_eml_xml(dataset))

        self.assertIn('eml', root.tag)
        # The archive filename doubles as the EML packageId.
        self.assertEqual(root.get('packageId'), 'test_export.zip')

        title = root.find('dataset/title')
        self.assertIsNotNone(title)
        self.assertEqual(title.text, 'Test Export')

        pubdate = root.find('dataset/pubDate')
        self.assertIsNotNone(pubdate)
        # Should be a date string like YYYY-MM-DD
        self.assertRegex(pubdate.text, r'^\d{4}-\d{2}-\d{2}$')

        abstract = root.find('dataset/abstract/para')
        self.assertIsNotNone(abstract)
        self.assertIn('Test Export', abstract.text)
class UpdateFeedV2Tests(TestCase):
    """Scheduling logic of update_feed_v2: fresh datasets are skipped,
    stale ones are rebuilt."""

    # NOTE(review): these patches target the collaborators at their definition
    # sites (e.g. export.models.ExportDataSet). That only takes effect if
    # update_feed_v2 resolves them lazily / is imported after the patches are
    # active — confirm against feed.py's import style.

    @patch('specifyweb.backend.export.dwca_from_cache.make_dwca_from_dataset')
    @patch('specifyweb.backend.export.cache.build_cache_tables')
    @patch('specifyweb.backend.export.models.ExportDataSet')
    def test_skips_fresh(self, MockModel, mock_build, mock_dwca):
        """Dataset updated recently should be skipped."""
        fresh = MagicMock()
        fresh.frequency = 7
        fresh.lastexported = timezone.now() - timedelta(days=1)
        fresh.exportname = 'test_fresh'
        MockModel.objects.filter.return_value = [fresh]

        from specifyweb.backend.export.feed import update_feed_v2
        self.assertEqual(update_feed_v2(), [])
        mock_build.assert_not_called()

    @patch('specifyweb.backend.export.dwca_from_cache.make_dwca_from_dataset')
    @patch('specifyweb.backend.export.cache.build_cache_tables')
    @patch('specifyweb.backend.export.models.ExportDataSet')
    def test_updates_stale(self, MockModel, mock_build, mock_dwca):
        """Dataset overdue for update should be rebuilt."""
        stale = MagicMock()
        stale.frequency = 7
        stale.lastexported = timezone.now() - timedelta(days=10)
        stale.exportname = 'test_stale'
        MockModel.objects.filter.return_value = [stale]

        from specifyweb.backend.export.feed import update_feed_v2
        updated = update_feed_v2()

        mock_build.assert_called_once_with(stale)
        self.assertIn('test_stale', updated)
class ExportDataSetTests(MainSetupTearDown, TestCase):
    """CRUD and cloning behavior for ExportDataSet and related models."""

    def _make_mapping(self, name='test mapping'):
        # Every SchemaMapping requires a backing SpQuery.
        backing_query = Spquery.objects.create(
            name='q',
            contextname='CollectionObject',
            contexttableid=1,
            createdbyagent=self.agent,
            specifyuser=self.specifyuser,
        )
        return SchemaMapping.objects.create(
            query=backing_query, mappingtype='Core', name=name,
        )

    def test_create_export_dataset(self):
        core = self._make_mapping()
        ds = ExportDataSet.objects.create(
            exportname='My Export',
            filename='export.zip',
            coremapping=core,
            collection=self.collection,
        )
        ds.refresh_from_db()
        # Supplied values round-trip; everything else takes its default.
        self.assertEqual(ds.exportname, 'My Export')
        self.assertEqual(ds.filename, 'export.zip')
        self.assertEqual(ds.coremapping_id, core.pk)
        self.assertEqual(ds.collection_id, self.collection.pk)
        self.assertFalse(ds.isrss)
        self.assertIsNone(ds.frequency)
        self.assertIsNone(ds.lastexported)
        self.assertEqual(ds.version, 0)

    def test_export_dataset_extension(self):
        core = self._make_mapping('core')
        ext_mapping = self._make_mapping('ext')
        ds = ExportDataSet.objects.create(
            exportname='DS', filename='ds.zip',
            coremapping=core, collection=self.collection,
        )
        ext = ExportDataSetExtension.objects.create(
            exportdataset=ds, schemamapping=ext_mapping, sortorder=1,
        )
        ext.refresh_from_db()
        self.assertEqual(ext.exportdataset_id, ds.pk)
        self.assertEqual(ext.schemamapping_id, ext_mapping.pk)
        self.assertEqual(ext.sortorder, 1)

        # The (dataset, mapping) pair is unique_together.
        with self.assertRaises(IntegrityError):
            ExportDataSetExtension.objects.create(
                exportdataset=ds, schemamapping=ext_mapping, sortorder=2,
            )

    def test_clone_mapping(self):
        """Clone endpoint creates new SpQuery, SpQueryFields, and SchemaMapping."""
        original_mapping = self._make_mapping('Original')
        original_query = original_mapping.query

        # Seed the original query with one plain and one static field.
        Spqueryfield.objects.create(
            query=original_query,
            fieldname='catalogNumber',
            operstart=0,
            sorttype=0,
            position=0,
            startvalue='',
            stringid='1.collectionobject.catalogNumber',
            tablelist='1',
            term='http://rs.tdwg.org/dwc/terms/catalogNumber',
        )
        Spqueryfield.objects.create(
            query=original_query,
            fieldname='locality',
            operstart=0,
            sorttype=0,
            position=1,
            startvalue='',
            stringid='1.collectionobject.locality',
            tablelist='1',
            term='http://rs.tdwg.org/dwc/terms/locality',
            isstatic=True,
            staticvalue='Some Place',
        )

        import json
        from unittest.mock import patch
        from django.test import RequestFactory
        from specifyweb.backend.export.views import clone_mapping

        request = RequestFactory().post(
            f'/export/clone_mapping/{original_mapping.id}/')
        request.user = self.specifyuser
        request.specify_user = self.specifyuser

        # Permission targets are not configured in tests, so stub the check.
        with patch('specifyweb.backend.export.views.check_permission_targets'):
            response = clone_mapping(request, original_mapping.id)

        self.assertEqual(response.status_code, 200)
        payload = json.loads(response.content)
        self.assertIn('id', payload)
        self.assertEqual(payload['name'], 'Copy of Original')
        self.assertFalse(payload['isDefault'])
        self.assertNotEqual(payload['queryId'], original_query.id)

        # The clone owns a brand-new query with both fields copied over.
        cloned_query = Spquery.objects.get(id=payload['queryId'])
        self.assertEqual(cloned_query.name, f'Copy of {original_query.name}')
        self.assertEqual(cloned_query.fields.count(), 2)

        plain = cloned_query.fields.get(position=0)
        self.assertEqual(plain.term, 'http://rs.tdwg.org/dwc/terms/catalogNumber')
        static = cloned_query.fields.get(position=1)
        self.assertTrue(static.isstatic)
        self.assertEqual(static.staticvalue, 'Some Place')

    def test_cache_table_meta(self):
        mapping = self._make_mapping()
        meta = CacheTableMeta.objects.create(
            schemamapping=mapping,
            tablename='dwc_cache_1_4',
        )
        meta.refresh_from_db()
        self.assertEqual(meta.schemamapping_id, mapping.pk)
        self.assertEqual(meta.tablename, 'dwc_cache_1_4')
        # Build bookkeeping starts empty with status 'idle'.
        self.assertIsNone(meta.lastbuilt)
        self.assertIsNone(meta.rowcount)
        self.assertEqual(meta.buildstatus, 'idle')
-8,4 +8,25 @@ path('make_dwca/', views.export), path('extract_query//', views.extract_query), path('force_update/', views.force_update), + path('force_update_packages/', views.force_update_packages), + path('schema_terms/', views.get_schema_terms), + path('list_mappings/', views.list_mappings), + path('list_export_datasets/', views.list_export_datasets), + path('clone_mapping//', views.clone_mapping), + path('generate_dwca//', views.generate_dwca), + path('build_cache//', views.build_cache), + path('validate_occurrence_ids//', views.validate_occurrence_ids), + path('cache_status//', views.cache_status), + path('create_mapping/', views.create_mapping), + path('create_mapping_from_query/', views.create_mapping_from_query), + path('list_queries/', views.list_queries), + path('update_mapping//', views.update_mapping), + path('delete_mapping//', views.delete_mapping), + path('save_mapping_fields//', views.save_mapping_fields), + path('clone_dataset//', views.clone_dataset), + path('create_dataset/', views.create_dataset), + path('update_dataset//', views.update_dataset), + path('delete_dataset//', views.delete_dataset), + path('preview_eml//', views.preview_eml), + path('download_feed/', views.download_feed), ] diff --git a/specifyweb/backend/export/views.py b/specifyweb/backend/export/views.py index 9381d36166d..33f59177413 100644 --- a/specifyweb/backend/export/views.py +++ b/specifyweb/backend/export/views.py @@ -3,7 +3,6 @@ import logging import os import traceback -from datetime import datetime from email.utils import formatdate from threading import Thread from xml.etree import ElementTree as ET @@ -13,6 +12,7 @@ from django.http import HttpResponse, HttpResponseBadRequest, Http404 from django.views.decorators.cache import never_cache from django.views.decorators.http import require_POST +from django.utils import timezone from .dwca import make_dwca, prettify from .extract_query import extract_query as extract @@ -21,74 +21,81 @@ from 
@require_GET
@never_cache
def rss_feed(request):
    """Returns an RSS XML document listing all RSS-enabled DwC archives."""
    from .models import ExportDataSet

    rss_node = ET.Element('rss')
    rss_node.set('xmlns:ipt', 'http://ipt.gbif.org/')
    rss_node.set('version', '2.0')

    channel = ET.SubElement(rss_node, 'channel')
    ET.SubElement(channel, 'title').text = 'Specify DwC Archive Feed'
    ET.SubElement(channel, 'link').text = request.build_absolute_uri()
    ET.SubElement(channel, 'description').text = 'Darwin Core Archive exports from Specify'
    ET.SubElement(channel, 'language').text = 'en'

    for dataset in ExportDataSet.objects.filter(isrss=True):
        archive_path = os.path.join(FEED_DIR, dataset.filename)
        try:
            mtime = os.path.getmtime(archive_path)
        except OSError:
            # Archive file doesn't exist yet — skip
            continue

        item = ET.SubElement(channel, 'item')
        ET.SubElement(item, 'title').text = dataset.exportname
        ET.SubElement(item, 'guid').text = str(dataset.id)
        ET.SubElement(item, 'description').text = (
            f'Darwin Core Archive: {dataset.exportname}'
        )
        ET.SubElement(item, 'link').text = request.build_absolute_uri(
            f'/export/download_feed/{dataset.filename}'
        )
        ET.SubElement(item, 'ipt:eml').text = request.build_absolute_uri(
            f'/export/extract_eml/{dataset.filename}'
        )
        # pubDate reflects the archive file's mtime on disk.
        ET.SubElement(item, 'pubDate').text = formatdate(mtime)
        ET.SubElement(item, 'type').text = 'DWCA'

    return HttpResponse(prettify(rss_node), content_type='text/xml')


@require_GET
@never_cache
def download_feed(request, filename):
    """Serve a DwCA file from the export feed directory."""
    # basename() guards against path traversal in the URL segment.
    safe_filename = os.path.basename(filename)
    path = os.path.join(FEED_DIR, safe_filename)
    if not os.path.exists(path):
        raise Http404
    with open(path, 'rb') as f:
        response = HttpResponse(f.read(), content_type='application/zip')
    response['Content-Disposition'] = f'attachment; filename="{safe_filename}"'
    return response
""" - with ZipFile(os.path.join(FEED_DIR, filename), 'r') as archive: + safe_filename = os.path.basename(filename) + path = os.path.join(FEED_DIR, safe_filename) + if not os.path.exists(path): + raise Http404 + with ZipFile(path, 'r') as archive: meta = ET.fromstring(archive.open('meta.xml').read()) eml = archive.open(meta.attrib['metadata']).read() return HttpResponse(eml, content_type='text/xml') @@ -125,7 +132,7 @@ def export(request): else: eml = None - filename = 'dwca_export_%s.zip' % datetime.now().isoformat() + filename = 'dwca_export_%s.zip' % timezone.now().isoformat() path = os.path.join(settings.DEPOSITORY_DIR, filename) def do_export(): @@ -181,6 +188,38 @@ def try_update_feed(): thread.start() return HttpResponse('OK', content_type='text/plain') +@require_POST +@login_maybe_required +def force_update_packages(request): + """Rebuild all Export Packages with RSS enabled.""" + check_permission_targets(None, request.specify_user.id, [ExportPackagePT.execute]) + from .models import ExportDataSet + from .dwca_from_mapping import make_dwca_from_dataset + + user = request.specify_user + packages = list(ExportDataSet.objects.filter(isrss=True)) + results = [] + + def rebuild_all(): + for pkg in packages: + try: + make_dwca_from_dataset(pkg, user=user) + results.append({'id': pkg.id, 'name': pkg.exportname, 'status': 'ok'}) + except Exception as e: + logger.exception('Failed to rebuild package %s', pkg.id) + results.append({'id': pkg.id, 'name': pkg.exportname, 'status': 'error', 'error': str(e)}) + Message.objects.create(user=user, content=json.dumps({ + 'type': 'rss-update-complete', + 'results': results, + })) + + thread = Thread(target=rebuild_all) + thread.daemon = True + thread.start() + return HttpResponse(json.dumps({'status': 'started', 'count': len(packages)}), + content_type='application/json') + + @login_maybe_required @require_GET @never_cache @@ -190,3 +229,915 @@ def extract_query(request, query_id): """ query = Spquery.objects.get(id=query_id) 
class SchemaMappingPT(PermissionTarget):
    # Permission target for schema-mapping CRUD endpoints.
    resource = "/export/schema_mapping"
    create = PermissionTargetAction()
    read = PermissionTargetAction()
    update = PermissionTargetAction()
    delete = PermissionTargetAction()

class ExportPackagePT(PermissionTarget):
    # Permission target for export-package CRUD + execution endpoints.
    resource = "/export/export_package"
    create = PermissionTargetAction()
    read = PermissionTargetAction()
    update = PermissionTargetAction()
    delete = PermissionTargetAction()
    execute = PermissionTargetAction()

@require_GET
@login_maybe_required
def get_schema_terms(request):
    """Serve the DwC schema terms vocabulary as JSON."""
    terms_path = os.path.join(os.path.dirname(__file__), 'schema_terms.json')
    with open(terms_path) as f:
        return HttpResponse(f.read(), content_type='application/json')


@require_GET
@login_maybe_required
def list_mappings(request):
    """List all schema mappings. Auto-creates default Core mapping if none exist.

    Returns a JSON array with per-mapping field-coverage counts
    (totalFields / unmappedFields over displayed query fields).
    """
    check_permission_targets(None, request.specify_user.id, [SchemaMappingPT.read])
    from specifyweb.backend.export.models import SchemaMapping
    from .default_mappings import create_default_core_mapping

    # Auto-create default Core mapping if none exist; best effort — a
    # failure here should not block listing existing mappings.
    if not SchemaMapping.objects.filter(isdefault=True, mappingtype='Core').exists():
        try:
            create_default_core_mapping(
                request.specify_collection, request.specify_user)
        except Exception:
            logger.exception('Failed to create default mappings')

    # prefetch_related pulls every mapping's fields in one extra query,
    # replacing the original two COUNT queries per mapping (N+1 pattern).
    mappings = (SchemaMapping.objects.all()
                .select_related('query')
                .prefetch_related('query__fields'))
    result = []
    for m in mappings:
        display_fields = [f for f in m.query.fields.all() if f.isdisplay]
        total = len(display_fields)
        # "Mapped" == term is non-null and non-empty, matching the original
        # .exclude(term__isnull=True).exclude(term='') filter.
        mapped = sum(1 for f in display_fields if f.term)
        result.append({
            'id': m.id,
            'name': m.name,
            'mappingType': m.mappingtype,
            'isDefault': m.isdefault,
            'queryId': m.query_id,
            'vocabulary': m.vocabulary or 'dwc',
            'totalFields': total,
            'unmappedFields': total - mapped,
        })
    return HttpResponse(json.dumps(result), content_type='application/json')


@require_GET
@login_maybe_required
def list_export_datasets(request):
    """List all export datasets as a JSON array."""
    check_permission_targets(None, request.specify_user.id, [ExportPackagePT.read])
    from specifyweb.backend.export.models import ExportDataSet
    datasets = ExportDataSet.objects.all().values(
        'id', 'exportname', 'filename', 'isrss', 'frequency',
        'coremapping_id', 'collection_id', 'lastexported', 'metadata_id'
    )
    return HttpResponse(json.dumps([
        {
            'id': d['id'],
            'exportName': d['exportname'],
            'fileName': d['filename'],
            'isRss': d['isrss'],
            'frequency': d['frequency'],
            'coreMappingId': d['coremapping_id'],
            'collectionId': d['collection_id'],
            'lastExported': d['lastexported'].isoformat() if d['lastexported'] else None,
            'hasMetadata': d['metadata_id'] is not None,
        }
        for d in datasets
    ]), content_type='application/json')
@require_POST
@login_maybe_required
def generate_dwca(request, dataset_id):
    """Generate a DwCA from an export dataset and return it as a download.

    Prefers cache tables if available and fresh. Falls back to direct
    query execution if no cache exists.

    Returns 404 for an unknown dataset, 400 (JSON error body) when display
    fields lack DwC terms or generation fails, otherwise a zip attachment.
    """
    check_permission_targets(None, request.specify_user.id, [ExportPackagePT.execute])
    from .models import ExportDataSet, CacheTableMeta

    try:
        dataset = ExportDataSet.objects.get(id=dataset_id)
    except ExportDataSet.DoesNotExist:
        raise Http404

    # Ensure Core mappings have an occurrenceID field
    if dataset.coremapping.mappingtype == 'Core':
        _ensure_occurrence_id_field(dataset.coremapping.query)

    # Validate: all display fields must have a DwC term assigned
    unmapped = []
    for f in dataset.coremapping.query.fields.filter(isdisplay=True).order_by('position'):
        term = getattr(f, 'term', None)
        if not term:
            unmapped.append(f.fieldname or f.stringid or '(unnamed)')
    if unmapped:
        return HttpResponseBadRequest(json.dumps({
            'error': f'Cannot export: {len(unmapped)} field(s) have no DwC term assigned: {", ".join(unmapped)}. '
            f'Open the mapping in DwC Mapping and assign a term to every field, or remove unmapped fields from the query.'
        }), content_type='application/json')

    try:
        # Try cache-based export first
        cache_meta = CacheTableMeta.objects.filter(
            schemamapping=dataset.coremapping
        ).first()

        # rowcount is nullable and defaults to None; `(rowcount or 0) > 0`
        # avoids the TypeError that `None > 0` would raise.
        if (cache_meta is not None
                and cache_meta.buildstatus == 'idle'
                and cache_meta.lastbuilt is not None
                and (cache_meta.rowcount or 0) > 0):
            from .dwca_from_cache import make_dwca_from_dataset as make_from_cache
            logger.info('Using cache for dataset %s', dataset_id)
            path = make_from_cache(dataset)
        else:
            from .dwca_from_mapping import make_dwca_from_dataset as make_from_query
            logger.info('Using direct query for dataset %s (no fresh cache)', dataset_id)
            path = make_from_query(dataset, user=request.specify_user)

        with open(path, 'rb') as f:
            response = HttpResponse(f.read(), content_type='application/zip')
        response['Content-Disposition'] = f'attachment; filename="{dataset.filename}"'
        return response
    except Exception as e:
        logger.exception('DwCA generation failed for dataset %s', dataset_id)
        return HttpResponseBadRequest(json.dumps({'error': str(e)}),
                                      content_type='application/json')
+ """ + check_permission_targets(None, request.specify_user.id, [ExportPackagePT.execute]) + from .models import ExportDataSet + + try: + dataset = ExportDataSet.objects.get(id=dataset_id) + except ExportDataSet.DoesNotExist: + raise Http404 + + try: + from .tasks import build_export_cache + from specifyweb.celery_tasks import is_worker_alive + if is_worker_alive(): + result = build_export_cache.delay(dataset_id, request.specify_user.id) + return HttpResponse(json.dumps({ + 'status': 'started', + 'taskId': result.id, + 'datasetId': dataset.id, + }), content_type='application/json') + except Exception: + logger.warning('Celery not available, falling back to synchronous build') + + # Synchronous fallback + try: + from .cache import build_cache_tables + build_cache_tables(dataset, user=request.specify_user) + return HttpResponse(json.dumps({ + 'status': 'ok', 'datasetId': dataset.id, + 'exportName': dataset.exportname, + }), content_type='application/json') + except Exception as e: + logger.exception('Cache build failed for dataset %s', dataset_id) + return HttpResponseBadRequest(json.dumps({'error': str(e)}), + content_type='application/json') + + +@require_GET +@login_maybe_required +def validate_occurrence_ids(request, mapping_id): + """Validate occurrenceID uniqueness for a core mapping's query results.""" + check_permission_targets(None, request.specify_user.id, [SchemaMappingPT.read]) + from .models import SchemaMapping + from django.db import connection + + try: + mapping = SchemaMapping.objects.get(id=mapping_id) + except SchemaMapping.DoesNotExist: + raise Http404 + + # Check for duplicate GUIDs in the collection + # occurrenceID = CollectionObject.guid, which should be unique + with connection.cursor() as cursor: + cursor.execute( + 'SELECT guid, COUNT(*) as cnt FROM collectionobject ' + 'WHERE CollectionMemberID = %s AND guid IS NOT NULL ' + 'GROUP BY guid HAVING cnt > 1 LIMIT 20', + [request.specify_collection.id] + ) + duplicates = [row[0] for row in 
@require_GET
@login_maybe_required
def cache_status(request, dataset_id):
    """Report the cache build state for one export dataset.

    Responds with the build status ('idle' when no cache metadata row
    exists yet), the ISO timestamp of the last successful build, and the
    cached row count (both null while no cache has been built).
    """
    check_permission_targets(None, request.specify_user.id, [ExportPackagePT.read])
    from .models import CacheTableMeta, ExportDataSet

    try:
        dataset = ExportDataSet.objects.get(id=dataset_id)
    except ExportDataSet.DoesNotExist:
        raise Http404

    meta = CacheTableMeta.objects.filter(schemamapping=dataset.coremapping).first()
    if meta is None:
        payload = {'status': 'idle', 'lastBuilt': None, 'rowCount': None}
    else:
        payload = {
            'status': meta.buildstatus,
            'lastBuilt': meta.lastbuilt.isoformat() if meta.lastbuilt else None,
            'rowCount': meta.rowcount,
        }
    return HttpResponse(json.dumps(payload), content_type='application/json')
def _ensure_occurrence_id_field(query):
    """Guarantee that a Core mapping's query exposes occurrenceID.

    DwC occurrence cores require a unique occurrenceID column, sourced
    here from CollectionObject.guid. If the query already has a field
    assigned the occurrenceID term this is a no-op; otherwise every
    existing field is shifted down one position and a hidden guid field
    is inserted at position 0.
    """
    from django.db.models import F

    occurrence_id_iri = 'http://rs.tdwg.org/dwc/terms/occurrenceID'
    guid_stringid = '1.collectionobject.guid'

    already_mapped = Spqueryfield.objects.filter(
        query=query, term=occurrence_id_iri
    ).exists()
    if already_mapped:
        return

    # Make room at the front of the field list (single UPDATE via F()).
    query.fields.update(position=F('position') + 1)

    Spqueryfield.objects.create(
        query=query,
        position=0,
        stringid=guid_stringid,
        fieldname='guid',
        isdisplay=False,
        isrelfld=False,
        isnot=False,
        isprompt=False,
        allownulls=True,
        alwaysfilter=False,
        sorttype=0,
        operstart=0,
        startvalue='',
        term=occurrence_id_iri,
    )
@require_POST
@login_maybe_required
def create_mapping_from_query(request):
    """Create a SchemaMapping backed by an existing SpQuery.

    Expects a JSON body with 'name', 'mappingtype' ('Core' or
    'Extension') and 'query_id'; optional 'vocabulary' defaults to 'dwc'.
    A query may back at most one mapping. Core mappings get an
    occurrenceID field injected into the backing query.
    """
    check_permission_targets(None, request.specify_user.id, [SchemaMappingPT.create])
    from .models import SchemaMapping

    def bad_request(payload):
        # All validation failures share the same JSON error shape.
        return HttpResponseBadRequest(json.dumps(payload),
                                      content_type='application/json')

    try:
        data = json.loads(request.body)
    except (json.JSONDecodeError, ValueError):
        return bad_request({'error': 'Invalid JSON'})

    name = data.get('name')
    mappingtype = data.get('mappingtype')
    query_id = data.get('query_id')

    if not name or not mappingtype or query_id is None:
        return bad_request({
            'error': 'name, mappingtype, and query_id are required'
        })

    if mappingtype not in ('Core', 'Extension'):
        return bad_request({
            'error': 'mappingtype must be Core or Extension'
        })

    try:
        query = Spquery.objects.get(id=query_id)
    except Spquery.DoesNotExist:
        return bad_request({'error': f'Query {query_id} not found'})

    # A query may back only one mapping; sharing would tangle term edits.
    if SchemaMapping.objects.filter(query=query).exists():
        return bad_request({
            'error': 'This query is already used by another mapping'
        })

    mapping = SchemaMapping.objects.create(
        query=query,
        mappingtype=mappingtype,
        name=name,
        isdefault=False,
        vocabulary=data.get('vocabulary', 'dwc'),
    )

    if mappingtype == 'Core':
        _ensure_occurrence_id_field(query)

    return HttpResponse(json.dumps(_mapping_to_json(mapping)),
                        content_type='application/json')
@require_POST
@login_maybe_required
def save_mapping_fields(request, mapping_id):
    """Persist DwC term assignments for the fields of a mapping's query.

    Body: {'fields': [{'fieldid': <id>, 'term': ..., 'isstatic': ...,
    'staticvalue': ...}, ...]}. Only the keys present in each entry are
    written. Entries whose fieldid does not belong to this mapping's
    query are silently skipped. Returns the number of rows updated.
    """
    check_permission_targets(None, request.specify_user.id, [SchemaMappingPT.update])
    from .models import SchemaMapping

    try:
        mapping = SchemaMapping.objects.get(id=mapping_id)
    except SchemaMapping.DoesNotExist:
        raise Http404

    try:
        data = json.loads(request.body)
    except (json.JSONDecodeError, ValueError):
        return HttpResponseBadRequest(json.dumps({'error': 'Invalid JSON'}),
                                      content_type='application/json')

    fields = data.get('fields')
    if not isinstance(fields, list):
        return HttpResponseBadRequest(json.dumps({'error': 'fields must be a list'}),
                                      content_type='application/json')

    # Field ids this mapping actually owns — guards against writing to
    # another mapping's query fields.
    owned_ids = set(mapping.query.fields.values_list('id', flat=True))

    updated = 0
    for spec in fields:
        field_id = spec.get('fieldid')
        if field_id is None or field_id not in owned_ids:
            continue

        changes = {
            attr: spec[attr]
            for attr in ('term', 'isstatic', 'staticvalue')
            if attr in spec
        }
        if changes:
            changes['timestampmodified'] = timezone.now()
            Spqueryfield.objects.filter(id=field_id).update(**changes)
            updated += 1

    return HttpResponse(json.dumps({'status': 'ok', 'updated': updated}),
                        content_type='application/json')
def _replace_dataset_eml(dataset, eml_xml, user, collection):
    """Replace a dataset's EML app resource with new XML content.

    Deletes the previous resource (and its data rows) if present, then
    creates a fresh SpAppResource named after the dataset with the XML
    attached as its data. The resource dir is chosen by specificity:
    user+collection, then collection, then any dir at all.
    Returns the new resource; the caller assigns it to dataset.metadata.
    """
    from specifyweb.specify.models import Spappresource, Spappresourcedata, Spappresourcedir

    # Drop the old metadata resource before creating the replacement.
    if dataset.metadata is not None:
        Spappresourcedata.objects.filter(spappresource=dataset.metadata).delete()
        dataset.metadata.delete()

    # .first() returns a model instance or None, so the `or` chain picks
    # the most specific directory that exists.
    app_dir = (
        Spappresourcedir.objects.filter(specifyuser=user, collection=collection).first()
        or Spappresourcedir.objects.filter(collection=collection).first()
        or Spappresourcedir.objects.first()
    )

    resource = Spappresource.objects.create(
        name=f'eml_{dataset.exportname}',
        mimetype='text/xml',
        level=0,
        specifyuser=user,
        spappresourcedir=app_dir,
    )
    Spappresourcedata.objects.create(
        spappresource=resource,
        data=eml_xml,
    )
    return resource


@require_http_methods(['PUT'])
@login_maybe_required
def update_dataset(request, dataset_id):
    """Update an ExportDataSet.

    Accepts a JSON body with any of: exportname, filename, isrss,
    frequency, coremapping_id, eml_xml (replaces the stored EML
    resource), and extension_ids (replaces the extension list, with
    sortorder following request order; unknown ids are skipped).
    Returns the serialized dataset.

    Raises Http404 for an unknown dataset; returns 400 on invalid JSON
    or an unknown coremapping_id.
    """
    check_permission_targets(None, request.specify_user.id, [ExportPackagePT.update])
    from .models import ExportDataSet, ExportDataSetExtension, SchemaMapping

    try:
        dataset = ExportDataSet.objects.get(id=dataset_id)
    except ExportDataSet.DoesNotExist:
        raise Http404

    try:
        data = json.loads(request.body)
    except (json.JSONDecodeError, ValueError):
        return HttpResponseBadRequest(json.dumps({'error': 'Invalid JSON'}),
                                      content_type='application/json')

    # Scalar fields: only the keys the client sent are touched.
    if 'exportname' in data:
        dataset.exportname = data['exportname']
    if 'filename' in data:
        dataset.filename = data['filename']
    if 'isrss' in data:
        dataset.isrss = data['isrss']
    if 'frequency' in data:
        dataset.frequency = data['frequency']
    if 'coremapping_id' in data:
        try:
            dataset.coremapping = SchemaMapping.objects.get(id=data['coremapping_id'])
        except SchemaMapping.DoesNotExist:
            return HttpResponseBadRequest(json.dumps({
                'error': f'SchemaMapping {data["coremapping_id"]} not found'
            }), content_type='application/json')

    # Handle EML upload/replace
    eml_xml = data.get('eml_xml')
    if eml_xml:
        dataset.metadata = _replace_dataset_eml(
            dataset, eml_xml, request.specify_user, request.specify_collection)

    dataset.timestampmodified = timezone.now()
    dataset.save()

    # Update extensions if provided: wholesale replacement, preserving
    # the order of ids in the request.
    if 'extension_ids' in data:
        dataset.extensions.all().delete()
        for i, ext_id in enumerate(data['extension_ids']):
            try:
                ext_mapping = SchemaMapping.objects.get(id=ext_id)
            except SchemaMapping.DoesNotExist:
                # Unknown ids are skipped rather than failing the update.
                continue
            ExportDataSetExtension.objects.create(
                exportdataset=dataset,
                schemamapping=ext_mapping,
                sortorder=i,
            )

    return HttpResponse(json.dumps(_dataset_to_json(dataset)),
                        content_type='application/json')
def _parse_eml_summary(content):
    """Best-effort extraction of preview fields from an EML XML string.

    Returns a dict with whichever of title, abstract, creator,
    organization and license could be read; parse failures return what
    was gathered before the error.

    NOTE(fix): the previous code used `parent.find(p, ns) or
    parent.find(p)`. ElementTree ignores the ns map for un-prefixed
    paths, so both operands were identical — and worse, an Element with
    no children is falsy, so a *found* childless element was discarded
    (truth-testing Elements is also deprecated in modern Python).
    Explicit `is None` tests avoid both problems.
    """
    from xml.etree import ElementTree as ET

    summary = {}
    try:
        root = ET.fromstring(content)
        ds = root.find('dataset')
        if ds is None:
            return summary

        title_el = ds.find('title')
        summary['title'] = title_el.text if title_el is not None else None

        abstract_el = ds.find('.//abstract/para')
        summary['abstract'] = abstract_el.text if abstract_el is not None else None

        creator = ds.find('creator')
        if creator is not None:
            org = creator.find('organizationName')
            given = creator.find('.//givenName')
            sur = creator.find('.//surName')
            parts = [el.text for el in (given, sur)
                     if el is not None and el.text]
            summary['creator'] = ' '.join(parts) if parts else None
            summary['organization'] = org.text if org is not None else None

        license_el = ds.find('.//intellectualRights//citetitle')
        summary['license'] = license_el.text if license_el is not None else None
    except Exception:
        # Malformed XML: keep whatever was gathered before the failure.
        pass
    return summary


@require_GET
@login_maybe_required
def preview_eml(request, dataset_id):
    """Return the EML metadata for an export dataset as a JSON summary.

    Responds {'hasMetadata': False} when the dataset has no metadata
    resource or the resource is empty; otherwise returns
    {'hasMetadata': True, 'raw': <xml>} plus any parsed preview fields.
    """
    check_permission_targets(None, request.specify_user.id, [ExportPackagePT.read])
    from .models import ExportDataSet

    try:
        dataset = ExportDataSet.objects.get(id=dataset_id)
    except ExportDataSet.DoesNotExist:
        raise Http404

    def no_metadata():
        return HttpResponse(json.dumps({'hasMetadata': False}),
                            content_type='application/json')

    if dataset.metadata is None:
        return no_metadata()

    from specifyweb.specify.models import Spappresourcedata
    data = Spappresourcedata.objects.filter(
        spappresource=dataset.metadata
    ).first()
    if data is None or not data.data:
        return no_metadata()

    content = data.data
    if isinstance(content, bytes):
        content = content.decode('utf-8')

    preview = {'hasMetadata': True, 'raw': content}
    preview.update(_parse_eml_summary(content))
    return HttpResponse(json.dumps(preview), content_type='application/json')
@@ -328,7 +330,8 @@ def query_to_csv( user, tableid, field_specs, - BuildQueryProps(recordsetid=recordsetid, replace_nulls=True, distinct=distinct), + BuildQueryProps(recordsetid=recordsetid, replace_nulls=True, distinct=distinct, + date_format_override=date_format_override), ) query = apply_special_post_query_processing(query, tableid, field_specs, collection, user, should_list_query=False) @@ -356,7 +359,7 @@ def query_to_csv( ] csv_writer.writerow(encoded) else: - for row in query.yield_per(1): + for row in query.yield_per(2000): if row_filter is not None and not row_filter(row): continue encoded = [ @@ -438,7 +441,7 @@ def query_to_kml( ) documentElement.appendChild(placemarkElement) else: - for row in query.yield_per(1): + for row in query.yield_per(2000): if row_has_geocoords(coord_cols, row): placemarkElement = createPlacemark( kmlDoc, row, coord_cols, table, captions, host @@ -945,14 +948,21 @@ def build_query( else: query_construct_query = session.query(id_field) + formatter = ObjectFormatter( + collection, + user, + props.replace_nulls, + props=props.formatter_props, + ) + if props.date_format_override is not None: + formatter.date_format = props.date_format_override + from .format import MYSQL_TO_YEAR, MYSQL_TO_MONTH + formatter.date_format_year = MYSQL_TO_YEAR.get(props.date_format_override, '%Y') + formatter.date_format_month = MYSQL_TO_MONTH.get(props.date_format_override, '%Y-%m') + query = QueryConstruct( collection=collection, - objectformatter=ObjectFormatter( - collection, - user, - props.replace_nulls, - props=props.formatter_props, - ), + objectformatter=formatter, query=query_construct_query ) diff --git a/specifyweb/backend/stored_queries/format.py b/specifyweb/backend/stored_queries/format.py index 8f6d250b1d5..87bce6586c3 100644 --- a/specifyweb/backend/stored_queries/format.py +++ b/specifyweb/backend/stored_queries/format.py @@ -214,6 +214,14 @@ def make_expr(self, formatter_field_spec.get_field(), new_expr ) + elif 
def get_sa_db_url(db_name):
    """Build the SQLAlchemy MySQL connection URL for *db_name*.

    The master credentials are percent-encoded so that reserved URL
    characters ('@', '/', ':', '%') in the username or password cannot
    corrupt the URL structure. The port falls back to MySQL's default
    3306 when unset.
    """
    from urllib.parse import quote_plus

    user = quote_plus(MASTER_NAME)
    password = quote_plus(MASTER_PASSWORD)
    port = DATABASE_PORT or 3306
    return 'mysql://{}:{}@{}:{}/{}?charset=utf8'.format(
        user, password, DATABASE_HOST, port, db_name)