Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions datajoint/external.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,25 @@ def upload_filepath(self, local_filepath):
uuid = uuid_from_buffer(
init_string=relative_filepath
) # hash relative path, not contents
contents_hash = uuid_from_file(local_filepath)

# Check if checksum should be skipped based on file size limit
file_size = Path(local_filepath).stat().st_size
size_limit = config.get("filepath_checksum_size_limit_insert")
skip_checksum = size_limit is not None and file_size > size_limit

if skip_checksum:
contents_hash = None
logger.warning(
f"Skipping checksum for '{relative_filepath}' ({file_size} bytes > {size_limit} byte limit)"
)
else:
contents_hash = uuid_from_file(local_filepath)

# check if the remote file already exists and verify that it matches
check_hash = (self & {"hash": uuid}).fetch("contents_hash")
if check_hash.size:
# the tracking entry exists, check that it's the same file as before
if contents_hash != check_hash[0]:
if not skip_checksum and contents_hash != check_hash[0]:
raise DataJointError(
f"A different version of '{relative_filepath}' has already been placed."
)
Expand All @@ -291,15 +303,15 @@ def upload_filepath(self, local_filepath):
self._upload_file(
local_filepath,
self._make_external_filepath(relative_filepath),
metadata={"contents_hash": str(contents_hash)},
metadata={"contents_hash": str(contents_hash) if contents_hash else ""},
)
self.connection.query(
"INSERT INTO {tab} (hash, size, filepath, contents_hash) VALUES (%s, {size}, '{filepath}', %s)".format(
tab=self.full_table_name,
size=Path(local_filepath).stat().st_size,
size=file_size,
filepath=relative_filepath,
),
args=(uuid.bytes, contents_hash.bytes),
args=(uuid.bytes, contents_hash.bytes if contents_hash else None),
)
return uuid

Expand Down
5 changes: 4 additions & 1 deletion datajoint/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,11 @@
"database.use_tls": None,
"enable_python_native_blobs": True, # python-native/dj0 encoding support
"add_hidden_timestamp": False,
# file size limit for when to disable checksums
# file size limits for when to disable checksums (in bytes)
# filepath_checksum_size_limit: skip checksum verification on fetch for large files
"filepath_checksum_size_limit": None,
# filepath_checksum_size_limit_insert: skip checksum computation on insert for files larger than this many bytes (None = always compute the checksum)
"filepath_checksum_size_limit_insert": None,
}
)

Expand Down
Loading