diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 54fdadf..af3f7d4 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9] + python-version: ["3.10", 3.11, 3.12, 3.13, 3.14] # versions in active or security support as of 18/12/25 fail-fast: false steps: diff --git a/pyscicat/client.py b/pyscicat/client.py index e3ab48d..773c071 100644 --- a/pyscicat/client.py +++ b/pyscicat/client.py @@ -19,13 +19,10 @@ RawDataset, DerivedDataset, UpdateDataset, - UpdateRawDataset, - UpdateDerivedDataset, Instrument, OrigDatablock, Proposal, Sample, - Ownable, MongoQueryable, ) @@ -101,11 +98,10 @@ def __init__( raise ScicatCommError("Token not provided") self._headers["Authorization"] = "Bearer {}".format(self._token) - def _send_to_scicat( - self, - cmd: str, - endpoint: str, + self, + cmd: str, + endpoint: str, data: BaseModel = None, exclude_fields: set = {}, ): @@ -114,7 +110,11 @@ def _send_to_scicat( return requests.request( method=cmd, url=urljoin(self._base_url, endpoint), - json=data.dict(exclude=exclude_fields,exclude_none=True,exclude_unset=True) if data is not None else None, + json=data.dict( + exclude=exclude_fields, + exclude_none=True, + exclude_unset=True + ) if data is not None else None, params={"access_token": self._token}, headers=self._headers, timeout=self._timeout_seconds, @@ -122,7 +122,6 @@ def _send_to_scicat( verify=True, ) - def _call_endpoint( self, cmd: str, @@ -133,8 +132,6 @@ def _call_endpoint( exclude_fields: set = {}, ) -> Optional[dict]: response = self._send_to_scicat(cmd=cmd, endpoint=endpoint, data=data, exclude_fields=exclude_fields) - #print(response) - #print(response.text) if not response.ok: result = response.json() err = result.get("error", {}) @@ -218,9 +215,9 @@ def datasets_create(self, dataset: Dataset) -> str: Raises if a non-20x message is returned """ return self._call_endpoint( 
- cmd="post", - endpoint="Datasets", - data=dataset, + cmd="post", + endpoint="Datasets", + data=dataset, operation="datasets_create", exclude_fields=self._exclude_fields['default'], ) @@ -405,7 +402,7 @@ def datasets_origdatablock_create(self, origdatablock: OrigDatablock) -> dict: endpoint=endpoint, data=origdatablock, operation="datasets_origdatablock_create", - exclude_fields={'id','datasetId','ownerGroup','accessGroups'}, + exclude_fields={'id', 'datasetId', 'ownerGroup', 'accessGroups'}, ) """ @@ -708,7 +705,7 @@ def datasets_find( find_datasets_full_query = datasets_find def datasets_get_many( - self, + self, full_filter: Optional[dict] = None, filter_fields: Optional[dict] = None, where: Optional[dict] = None, @@ -759,10 +756,10 @@ def datasets_get_many( - skip : number indicating how many items needs to be skipped in the beginning of the list - order : enumeration (ascending or descending) indicating the order of the results """ - - #if not filter_fields: - # filter_fields = {} - #filter_fields = json.dumps(filter_fields) + + # if not filter_fields: + # filter_fields = {} + # filter_fields = json.dumps(filter_fields) filter_dict = {} if full_filter: @@ -772,20 +769,19 @@ def datasets_get_many( filter_dict['where'] = where elif filter_fields: filter_dict['where'] = filter_fields - + if fields: filter_dict['fields'] = fields - + if limits: filter_dict['limits'] = limits filter_string = json.dumps(filter_dict) if filter_dict else "" endpoint = 'Datasets' + f'?filter={filter_string}' if filter_string else "" - #print(endpoint) return self._call_endpoint( - cmd="get", - endpoint=endpoint, - operation="datasets_get_many", + cmd="get", + endpoint=endpoint, + operation="datasets_get_many", allow_404=True ) @@ -1012,7 +1008,7 @@ def origdatablocks_create(self, origdatablock: OrigDatablock) -> dict: Raises if a non-20x message is returned """ - endpoint = f"origdatablocks" + endpoint = "origdatablocks" return self._call_endpoint( cmd="post", endpoint=endpoint, diff 
--git a/pyscicat/model.py b/pyscicat/model.py index a551b73..49b65bc 100644 --- a/pyscicat/model.py +++ b/pyscicat/model.py @@ -4,16 +4,19 @@ from typing import List, Dict, Optional from pydantic import BaseModel -from pydantic.main import ModelMetaclass +from pydantic._internal._model_construction import ModelMetaclass -# # creates update models where all the fields are optional +# Might want to consider alternative approaches for better maintainability: +# https://github.com/pydantic/pydantic/issues/6381 + + class _AllOptional(ModelMetaclass): def __new__(self, name, bases, namespaces, **kwargs): annotations = namespaces.get('__annotations__', {}) for base in bases: - #annotations.update(base.__annotations__) + # annotations.update(base.__annotations__) for base_ in base.__mro__: if base_ is BaseModel: break @@ -26,6 +29,7 @@ def __new__(self, name, bases, namespaces, **kwargs): namespaces['__annotations__'] = annotations return super().__new__(self, name, bases, namespaces, **kwargs) + class DatasetType(str, enum.Enum): """type of Dataset""" @@ -36,18 +40,18 @@ class DatasetType(str, enum.Enum): class MongoQueryable(BaseModel): """Many objects in SciCat are mongo queryable""" - createdBy: Optional[str] - updatedBy: Optional[str] - updatedAt: Optional[str] - createdAt: Optional[str] + createdBy: Optional[str] = None + updatedBy: Optional[str] = None + updatedAt: Optional[str] = None + createdAt: Optional[str] = None class Ownable(MongoQueryable): """Many objects in SciCat are ownable""" ownerGroup: str - accessGroups: Optional[List[str]] - instrumentGroup: Optional[str] + accessGroups: Optional[List[str]] = None + instrumentGroup: Optional[str] = None class User(BaseModel): @@ -67,19 +71,19 @@ class Proposal(Ownable): """ proposalId: str - pi_email: Optional[str] - pi_firstname: Optional[str] - pi_lastname: Optional[str] email: str - firstname: Optional[str] - lastname: Optional[str] - title: Optional[str] # required in next backend version - abstract: 
Optional[str] - startTime: Optional[str] - endTime: Optional[str] + pi_email: Optional[str] = None + pi_firstname: Optional[str] = None + pi_lastname: Optional[str] = None + firstname: Optional[str] = None + lastname: Optional[str] = None + title: Optional[str] = None # required in next backend version + abstract: Optional[str] = None + startTime: Optional[str] = None + endTime: Optional[str] = None MeasurementPeriodList: Optional[ List[dict] - ] # may need updating with the measurement period model + ] = None # may need updating with the measurement period model class Sample(Ownable): @@ -88,10 +92,10 @@ class Sample(Ownable): Raw datasets should be linked to such sample definitions. """ - sampleId: Optional[str] - owner: Optional[str] - description: Optional[str] - sampleCharacteristics: Optional[dict] + sampleId: Optional[str] = None + owner: Optional[str] = None + description: Optional[str] = None + sampleCharacteristics: Optional[dict] = None isPublished: bool = False @@ -103,15 +107,15 @@ class Job(MongoQueryable): track of analysis jobs e.g. for automated analysis workflows """ - id: Optional[str] emailJobInitiator: str type: str - creationTime: Optional[str] # not sure yet which ones are optional or not. - executionTime: Optional[str] - jobParams: Optional[dict] - jobStatusMessage: Optional[str] - datasetList: Optional[dict] # documentation says dict, but should maybe be list? - jobResultObject: Optional[dict] # ibid. + id: Optional[str] = None + creationTime: Optional[str] = None # not sure yet which ones are optional or not. + executionTime: Optional[str] = None + jobParams: Optional[dict] = None + jobStatusMessage: Optional[str] = None + datasetList: Optional[dict] = None # documentation says dict, but should maybe be list? + jobResultObject: Optional[dict] = None # ibid. 
class Instrument(MongoQueryable): @@ -119,10 +123,10 @@ """ Instrument class, most of this is flexibly definable in customMetadata """ - pid: Optional[str] uniqueName: str name: str - customMetadata: Optional[dict] + pid: Optional[str] = None + customMetadata: Optional[dict] = None class Dataset(Ownable): """ A dataset in SciCat, base class for derived and raw datasets """ - pid: Optional[str] - classification: Optional[str] contactEmail: str creationTime: str # datetime - datasetName: Optional[str] - description: Optional[str] - history: Optional[List[dict]] # list of foreigh key ids to the Messages table - instrumentId: Optional[str] - isPublished: Optional[bool] = False - keywords: Optional[List[str]] - license: Optional[str] - numberOfFiles: Optional[int] - numberOfFilesArchived: Optional[int] - orcidOfOwner: Optional[str] - packedSize: Optional[int] owner: str - ownerEmail: Optional[str] - sharedWith: Optional[List[str]] - size: Optional[int] sourceFolder: str - sourceFolderHost: Optional[str] - techniques: Optional[List[dict]] # with {'pid':pid, 'name': name} as entries type: DatasetType - validationStatus: Optional[str] - version: Optional[str] - scientificMetadata: Optional[Dict] + pid: Optional[str] = None + classification: Optional[str] = None + datasetName: Optional[str] = None + description: Optional[str] = None + history: Optional[List[dict]] = None # list of foreign key ids to the Messages table + instrumentId: Optional[str] = None + isPublished: Optional[bool] = False + keywords: Optional[List[str]] = None + license: Optional[str] = None + numberOfFiles: Optional[int] = None + numberOfFilesArchived: Optional[int] = None + orcidOfOwner: Optional[str] = None + packedSize: Optional[int] = None + ownerEmail: Optional[str] = None + sharedWith: Optional[List[str]] = None + size: Optional[int] = None + sourceFolderHost: Optional[str] = None + techniques: Optional[List[dict]] = None # 
with {'pid':pid, 'name': name} as entries + validationStatus: Optional[str] = None + version: Optional[str] = None + scientificMetadata: Optional[Dict] = None + class UpdateDataset(Dataset, metaclass=_AllOptional): pass + class RawDataset(Dataset): """ Raw datasets from which derived datasets are... derived. """ - principalInvestigator: Optional[str] - creationLocation: Optional[str] type: DatasetType = DatasetType.raw - dataFormat: Optional[str] - endTime: Optional[str] # datetime - sampleId: Optional[str] - proposalId: Optional[str] + principalInvestigator: Optional[str] = None + creationLocation: Optional[str] = None + dataFormat: Optional[str] = None + endTime: Optional[str] = None # datetime + sampleId: Optional[str] = None + proposalId: Optional[str] = None + class UpdateRawDataset(Dataset, metaclass=_AllOptional): pass @@ -185,13 +192,15 @@ class DerivedDataset(Dataset): investigator: str inputDatasets: List[str] usedSoftware: List[str] - jobParameters: Optional[dict] - jobLogData: Optional[str] type: DatasetType = DatasetType.derived + jobParameters: Optional[dict] = None + jobLogData: Optional[str] = None + class UpdateDerivedDataset(DerivedDataset, metaclass=_AllOptional): pass + class DataFile(MongoQueryable): """ A reference to a file in SciCat. Path is relative @@ -201,8 +210,8 @@ class DataFile(MongoQueryable): path: str size: int - time: Optional[str] - chk: Optional[str] + time: Optional[str] = None + chk: Optional[str] = None uid: Optional[str] = None gid: Optional[str] = None perm: Optional[str] = None @@ -213,15 +222,15 @@ class Datablock(Ownable): A Datablock maps between a Dataset and contains DataFiles """ - id: Optional[str] # archiveId: str = None listed in catamel model, but comes back invalid? 
size: int - packedSize: Optional[int] - chkAlg: Optional[int] version: str = None - instrumentGroup: Optional[str] dataFileList: List[DataFile] datasetId: str + id: Optional[str] = None + packedSize: Optional[int] = None + chkAlg: Optional[int] = None + instrumentGroup: Optional[str] = None class OrigDatablock(Ownable): @@ -229,12 +238,12 @@ class OrigDatablock(Ownable): An Original Datablock maps between a Dataset and contains DataFiles """ - id: Optional[str] - # archiveId: str = None listed in catamel model, but comes back invalid? size: int chkAlg: str dataFileList: List[DataFile] datasetId: str + id: Optional[str] = None + # archiveId: str = None listed in catamel model, but comes back invalid? class Attachment(Ownable): @@ -242,10 +251,10 @@ class Attachment(Ownable): Attachments can be any base64 encoded string...thumbnails are attachments """ - id: Optional[str] thumbnail: str - caption: Optional[str] datasetId: str + id: Optional[str] = None + caption: Optional[str] = None class PublishedData: @@ -259,18 +268,18 @@ class PublishedData: publisher: str publicationYear: int title: str - url: Optional[str] abstract: str dataDescription: str resourceType: str - numberOfFiles: Optional[int] - sizeOfArchive: Optional[int] pidArray: List[str] authors: List[str] registeredTime: str status: str - thumbnail: Optional[str] createdBy: str updatedBy: str createdAt: str updatedAt: str + url: Optional[str] = None + numberOfFiles: Optional[int] = None + sizeOfArchive: Optional[int] = None + thumbnail: Optional[str] = None diff --git a/pyscicat/tests/test_client.py b/pyscicat/tests/test_client.py index 6d036f0..e77d080 100644 --- a/pyscicat/tests/test_client.py +++ b/pyscicat/tests/test_client.py @@ -32,7 +32,7 @@ def add_mock_requests(mock_request): json={"id": "a_token"}, ) - mock_request.post(local_url + "Instruments", json={"pid": "earth"}) + mock_request.post(local_url + "Instruments", json={"uniqueName": "magictelescope", "pid": "earth"}) mock_request.post(local_url + 
"Proposals", json={"proposalId": "deepthought"}) mock_request.post(local_url + "Samples", json={"sampleId": "gargleblaster"}) mock_request.patch(local_url + "Instruments/earth", json={"pid": "earth"}) @@ -82,11 +82,19 @@ def test_scicat_ingest(): # Instrument instrument = Instrument( - pid="earth", name="Earth", customMetadata={"a": "field"} + uniqueName="magictelescope", pid="earth", name="Earth", customMetadata={"a": "field"} ) - assert scicat.upload_instrument(instrument) == "earth" - assert scicat.instruments_create(instrument) == "earth" - assert scicat.instruments_update(instrument) == "earth" + assert scicat.upload_instrument(instrument) == { + "uniqueName": "magictelescope", + "pid": "earth", + } + assert scicat.instruments_create(instrument) == { + "uniqueName": "magictelescope", + "pid": "earth", + } + assert scicat.instruments_update(instrument) == { + "pid": "earth", + } # Proposal proposal = Proposal( @@ -95,9 +103,9 @@ def test_scicat_ingest(): email="deepthought@viltvodle.com", **ownable.dict() ) - assert scicat.upload_proposal(proposal) == "deepthought" - assert scicat.proposals_create(proposal) == "deepthought" - assert scicat.proposals_update(proposal) == "deepthought" + assert scicat.upload_proposal(proposal) == {"proposalId": "deepthought"} + assert scicat.proposals_create(proposal) == {"proposalId": "deepthought"} + assert scicat.proposals_update(proposal) == {"proposalId": "deepthought"} # Sample sample = Sample( @@ -106,9 +114,9 @@ def test_scicat_ingest(): sampleCharacteristics={"a": "field"}, **ownable.dict() ) - assert scicat.upload_sample(sample) == "gargleblaster" - assert scicat.samples_create(sample) == "gargleblaster" - assert scicat.samples_update(sample) == "gargleblaster" + assert scicat.upload_sample(sample) == {"sampleId": "gargleblaster"} + assert scicat.samples_create(sample) == {"sampleId": "gargleblaster"} + assert scicat.samples_update(sample) == {"sampleId": "gargleblaster"} # RawDataset dataset = RawDataset( @@ -129,19 
+137,19 @@ def test_scicat_ingest(): **ownable.dict() ) dataset_id = scicat.upload_raw_dataset(dataset) - assert dataset_id == "42" + assert dataset_id == {"pid": "42"} # Update record dataset.principalInvestigator = "B. Turtle" - dataset_id_2 = scicat.update_dataset(dataset, dataset_id) + dataset_id_2 = scicat.update_dataset(dataset, dataset_id["pid"]) assert dataset_id_2 == dataset_id # Datablock with DataFiles data_file = DataFile(path="/foo/bar", size=42) data_block = Datablock( size=42, - version=1, - datasetId=dataset_id, + version="1", + datasetId=dataset_id["pid"], dataFileList=[data_file], **ownable.dict() ) @@ -149,7 +157,7 @@ def test_scicat_ingest(): # Attachment attachment = Attachment( - datasetId=dataset_id, + datasetId=dataset_id["pid"], thumbnail=encode_thumbnail(thumb_path), caption="scattering image", **ownable.dict() diff --git a/pyscicat/tests/test_suite_2.py b/pyscicat/tests/test_suite_2.py index 8384dfa..271c930 100644 --- a/pyscicat/tests/test_suite_2.py +++ b/pyscicat/tests/test_suite_2.py @@ -25,7 +25,7 @@ def set_up_test_environment(mock_request): - global test_datasets + # global test_datasets # load test data for name, path in test_dataset_files.items(): @@ -113,15 +113,16 @@ def test_scicat_ingest_raw_dataset(): dataset = RawDataset(**data["dataset"], **ownable.dict()) created_dataset_pid = scicat.create_dataset(dataset) - assert created_dataset_pid == data["id"] + assert created_dataset_pid == data["dataset"] | {"pid": data["id"]} # origDatablock with DataFiles origDataBlock = OrigDatablock( size=data["orig_datablock"]["size"], - datasetId=created_dataset_pid, + datasetId=created_dataset_pid["pid"], dataFileList=[ DataFile(**file) for file in data["orig_datablock"]["dataFileList"] ], + chkAlg="sha2", **ownable.dict() ) created_origdatablock = scicat.create_dataset_origdatablock(origDataBlock) @@ -149,15 +150,16 @@ def test_scicat_ingest_derived_dataset(): dataset = RawDataset(**data["dataset"], **ownable.dict()) created_dataset_pid = 
scicat.create_dataset(dataset) - assert created_dataset_pid == data["id"] + assert created_dataset_pid == data["dataset"] | {"pid": data["id"]} # origDatablock with DataFiles origDataBlock = OrigDatablock( size=data["orig_datablock"]["size"], - datasetId=created_dataset_pid, + datasetId=created_dataset_pid["pid"], dataFileList=[ DataFile(**file) for file in data["orig_datablock"]["dataFileList"] ], + chkAlg="sha2", **ownable.dict() ) created_origdatablock = scicat.create_dataset_origdatablock(origDataBlock) diff --git a/requirements.txt b/requirements.txt index 903705e..9916944 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -pydantic +pydantic >= 2.0.0 requests diff --git a/versioneer.py b/versioneer.py index 2b54540..8b24e23 100644 --- a/versioneer.py +++ b/versioneer.py @@ -343,9 +343,9 @@ def get_config_from_root(root): # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.SafeConfigParser() + parser = configparser.ConfigParser() with open(setup_cfg, "r") as f: - parser.readfp(f) + parser.read_file(f) VCS = parser.get("versioneer", "VCS") # mandatory def get(parser, name):