# Source code for climind.data_manager.metadata
# Climate indicator manager - a package for managing and building climate indicator dashboards.
# Copyright (c) 2022 John Kennedy
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
These metadata classes contain all the information about the datasets that are manipulated
by the packages. The :class:`BaseMetadata` class contains much of the functionality, with
:class:`CollectionMetadata` and :class:`.DatasetMetadata` inheriting that functionality and
differing chiefly in the schemas used to validate their contents. The :class:`CombinedMetadata`
class comprises a :class:`CollectionMetadata` object and a :class:`.DatasetMetadata` object.
"""
import json
from pathlib import Path
from jsonschema import validate, RefResolver
from climind.definitions import ROOT_DIR
# [docs]
def list_match(list_to_match: list, attribute: str) -> bool:
    """
    Report whether attribute is one of the entries of list_to_match.

    Parameters
    ----------
    list_to_match: list
        Candidate values that count as a match
    attribute: str
        Value to look for among the candidates

    Returns
    -------
    bool
        True when attribute is found in list_to_match, False when it is not
    """
    found = attribute in list_to_match
    return found
# [docs]
class BaseMetadata:
    """
    Simple class to store metadata and find matches. Metadata items can be set and recovered using a
    dictionary-like syntax:

    `metadata_object['key'] = value`

    `value = metadata_object['key']`

    And testing if a key exists is also dict-like

    `key in metadata_object`
    """

    def __init__(self, metadata: dict):
        """
        Create a :class:`BaseMetadata` object from a dictionary containing the metadata in key-value pairs.

        Parameters
        ----------
        metadata : dict
            Dictionary containing the metadata. The dictionary is stored by reference, not copied.

        Attributes
        ----------
        metadata: dict
            Contains the metadata information in key value pairs
        """
        self.metadata = metadata

    def __getitem__(self, key):
        # Name the missing key in the exception so failures are easier to trace
        if key not in self.metadata:
            raise KeyError(key)
        return self.metadata[key]

    def __setitem__(self, key, item):
        self.metadata[key] = item

    def __contains__(self, key):
        return key in self.metadata

    def __str__(self):
        # One "key: value" line per metadata entry, each terminated by a newline
        return ''.join(f"{key}: {self[key]!s}\n" for key in self.metadata)

    def match_metadata(self, metadata_to_match: dict) -> bool:
        """
        Check if metadata match contents of dictionary, metadata_to_match. Only
        definite non-matches are rejected. If a key in metadata_to_match is not
        present in the metadata, this is not counted as a non-match.

        Parameters
        ----------
        metadata_to_match : dict
            Key-value or key-list pairs for match. When the value is a list, the
            stored metadata entry must be one of the items in that list; otherwise
            the stored entry must be equal to the value.

        Returns
        -------
        bool
            False if any key shared by both dictionaries has a conflicting value, True otherwise
        """
        # Only keys present on both sides can produce a definite mismatch
        common_keys = metadata_to_match.keys() & self.metadata.keys()
        for key in common_keys:
            wanted = metadata_to_match[key]
            actual = self.metadata[key]
            if isinstance(wanted, list):
                if not list_match(wanted, actual):
                    return False
            elif wanted != actual:
                return False
        return True

    def fill_string(self, string_to_replace: str, replacement: str):
        """
        Replace string_to_replace with the replacement value in all elements of the metadata. This is
        used to replace placeholder substrings like "YYYY" with the year, or "MMMM" with the month, or
        "VVVV" with a version number.

        Only string values and string entries of list values are modified. Any other value,
        including non-string entries inside a list (numbers, nested dictionaries, ...), is
        left exactly as it is.

        Parameters
        ----------
        string_to_replace: str
            string to be replaced in metadata elements
        replacement: str
            replacement string

        Returns
        -------
        None
        """
        # Values are reassigned under existing keys only, so the dictionary size
        # never changes while it is being iterated.
        for key in self.metadata:
            item = self.metadata[key]
            if isinstance(item, str):
                self.metadata[key] = item.replace(string_to_replace, replacement)
            elif isinstance(item, list):
                # A new list is built; non-string entries pass through untouched
                # instead of failing on a missing/incompatible .replace method.
                self.metadata[key] = [
                    entry.replace(string_to_replace, replacement) if isinstance(entry, str) else entry
                    for entry in item
                ]
# [docs]
class CollectionMetadata(BaseMetadata):
    """
    Class to store collection-level metadata, containing information that refers to all
    data sets in the collection.
    """

    def __init__(self, metadata: dict):
        """
        Create :class:`CollectionMetadata` from a dictionary containing metadata. Metadata are
        validated using the metadata_schema.json file before being stored.

        Parameters
        ----------
        metadata: dict
            Dictionary containing metadata in key value pairs.

        Raises
        ------
        jsonschema.exceptions.ValidationError
            Raised by ``validate`` if the metadata do not conform to the schema
        """
        schema_path = Path(ROOT_DIR) / 'climind' / 'data_manager' / 'metadata_schema.json'
        # Read the schema as UTF-8 explicitly rather than relying on the platform's
        # locale-dependent default text encoding.
        with open(schema_path, encoding='utf-8') as f:
            metadata_schema = json.load(f)

        # The resolver is anchored at the schema file's own URI so that references
        # inside the schema are resolved relative to that file.
        resolver = RefResolver(schema_path.as_uri(), metadata_schema)
        validate(metadata, metadata_schema, resolver=resolver)

        super().__init__(metadata)
# [docs]
class DatasetMetadata(BaseMetadata):
    """
    Class to store dataset-level metadata, containing information that refers specifically
    to a single data set.
    """

    def __init__(self, metadata: dict):
        """
        Create :class:`DatasetMetadata` from a dictionary containing metadata. Metadata are
        validated using the dataset_schema.json file before being stored.

        Parameters
        ----------
        metadata: dict
            Dictionary containing metadata in key value pairs.

        Raises
        ------
        jsonschema.exceptions.ValidationError
            Raised by ``validate`` if the metadata do not conform to the schema
        """
        schema_path = Path(ROOT_DIR) / 'climind' / 'data_manager' / 'dataset_schema.json'
        # Read the schema as UTF-8 explicitly rather than relying on the platform's
        # locale-dependent default text encoding.
        with open(schema_path, encoding='utf-8') as f:
            metadata_schema = json.load(f)
        validate(metadata, metadata_schema)

        super().__init__(metadata)

    def creation_message(self) -> None:
        """
        Add creation message to the history. The message is built from the 'filename',
        'url' and 'last_modified' metadata entries and appended to the 'history' entry.

        Returns
        -------
        None

        Raises
        ------
        KeyError
            If any of 'filename', 'url', 'last_modified' or 'history' is missing from the metadata
        """
        download_message = f"Data set created from file {self.metadata['filename']} " \
                           f"downloaded from {self.metadata['url']} " \
                           f"at {self.metadata['last_modified']}"
        self.metadata['history'].append(download_message)
# [docs]
class CombinedMetadata:
    """
    :class:`CombinedMetadata` combines :class:`DatasetMetadata` and :class:`CollectionMetadata` in one single
    object so that both sets of metadata elements are available in one container. When a key is present
    in both, the dataset-level entry takes precedence for reading and writing.
    """

    def __init__(self, dataset: DatasetMetadata, collection: CollectionMetadata):
        """
        Create :class:`CombinedMetadata` from dataset-level and collection-level metadata.

        Parameters
        ----------
        dataset: DatasetMetadata
            Metadata specific to a single data set
        collection: CollectionMetadata
            Metadata for the collection
        """
        self.dataset = dataset
        self.collection = collection

    def __getitem__(self, key):
        # Dataset-level entries shadow collection-level entries of the same name
        if key in self.dataset:
            return self.dataset[key]
        if key in self.collection:
            return self.collection[key]
        raise KeyError(key)

    def __setitem__(self, key, value):
        # Only existing keys can be assigned; new keys are rejected so that it is
        # never ambiguous which of the two metadata objects should receive them.
        if key in self.dataset:
            self.dataset[key] = value
        elif key in self.collection:
            self.collection[key] = value
        else:
            raise KeyError(key)

    def __contains__(self, key):
        return key in self.dataset or key in self.collection

    def __str__(self):
        # Collection metadata first, then dataset metadata, each followed by a blank line
        return f"{self.collection!s}\n{self.dataset!s}\n"

    def match_metadata(self, metadata_to_match: dict) -> bool:
        """
        Test to see if metadata matches metadata to match. Returns True unless there is a mismatch
        between the required metadata_to_match and the metadata.

        Parameters
        ----------
        metadata_to_match: dict
            Dictionary of metadata terms to match

        Returns
        -------
        bool
            Return True unless an element in metadata_to_match conflicts with an entry in
            either the collection metadata or the dataset metadata
        """
        collection_ok = self.collection.match_metadata(metadata_to_match)
        dataset_ok = self.dataset.match_metadata(metadata_to_match)
        return bool(collection_ok and dataset_ok)

    def write_metadata(self, filename: Path) -> None:
        """
        Write out the metadata in json format to a file specified by filename. The output
        consists of the collection metadata with its 'datasets' entry set to a one-element
        list holding this dataset's metadata. The result is validated against
        metadata_schema.json before the file is written.

        Parameters
        ----------
        filename: Path
            Path of filename to be created

        Returns
        -------
        None

        Raises
        ------
        jsonschema.exceptions.ValidationError
            Raised by ``validate`` if the rebuilt metadata do not conform to the schema
        """
        # Work on a shallow copy: replacing 'datasets' on the original dictionary
        # would silently alter the collection metadata held by this object.
        rebuilt = dict(self.collection.metadata)
        rebuilt['datasets'] = [self.dataset.metadata]

        schema_path = Path(ROOT_DIR) / 'climind' / 'data_manager' / 'metadata_schema.json'
        # Explicit UTF-8 rather than the platform's locale-dependent default
        with open(schema_path, encoding='utf-8') as f:
            metadata_schema = json.load(f)

        resolver = RefResolver(schema_path.as_uri(), metadata_schema)
        validate(rebuilt, metadata_schema, resolver=resolver)

        with open(filename, 'w', encoding='utf-8') as out_json:
            json.dump(rebuilt, out_json, indent=4)

    def creation_message(self) -> None:
        """
        Add a creation message to the dataset history and populate the wildcards in the metadata,
        such as AAAA (last modified/download time), YYYY (year), VVVV (version number).

        The first element of the 'last_modified' entry replaces AAAA, its first four
        characters replace YYYY, and the 'version' entry replaces VVVV, in both the
        dataset and the collection metadata.

        Returns
        -------
        None
        """
        # The history message is added first, so it is also subject to the
        # placeholder substitutions below.
        self.dataset.creation_message()

        last_modified = self['last_modified'][0]
        self.dataset.fill_string('AAAA', last_modified)
        self.collection.fill_string('AAAA', last_modified)

        # NOTE(review): assumes last_modified starts with a four-digit year - confirm format
        last_year = last_modified[0:4]
        self.dataset.fill_string('YYYY', last_year)
        self.collection.fill_string('YYYY', last_year)

        version = self['version']
        self.dataset.fill_string('VVVV', version)
        self.collection.fill_string('VVVV', version)