Source code for climind.readers.generic_reader

#  Climate indicator manager - a package for managing and building climate indicator dashboards.
#  Copyright (c) 2022 John Kennedy
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.

from pathlib import Path
from datetime import datetime
from typing import Union, Optional
import copy
from climind.data_manager.metadata import CombinedMetadata
from climind.data_types.timeseries import TimeSeriesAnnual, TimeSeriesMonthly
from climind.data_types.grid import GridMonthly


def get_reader_script_name(metadata: CombinedMetadata, **kwargs) -> Optional[str]:
    """
    Work out which reader function handles the given combination of metadata.

    Parameters
    ----------
    metadata: CombinedMetadata
        Metadata for the data set; its 'time_resolution' and 'type' entries
        are used to choose the reader function.
    kwargs: dict
        Optional keyword arguments. For gridded data, a 'grid_resolution' of
        5 or 1 selects the monthly 5x5 or 1x1 grid reader respectively.

    Returns
    -------
    Optional[str]
        Name of the reader function for this combination of metadata, or None
        if the time resolution or data type is not one of the handled values.
    """
    time_resolution = metadata['time_resolution']
    if time_resolution not in ['annual', 'monthly', 'irregular']:
        return None

    data_type = metadata['type']
    if data_type == 'timeseries':
        return f"read_{time_resolution}_ts"
    if data_type != 'gridded':
        return None

    # Gridded data: an explicitly requested 5 or 1 degree resolution takes
    # precedence over the reader name derived from the time resolution.
    if 'grid_resolution' in kwargs:
        grid_resolution = kwargs['grid_resolution']
        if grid_resolution == 1:
            return 'read_monthly_1x1_grid'
        if grid_resolution == 5:
            return 'read_monthly_5x5_grid'

    return f"read_{time_resolution}_grid"
def get_module(package_name: str, script_name: str):
    """
    Import and return the module named by the package name and the script name.

    Parameters
    ----------
    package_name: str
        String containing the package path as a dot separated string
    script_name: str
        Name of the script (module) to import from that package

    Returns
    -------
    module
        The module specified by the package name and script name

    Raises
    ------
    ImportError
        If the dotted module name cannot be imported.
    """
    # Imported locally so the function does not depend on a file-level import.
    import importlib

    # import_module returns the dotted submodule itself. The previous
    # __import__(..., fromlist=[None]) hack raises TypeError when the target
    # is a package, because the from-list entries are then validated as str.
    return importlib.import_module(f"{package_name}.{script_name}")
def get_last_modified_time(file: Path) -> Optional[str]:
    """
    Return the last-modified time of a file as a string, or None if the
    file does not exist.

    Parameters
    ----------
    file: Path
        Path of the file whose modification time is wanted

    Returns
    -------
    Optional[str]
        Modification time formatted as "YYYY-MM-DD HH:MM:SS", or None when
        the file does not exist
    """
    if not file.exists():
        return None

    # st_mtime is a POSIX timestamp; convert it before formatting.
    modified = datetime.fromtimestamp(file.stat().st_mtime)
    return modified.strftime("%Y-%m-%d %H:%M:%S")
def read_ts(out_dir: Path, metadata: CombinedMetadata, **kwargs) -> Union[
        TimeSeriesMonthly, TimeSeriesAnnual, GridMonthly]:
    """
    Generic entry point for reading a data set. It imports the reader module
    named in the metadata, works out which reader function is needed and
    calls it.

    Parameters
    ----------
    out_dir: Path
        Path of the directory in which the data files are to be found
    metadata: CombinedMetadata
        Metadata describing the required data set; its 'reader', 'filename'
        and 'name' entries are used here
    kwargs: dict
        Optional arguments, used to choose the reader function and passed
        on to it

    Returns
    -------
    Union[TimeSeriesMonthly, TimeSeriesAnnual, GridMonthly]
        Whatever the chosen reader function returns

    Raises
    ------
    RuntimeError
        If no reader function name can be chosen for this combination of
        metadata
    NotImplementedError
        If the reader module does not provide the chosen reader function
        (e.g. because the data set is only a time series and not a grid)
    """
    reader_module = get_module('climind.readers', metadata['reader'])

    # Full path of every data file, plus when each one was last modified
    # (None for files that do not exist).
    filename = [out_dir / name for name in metadata['filename']]
    last_modified_times = [get_last_modified_time(path) for path in filename]

    # Work on a copy so the caller's metadata is not changed.
    construction_metadata = copy.deepcopy(metadata)
    construction_metadata.dataset['last_modified'] = last_modified_times

    chosen_reader_script = get_reader_script_name(metadata, **kwargs)
    if chosen_reader_script is None:
        raise RuntimeError("Reader does not exist for this combination of metadata")

    if not hasattr(reader_module, chosen_reader_script):
        raise NotImplementedError(f"Reader {chosen_reader_script} not implemented for this data set {metadata['name']}")

    reader = getattr(reader_module, chosen_reader_script)
    return reader(filename, construction_metadata, **kwargs)