# Source code for climind.fetchers.fetcher_jra3q_grid

#  Climate indicator manager - a package for managing and building climate indicator dashboards.
#  Copyright (c) 2022 John Kennedy
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
Set of scripts to download the JRA-55 gridded data. Adapted from the scripts
provided by UCAR. Data are stored by year up till a certain point and by
month for near-real time data thereafter. Credentials are needed (see fetch function)
"""
import calendar
import itertools
import os
import sys
from pathlib import Path
from typing import List
from urllib.request import build_opener

import requests
import xarray as xa
from dotenv import load_dotenv

from climind.config.config import DATA_DIR


def make_realtime_file_list(first_year: int, final_year: int) -> List[str]:
    """
    Make a list of monthly real-time filenames between the two specified years.

    Parameters
    ----------
    first_year: int
        Year to start generation
    final_year: int
        Year to end generation

    Returns
    -------
    List[str]
        List of filenames for real-time data between the specified years
    """
    # One entry per (year, month) pair, in chronological order.
    return [
        f'anl_surf125/{year}{month:02d}/anl_surf125.{year}{month:02d}'
        for year in range(first_year, final_year + 1)
        for month in range(1, 13)
    ]
def make_file_list(first_year, final_year) -> List[str]:
    """
    Make a list of monthly archived filenames between the two specified years.

    Each filename covers one calendar month, from 00Z on the 1st to 18Z on the
    last day of the month.

    Parameters
    ----------
    first_year: int
        Year to start generation
    final_year: int
        Year to end generation

    Returns
    -------
    List[str]
        List of filenames for archived data between the specified years
    """
    filelist = []
    for year, month in itertools.product(range(first_year, final_year + 1), range(1, 13)):
        # calendar.monthrange handles leap years for any year; the previous
        # hard-coded list of leap years stopped at 2020 and would have given
        # 28 days for February 2024 onwards.
        month_length = calendar.monthrange(year, month)[1]
        filelist.append(
            f'anl_surf125/{year}{month:02d}/jra3q.anl_surf125.0_0_0.tmp2m-hgt-an-ll125.'
            f'{year}{month:02d}0100_{year}{month:02d}{month_length:02d}18.nc')
    return filelist
def process_file(file_base: str) -> None:
    """
    Read in a downloaded file, take the time mean and overwrite the file
    with the result. Does nothing if the file does not exist.

    Parameters
    ----------
    file_base: str
        Filename of file to be processed

    Returns
    -------
    None
    """
    if Path(file_base).exists():
        with xa.open_dataset(file_base) as ds:
            # Load the mean into memory so the source file is closed before
            # it is overwritten; calling to_netcdf on the same path while the
            # lazily-loaded dataset is still open can fail or corrupt the file.
            monthly_mean = ds.mean('time').load()
        monthly_mean.encoding = {'time': 'ignore'}
        monthly_mean.to_netcdf(file_base)
def download_file(filename: str, file_base: str, process: bool) -> None:
    """
    Download a file and optionally process it afterwards.

    Parameters
    ----------
    filename: str
        URL of the file to be downloaded
    file_base: str
        Name of the output file to which the data will be written
    process: bool
        If True, run process_file on the downloaded file

    Returns
    -------
    None
    """
    opener = build_opener()
    # Both the HTTP response and the output file are context managers, so
    # each is closed even if the read or write raises (the original left the
    # response unclosed and called close() redundantly inside the with-block).
    with opener.open(filename) as infile, open(file_base, "wb") as outfile:
        outfile.write(infile.read())
    if process:
        process_file(file_base)
def get_files(filelist: List[str], web_path: str, process: bool = False, output_filelist=None) -> None:
    """
    For each file in a file list, check if it already exists on the system
    and if it does not, attempt to download it.

    Parameters
    ----------
    filelist: List[str]
        List of files to be downloaded
    web_path: str
        URL of the directory that contains the files.
    process: bool
        If True, process each file after download (see process_file)
    output_filelist: Optional[List[str]]
        If provided, the i-th downloaded file is written under the basename of
        output_filelist[i] rather than that of filelist[i]. Must be the same
        length as filelist.

    Returns
    -------
    None
    """
    for i, file in enumerate(filelist):
        filename = web_path + file
        if output_filelist is None:
            file_base = DATA_DIR / "ManagedData" / "Data" / "JRA-3Q" / os.path.basename(file)
        else:
            file_base = DATA_DIR / "ManagedData" / "Data" / "JRA-3Q" / os.path.basename(output_filelist[i])
        if file_base.exists():
            print(f"File already downloaded {file_base}")
        else:
            # Report the source URL rather than the placeholder "(unknown)"
            # so failed downloads can be traced back to the remote file.
            print(f'Downloading {filename} to {file_base}')
            try:
                download_file(filename, file_base, process)
            except Exception as e:
                print(f'Failed to download or process {filename} to {file_base}. Exception: {e}')
def fetch(_, out_dir: Path, _filename) -> None:
    """
    Get JRA-3Q files from UCAR.

    NOTE(review): the docstring previously claimed UCAR credentials
    (UCAR_USER/UCAR_PSWD from .env) are required, but nothing in this
    function uses them — confirm whether the OSDF endpoints need auth.

    Parameters
    ----------
    _: dummy input to match interface.
    out_dir: Path
        Path of the directory to which the output will be written.
    _filename: str
        Unused filename argument

    Returns
    -------
    None
    """
    # Real time: monthly files, already time-averaged, so no processing needed.
    web_path = "https://osdf-director.osg-htc.org/ncar/gdex/d640003/"
    filelist = make_realtime_file_list(2022, 2025)
    get_files(filelist, web_path, process=False)

    # Archive: monthly files of sub-daily data; process to a time mean after
    # download. The downloaded files are renamed to the real-time naming
    # convention (with a .nc extension) so both eras share one layout.
    web_path = 'https://amst-fiona.nationalresearchplatform.org:8443/ncar/gdex/d640000/'
    filelist = make_file_list(1948, 2021)
    output_filelist = [name + '.nc' for name in make_realtime_file_list(1948, 2021)]
    get_files(filelist, web_path, process=True, output_filelist=output_filelist)