# Source code for climind.fetchers.fetcher_jra3q_grid

#  Climate indicator manager - a package for managing and building climate indicator dashboards.
#  Copyright (c) 2022 John Kennedy
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
Set of scripts to download the JRA-55 gridded data. Adapted from the scripts
provided by UCAR. Data are stored by year up till a certain point and by
month for near-real time data thereafter. Credentials are needed (see fetch function)
"""
import calendar
import itertools
import os
import sys
from pathlib import Path
from typing import List
from urllib.request import build_opener

import requests
import xarray as xa
from dotenv import load_dotenv

from climind.config.config import DATA_DIR


def make_realtime_file_list(first_year: int, final_year: int) -> List[str]:
    """
    Make a list of monthly real-time filenames between the two specified years.

    Parameters
    ----------
    first_year: int
        Year to start generation
    final_year: int
        Year to end generation

    Returns
    -------
    List[str]
        List of filenames for real-time data between the specified years
    """
    # One entry per (year, month) pair, in chronological order.
    return [
        f'anl_surf125/{year}{month:02d}/anl_surf125.{year}{month:02d}'
        for year in range(first_year, final_year + 1)
        for month in range(1, 13)
    ]
def make_file_list(first_year, final_year) -> List[str]:
    """
    Make a list of monthly archived filenames between the two specified years.

    Each filename covers one calendar month, from 00Z on the 1st to 18Z on the
    last day of the month.

    Parameters
    ----------
    first_year: int
        Year to start generation
    final_year: int
        Year to end generation

    Returns
    -------
    List[str]
        List of filenames for archived data between the specified years
    """
    filelist = []
    for year, month in itertools.product(range(first_year, final_year + 1), range(1, 13)):
        # calendar.monthrange handles leap years for any year; the previous
        # hard-coded list of leap years stopped at 2020 and would have given
        # 28 days for February 2024 onwards.
        month_length = calendar.monthrange(year, month)[1]
        filelist.append(
            f'anl_surf125/{year}{month:02d}/jra3q.anl_surf125.0_0_0.tmp2m-hgt-an-ll125.'
            f'{year}{month:02d}0100_{year}{month:02d}{month_length:02d}18.nc')
    return filelist
def process_file(file_base: str) -> None:
    """
    Read in a downloaded file, take the time mean and overwrite the file
    with the result. Does nothing if the file does not exist.

    Parameters
    ----------
    file_base: str
        Filename of file to be processed

    Returns
    -------
    None
    """
    if Path(file_base).exists():
        with xa.open_dataset(file_base) as ds:
            # Load the mean into memory so the source file is closed before
            # it is overwritten; calling to_netcdf on the same path while the
            # lazily-loaded dataset is still open can fail or corrupt the file.
            monthly_mean = ds.mean('time').load()
        monthly_mean.encoding = {'time': 'ignore'}
        monthly_mean.to_netcdf(file_base)
def download_file(filename: str, file_base: str, process: bool) -> None:
    """
    Download a file and optionally process it afterwards.

    Parameters
    ----------
    filename: str
        URL of the file to be downloaded
    file_base: str
        Name of the output file to which the data will be written
    process: bool
        If True, run process_file on the downloaded file

    Returns
    -------
    None
    """
    opener = build_opener()
    # Both the HTTP response and the output file are context managers, so
    # each is closed even if the read or write raises (the original left the
    # response unclosed and called close() redundantly inside the with-block).
    with opener.open(filename) as infile, open(file_base, "wb") as outfile:
        outfile.write(infile.read())
    if process:
        process_file(file_base)
def get_files(filelist: List[str], web_path: str, process: bool = False, output_filelist=None) -> None:
    """
    For each file in a file list, check if it already exists on the system
    and if it does not, attempt to download it.

    Parameters
    ----------
    filelist: List[str]
        List of files to be downloaded
    web_path: str
        URL of the directory that contains the files.
    process: bool
        If True, process each file after download (see process_file)
    output_filelist: Optional[List[str]]
        If provided, the i-th downloaded file is written under the basename of
        output_filelist[i] rather than that of filelist[i]. Must be the same
        length as filelist.

    Returns
    -------
    None
    """
    for i, file in enumerate(filelist):
        filename = web_path + file
        if output_filelist is None:
            file_base = DATA_DIR / "ManagedData" / "Data" / "JRA-3Q" / os.path.basename(file)
        else:
            file_base = DATA_DIR / "ManagedData" / "Data" / "JRA-3Q" / os.path.basename(output_filelist[i])
        if file_base.exists():
            print(f"File already downloaded {file_base}")
        else:
            # Report the source URL rather than the placeholder "(unknown)"
            # so failed downloads can be traced back to the remote file.
            print(f'Downloading {filename} to {file_base}')
            try:
                download_file(filename, file_base, process)
            except Exception as e:
                print(f'Failed to download or process {filename} to {file_base}. Exception: {e}')
def fetch(_, out_dir: Path, _filename) -> None:
    """
    Get JRA-3Q files from UCAR.

    NOTE(review): the docstring previously claimed UCAR credentials
    (UCAR_USER/UCAR_PSWD from .env) are required, but nothing in this
    function uses them — confirm whether the OSDF endpoints need auth.

    Parameters
    ----------
    _: dummy input to match interface.
    out_dir: Path
        Path of the directory to which the output will be written.
    _filename: str
        Unused filename argument

    Returns
    -------
    None
    """
    # Real time: monthly files, already time-averaged, so no processing needed.
    web_path = "https://osdf-director.osg-htc.org/ncar/gdex/d640003/"
    filelist = make_realtime_file_list(2022, 2025)
    get_files(filelist, web_path, process=False)

    # Archive: monthly files of sub-daily data; process to a time mean after
    # download. The downloaded files are renamed to the real-time naming
    # convention (with a .nc extension) so both eras share one layout.
    web_path = 'https://amst-fiona.nationalresearchplatform.org:8443/ncar/gdex/d640000/'
    filelist = make_file_list(1948, 2021)
    output_filelist = [name + '.nc' for name in make_realtime_file_list(1948, 2021)]
    get_files(filelist, web_path, process=True, output_filelist=output_filelist)