Source code for climind.fetchers.fetcher_gpcc_quantile

#  Climate indicator manager - a package for managing and building climate indicator dashboards.
#  Copyright (c) 2023 John Kennedy
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
import itertools
import re
from datetime import datetime
from pathlib import Path
import requests
import shutil
from bs4 import BeautifulSoup, SoupStrainer

from climind.fetchers.fetcher_utils import dir_and_filename_from_url, url_from_filename, get_n_months_back, \
    fill_year_month


def get_time_span(filled_url):
    """
    Work out which time span, in months, a filled URL refers to.

    The URL is searched for the markers '1month', '3month', '6month', '9month' and '12month',
    in that order, and the number belonging to the first marker found is returned.

    Parameters
    ----------
    filled_url: str
        URL in which to look for a time span marker

    Returns
    -------
    int
        The matched time span: one of 1, 3, 6, 9 or 12

    Raises
    ------
    ValueError
        If none of the markers appears in the URL
    """
    for span in (1, 3, 6, 9, 12):
        if f'{span}month' in filled_url:
            return span
    raise ValueError("Filled URL does not match one of the allowed time spans")
def get_file(filled_url, out_path, timeout=60):
    """
    Download a single file and write it to disk.

    The download is best effort: if the server answers with any status other than 200 nothing
    is written, and if the connection fails or times out a message is printed and the function
    returns normally.

    Parameters
    ----------
    filled_url: str
        URL of the file to download
    out_path: Path or str
        Location the downloaded content is written to
    timeout: float
        Number of seconds to wait for the server to accept the connection or to send data
        before giving up. Without a timeout a stalled server would block the caller forever.

    Returns
    -------
    None
    """
    try:
        # The context manager releases the streamed connection in every case, including
        # responses whose status is not 200 and whose body is therefore never read.
        with requests.get(filled_url, stream=True, headers={'User-agent': 'Mozilla/5.0'},
                          timeout=timeout) as r:
            if r.status_code == 200:
                with open(out_path, 'wb') as f:
                    # iter_content decodes gzip/deflate transfer encodings and reports
                    # mid-transfer failures as requests exceptions, so they reach the handler below
                    for chunk in r.iter_content(chunk_size=65536):
                        f.write(chunk)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        print(f"Couldn't connect to {filled_url}")
def fetch(url: str, outdir: Path, _) -> None:
    """
    Fetch GPCC quantile data.

    Every combination of year (1982 up to and including the current year) and month (1 to 12)
    is processed in turn. The year and month are substituted into the URL, the time span is read
    from the resulting URL, and the '*' wildcard is replaced by the year and month obtained by
    stepping back that many months. The resulting file is downloaded unless a copy already
    exists in the output directory; files for the current and the previous year are always
    downloaded again.

    Parameters
    ----------
    url: str
        URL of the file to be downloaded, containing placeholders for the year and month and a
        '*' wildcard that is filled in on a case by case basis.
    outdir: Path
        Path of the directory to which the output will be written
    _:
        Unused.

    Returns
    -------
    None
    """
    current_year = datetime.now().year
    years = range(1982, current_year + 1)
    months = range(1, 13)

    for year, month in itertools.product(years, months):
        # Fill in the year and month first; the time span marker is then read from the result
        target_url = fill_year_month(url, year, month)
        span = get_time_span(target_url)

        # The wildcard stands for the year and month lying `span` months back
        start_year, start_month = get_n_months_back(year, month, back=span)
        target_url = target_url.replace('*', f'{start_year}{start_month:02d}')

        _directory, filename = dir_and_filename_from_url(target_url)
        destination = outdir / filename

        # Older files that are already on disk are left alone. The current and previous year
        # are always refreshed to pick up updates from first guess to monitoring.
        if destination.exists() and year < current_year - 1:
            continue

        get_file(target_url, destination)