Source code for climind.fetchers.fetcher_utils

#  Climate indicator manager - a package for managing and building climate indicator dashboards.
#  Copyright (c) 2022 John Kennedy
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
Contains a set of routines used by the fetchers to perform various standard tasks such as extracting the filename from
a URL.
"""
import os
from urllib.parse import urlparse
from datetime import datetime
from typing import Tuple, List


[docs] def filename_from_url(url: str) -> str: """ Given an url, return the filename or an empty string if there is no filename Parameters ---------- url: str URL to be parsed Returns ------- str Return the filename part of the URL """ parsed_url = urlparse(url) filename = os.path.basename(parsed_url.path) return filename
[docs] def dir_and_filename_from_url(url: str) -> Tuple[str, str]: """ Get the filename and url up to, but not including the filename Parameters ---------- url : str Returns ------- str, str Return directory name and filename """ parsed_url = urlparse(url) filename = os.path.basename(parsed_url.path) dirname = parsed_url._replace(path=os.path.dirname(parsed_url.path)).geturl() return dirname, filename
[docs] def url_from_filename(url: str, filename: str) -> str: """ Given an url and filename, replace the filename in the URL with the input filename Parameters ---------- url : str URL specifying a file, for which the filename will be changed. filename : str New filename to be use in output URL Returns ------- str Returns the URL with the new filename """ parsed_url = urlparse(url) dirname = os.path.dirname(parsed_url.path) path = f'{dirname}/{filename}' outurl = parsed_url._replace(path=path).geturl() return outurl
[docs] def get_ftp_host_and_directory_from_url(url: str) -> Tuple[str, List[str]]: """ From a url, extract the host name and the directory, the directory being broken down into a list of subdirectories Parameters ---------- url: str URL to extract information from Returns ------- str The host name list A list of directories """ parsed_url = urlparse(url) working_directory = parsed_url.path working_directory = working_directory.split('/') working_directory = working_directory[1:-1] host = parsed_url.hostname return host, working_directory
[docs] def get_n_months_back(y: int, m: int, back: int = 12) -> Tuple[int, int]: """ Get the year and month that, including the specified month, makes n months Parameters ---------- y: int Year m: int Month back: int Number of months to include Returns ------- Tuple(int, int) """ if back > 12 or back < 1: raise ValueError(f'Back {back} is larger than 12 or less than 1') if m - back + 1 >= 1: n_months_back = y else: n_months_back = y - 1 n_months_back_month = (m + 1 - back) % 12 if n_months_back_month == 0: n_months_back_month = 12 return n_months_back, n_months_back_month
[docs] def fill_year_month(instr, y, m): filled_url = instr.replace('YYYY', f'{y}') filled_url = filled_url.replace('MMMM', f'{m:02d}') return filled_url
[docs] def time_tag_string(instr): now = datetime.today() outstr = f'{now.year}{now.month:02d}{now.day:02d}.{instr}' return outstr