Source code for climind.fetchers.fetcher_url_with_backsearch

#  Climate indicator manager - a package for managing and building climate indicator dashboards.
#  Copyright (c) 2022 John Kennedy
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.

import os
from pathlib import Path
from urllib.parse import urlparse
import requests
import shutil
from datetime import datetime



[docs]
def filename_from_url(url: str) -> str:
    """
    Extract just the filename from a URL.

    Parameters
    ----------
    url: str
        URL of a file
    Returns
    -------
    str
        The filename of the file specified by the URL
    """
    parsed_url = urlparse(url)
    filename = os.path.basename(parsed_url.path)

    return filename




[docs]
def fetch(url: str, out_dir: Path, _) -> None:
    """
    Fetch file but using a backsearch. Backsearching starts with the most recent month, creates a filename using
    that month to fill the year (YYYY) and month (MMMM) placeholders in the specified URL and then tries to download
    that file. Search proceeds backwards for 24 months from today's date.

    Parameters
    ----------
    url: str
        URL of the file containing placeholders for the year (YYYY) and month (MMMM)
    out_dir: Path
        Path to which the output will be written
    Returns
    -------
    None
    """
    now = datetime.now()
    y = now.year
    m = now.month

    nsteps = 24

    for _ in range(1, nsteps + 1):

        ly = y
        lm = m - 1
        if lm == 0:
            lm = 12
            ly = y - 1

        filled_url = url.replace('YYYY', f'{y}')
        filled_url = filled_url.replace('MMMM', f'{m:02d}')

        filled_url = filled_url.replace('MLML', f'{lm:02d}')
        filled_url = filled_url.replace('YLYL', f'{ly:04d}')

        filled_url = filled_url.replace('VVVV', '')

        print(filled_url)

        filename = filename_from_url(filled_url)
        out_path = out_dir / filename

        print(out_path)

        r = requests.get(filled_url, stream=True, headers={'User-agent': 'Mozilla/5.0'})
        if r.status_code == 200:
            with open(out_path, 'wb') as f:
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, f)

        m -= 1
        if m == 0:
            y -= 1
            m = 12