# Source code for climind.web.dashboard

#  Climate indicator manager - a package for managing and building climate indicator dashboards.
#  Copyright (c) 2022 John Kennedy
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.

import json
import hashlib
import pkg_resources
from datetime import datetime
from typing import Union, List
from pathlib import Path
from zipfile import ZipFile
from jinja2 import Environment, FileSystemLoader, select_autoescape

from climind.data_types.timeseries import TimeSeriesMonthly, TimeSeriesAnnual, TimeSeriesIrregular, \
    write_dataset_summary_file_with_metadata
import climind.plotters.plot_types as pt
import climind.stats.paragraphs as pa
from climind.data_manager.processing import DataArchive
from climind.definitions import ROOT_DIR
from climind.config.config import DATA_DIR

# Rebind the DATA_DIR imported from climind.config.config to its "ManagedData/Data"
# sub-directory. Dashboard.__init__ stores this value as its data directory.
DATA_DIR = DATA_DIR / "ManagedData" / "Data"



[docs]
def process_single_dataset(ds: Union[TimeSeriesAnnual, TimeSeriesMonthly, TimeSeriesIrregular],
                           processing_steps: List[dict]) -> Union[TimeSeriesAnnual, TimeSeriesMonthly, TimeSeriesIrregular]:
    """
    Run a sequence of processing steps on a data set and return the outcome.

    The steps are applied in the order given. Each step names a method of the current data set
    and supplies the positional arguments to call it with. When the method returns something
    other than None, that return value becomes the data set used by the following steps;
    when it returns None the current data set object is carried forward.

    Parameters
    ----------
    ds: Union[TimeSeriesAnnual, TimeSeriesMonthly, TimeSeriesIrregular]
        Data set to be processed
    processing_steps: List[dict]
        List of steps. Each step must be a dictionary with a 'method' entry holding the name
        of a method of the data set, and an 'args' entry holding the list of positional
        arguments passed to that method

    Returns
    -------
    Union[TimeSeriesAnnual, TimeSeriesMonthly, TimeSeriesIrregular]
        The data set after the final step (the input object itself if there are no steps)
    """
    current = ds
    for stage in processing_steps:
        method_name, positional = stage['method'], stage['args']

        # Look the method up on the current data set and call it with the unpacked arguments
        result = getattr(current, method_name)(*positional)

        # A None result means there is no replacement object, so keep the current one
        current = current if result is None else result

    return current




[docs]
class WebComponent:
    """
    Base class for dashboard components that are described by a metadata dictionary.

    The metadata is exposed through item access (``component[key]``) and the component keeps
    a list of the data sets it has selected, read and processed.
    """

    def __init__(self, component_metadata: dict):
        """
        Create a component from its metadata.

        Parameters
        ----------
        component_metadata: dict
            Dictionary containing the metadata for the component. It is stored by reference,
            so entries set on the component are also visible through the original dictionary.
        """
        self.metadata = component_metadata
        self.datasets = []

    def __getitem__(self, key):
        return self.metadata[key]

    def __setitem__(self, key, value):
        self.metadata[key] = value

    def select_and_read_data(self, data_dir: Path, archive: DataArchive):
        """
        Using the specified DataArchive, select the subset of data described by the 'selecting'
        entry of the metadata and read in the data sets from the data_dir directory.

        Parameters
        ----------
        data_dir: Path
            Path of the directory in which the data are to be found
        archive: DataArchive
            Archive of data used to select and populate the data sets

        Returns
        -------
        None
        """
        selection_metadata = self['selecting']
        selected = archive.select(selection_metadata)
        self.datasets = selected.read_datasets(data_dir)

    def process_datasets(self):
        """
        Apply the processing steps specified in the 'processing' entry of the metadata to
        all the data sets, replacing the stored data sets with the processed versions.

        Returns
        -------
        None

        Raises
        ------
        RuntimeError
            If any of the processing steps fails to run. The original exception is attached
            as the cause of the RuntimeError.
        """
        processed_datasets = []
        for ds in self.datasets:
            try:
                processed = process_single_dataset(ds, self['processing'])
            except Exception as e:
                # Chain explicitly so the traceback of the underlying failure is preserved
                raise RuntimeError(f"Failed to process {ds.metadata['name']} with error {e}") from e
            processed_datasets.append(processed)

        self.datasets = processed_datasets





[docs]
class Paragraph(WebComponent):
    """
    A block of generated text in a dashboard page. The text is produced by a function from
    the paragraphs module, named in the 'writing' entry of the paragraph metadata.
    """

    def __init__(self, paragraph_metadata: dict):
        """
        Create a Paragraph from its metadata.

        Parameters
        ----------
        paragraph_metadata: dict
            Dictionary containing the metadata for the paragraph
        """
        super().__init__(paragraph_metadata)

    def process_paragraph(self, data_dir: Path, archive: DataArchive, focus_year: int = 2021) -> None:
        """
        Process and ultimately render the Paragraph object

        Parameters
        ----------
        data_dir: Path
            Path of the directory containing the data
        archive: DataArchive
            DataArchive object with the metadata describing all the datasets
        focus_year: int
            Year which the paragraph will focus on, usually the most recent year, though it needn't be.

        Returns
        -------
        None
        """
        self.select_and_read_data(data_dir, archive)
        self.process_datasets()
        self.render(focus_year)

    def process_datasets(self):
        """
        Run the processing specified in the paragraph metadata on each of the data sets, then
        store the metadata of the first data set under the 'dataset_metadata' entry.

        Returns
        -------
        None

        Raises
        ------
        RuntimeError
            If any of the processing steps fails to run
        IndexError
            If there are no data sets, so there is no metadata to record
        """
        super().process_datasets()
        if not self.datasets:
            # Same exception type as indexing an empty list, but with an explanatory message
            raise IndexError("Paragraph has no data sets, so dataset metadata cannot be recorded")
        self['dataset_metadata'] = self.datasets[0].metadata

    def render(self, year: int = 2021):
        """
        Render out the text of the paragraph specified in the paragraph metadata. The text is
        stored under the 'text' entry and the time of rendering under the 'updated' entry.

        Parameters
        ----------
        year: int
            Year which is the focus of the paragraph.

        Returns
        -------
        None
        """
        writing = self['writing']
        paragraph_function = getattr(pa, writing['function'])

        # Optional keyword arguments for the paragraph function; none are passed if absent
        kwargs = writing.get('kwargs', {})

        self['text'] = paragraph_function(self.datasets, year, **kwargs)
        now = datetime.now()
        self['updated'] = f'{now}'[0:16]  # Keep "YYYY-MM-DD HH:MM"; don't need precise time





[docs]
class Card(WebComponent):
    """
    A single panel in a dashboard web page, consisting of a figure and a zip archive of the
    formatted data behind it.
    """

    def __init__(self, card_metadata: dict):
        """
        A card is a single panel in a dashboard web page. The Card class manages the metadata
        associated with the card, and generates the files that are associated with it. These
        include the figure (in multiple formats) and the data files (in a zip archive)

        Parameters
        ----------
        card_metadata: dict
            dictionary containing the metadata for the card. If it has no 'format' entry,
            one is added with the value 'svg'.
        """
        if 'format' not in card_metadata:
            card_metadata['format'] = 'svg'
        super().__init__(card_metadata)

    def process_card(self, data_dir: Path, figure_dir: Path, formatted_data_dir: Path, archive: DataArchive):
        """
        Process the datasets, plot them and write out the data based on the metadata in the Card

        Parameters
        ----------
        data_dir: Path
            Path of the directory in which the data are found.
        figure_dir: Path
            Path of the directory to which the figures will be written.
        formatted_data_dir: Path
            Path of the directory to which the formatted data will be written.
        archive: DataArchive
            DataArchive object containing the descriptive metadata.

        Returns
        -------
        None
        """
        self.select_and_read_data(data_dir, archive)
        self.process_datasets()
        self.plot(figure_dir)
        self.make_zip_file(formatted_data_dir)

    def process_datasets(self):
        """
        Run the processing specified in the card metadata on each of the data sets, then store
        a list with selected metadata entries of each data set under the 'dataset_metadata' entry.

        Returns
        -------
        None
        """
        super().process_datasets()

        # Entries copied from each data set's metadata into the card
        copied_keys = (
            'name', 'display_name', 'url', 'citation', 'citation_url',
            'data_citation', 'acknowledgement', 'notes', 'history',
        )
        self['dataset_metadata'] = [
            {key: ds.metadata[key] for key in copied_keys}
            for ds in self.datasets
        ]

    def plot(self, figure_dir):
        """
        Plot the figure specified in the card metadata, output to the figure_dir directory.
        The figure name and the caption returned by the plotting function are stored under
        the 'figure_name' and 'caption' entries.

        Parameters
        ----------
        figure_dir: Path
            Path of the directory to which the figure should be written

        Returns
        -------
        None
        """
        plotting = self['plotting']
        figure_name = f"{self['title']}.png".replace(" ", "_")
        plot_function = getattr(pt, plotting['function'])
        plot_title = plotting['title']

        # Optional keyword arguments for the plotting function; none are passed if absent
        kwargs = plotting.get('kwargs', {})
        caption = plot_function(figure_dir, self.datasets, figure_name, plot_title, **kwargs)

        self['figure_name'] = figure_name
        self['caption'] = caption

    def make_csv_files(self, formatted_data_dir: Path) -> List[Path]:
        """
        Make a csv file in the standard format for each data set in the Card and return a list
        of the paths of the csv files. When the Card holds more than one data set and the last
        one is an annual or monthly time series, a summary file covering all the data sets is
        written as well.

        Parameters
        ----------
        formatted_data_dir: Path
            Path of the directory to which the csv files will be written

        Returns
        -------
        List[Path]
            List containing a Path for each csv file written
        """
        timeseries_types = (TimeSeriesMonthly, TimeSeriesAnnual, TimeSeriesIrregular)

        csv_paths = []
        for ds in self.datasets:
            if isinstance(ds, timeseries_types):
                csv_filename = f"{ds.metadata['variable']}_{ds.metadata['name']}.csv".replace(" ", "_")
                csv_path = formatted_data_dir / csv_filename
                ds.write_csv(csv_path)
                csv_paths.append(csv_path)

        if len(self.datasets) > 1:
            # The last data set decides whether a summary is written and supplies its name
            last_ds = self.datasets[-1]
            if isinstance(last_ds, (TimeSeriesAnnual, TimeSeriesMonthly)):
                csv_filename = f"{last_ds.metadata['variable']}_summary.csv".replace(" ", "_")
                csv_path = formatted_data_dir / csv_filename
                write_dataset_summary_file_with_metadata(self.datasets, csv_path)
                csv_paths.append(csv_path)

        return csv_paths

    def make_zip_file(self, formatted_data_dir: Path):
        """
        Create a formatted data file for each data set and zip these into a zip file. The
        individual csv files are deleted once they have been added to the archive. Adds the
        metadata elements 'csv_name', with the name of the zip file, and 'csv_checksum', with
        the MD5 hex digest of the zip file.

        Parameters
        ----------
        formatted_data_dir: Path
            Path of the directory to which the zip file and data files should be written

        Returns
        -------
        None
        """
        csv_paths = self.make_csv_files(formatted_data_dir)

        zipfile_name = f"{self['title']}_data_files.zip".replace(" ", "_")
        zipfile_path = formatted_data_dir / zipfile_name
        with ZipFile(zipfile_path, 'w') as zip_archive:
            for csv_path in csv_paths:
                # Store under the bare file name so the archive has no directory structure
                zip_archive.write(csv_path, arcname=csv_path.name)
                csv_path.unlink()

        with open(zipfile_path, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()

        self['csv_checksum'] = checksum
        self['csv_name'] = zipfile_name





[docs]
class Page:
    """
    A single web page of a dashboard, built from Card and Paragraph components and rendered
    with a jinja2 template.
    """

    def __init__(self, metadata: dict):
        """
        A Page from a dashboard. A page contains multiple Cards, which are used to render a
        jinja2 template.

        Parameters
        ----------
        metadata: dict
            Dictionary containing the page metadata
        """
        self.metadata = metadata

    def __getitem__(self, key):
        return self.metadata[key]

    def __setitem__(self, key, value):
        self.metadata[key] = value

    def _process_cards(self, data_dir: Path, figure_dir: Path,
                       formatted_data_dir: Path, archive: DataArchive) -> List[Card]:
        """
        Process each of the cards on the page. A card that fails to process is reported with
        a printed message and left out; a card whose metadata has a 'hidden' entry is processed
        but left out of the returned list.

        Parameters
        ----------
        data_dir: Path
            Path of the directory containing the data
        figure_dir: Path
            Path of directory to which figures will be written
        formatted_data_dir: Path
            Path of directory to which formatted data will be written
        archive: DataArchive
            Archive which contains all the metadata for this selection

        Returns
        -------
        List[Card]
            List of the processed Cards
        """
        processed_cards = []
        for card_metadata in self['cards']:
            this_card = Card(card_metadata)
            try:
                this_card.process_card(data_dir, figure_dir, formatted_data_dir, archive)
            except Exception as e:
                # Best effort: one broken card must not stop the rest of the page being built
                print(f"Card processing failed {this_card['title']} with error {e}")
            else:
                if 'hidden' not in card_metadata:
                    processed_cards.append(this_card)

        return processed_cards

    def _process_paragraphs(self, data_dir: Path, archive: DataArchive, focus_year: int = 2021) -> List[Paragraph]:
        """
        Process each of the paragraphs on the page. A paragraph that fails to process is
        reported with a printed message and left out.

        Parameters
        ----------
        data_dir: Path
            Path of the directory containing the data
        archive: DataArchive
            Archive which contains all the metadata for this selection
        focus_year: int
            Year to focus on

        Returns
        -------
        List[Paragraph]
            List of the processed Paragraphs
        """
        processed_paragraphs = []
        for paragraph_metadata in self['paragraphs']:
            this_paragraph = Paragraph(paragraph_metadata)
            try:
                this_paragraph.process_paragraph(data_dir, archive, focus_year=focus_year)
            except Exception as e:
                # Best effort: one broken paragraph must not stop the rest of the page
                print(f"Paragraph processing failed with error {e}.")
            else:
                processed_paragraphs.append(this_paragraph)

        return processed_paragraphs

    def build(self, build_dir: Path, data_dir: Path, archive: DataArchive,
              focus_year: int = 2021, menu_items: Union[List[List[str]], None] = None):
        """
        Build the Page, processing all the Card and Paragraph objects, then populating the template
        to generate a webpage, figures and formatted data.

        Parameters
        ----------
        build_dir: Path
            Path of the directory to which the html, figures and data will be written. The
            'figures' and 'formatted_data' sub-directories are created if needed, along with
            any missing parent directories.
        data_dir: Path
            Path of the directory where the data are to be found.
        archive: DataArchive
            DataArchive containing the metadata for the datasets
        focus_year: int
            Year to focus on. Usually, this will be the latest year
        menu_items: List[List[str]]
            List of items to display in the menu. Each item is a two element list, with the name of the webpage as
            the first element (which gets a .html extension) and the title of the page as the second elements. The
            title is used to generate the menu items so should be human readable. If omitted, an empty
            list is used.

        Returns
        -------
        None
        """
        # Use a fresh list per call rather than a mutable default shared between calls
        if menu_items is None:
            menu_items = []

        figure_dir = build_dir / 'figures'
        figure_dir.mkdir(parents=True, exist_ok=True)

        formatted_data_dir = build_dir / 'formatted_data'
        formatted_data_dir.mkdir(parents=True, exist_ok=True)

        print(f"Building {self.metadata['id']} using template {self.metadata['template']}")

        processed_cards = self._process_cards(data_dir, figure_dir, formatted_data_dir, archive)
        processed_paragraphs = self._process_paragraphs(data_dir, archive, focus_year=focus_year)

        now = datetime.today()
        climind_version = pkg_resources.get_distribution("climind").version

        self['created'] = f'{now.year}-{now.month:02d}-{now.day:02d}'
        self['code_version'] = f'climind v{climind_version}'

        # populate template to make webpage
        env = Environment(
            loader=FileSystemLoader(ROOT_DIR / "climind" / "web" / "jinja_templates"),
            autoescape=select_autoescape()
        )
        template = env.get_template(f"{self['template']}.html.jinja")

        # Render before opening the output so a template error does not leave an empty file
        html = template.render(cards=processed_cards,
                               paragraphs=processed_paragraphs,
                               page_meta=self,
                               menu_items=menu_items)

        # Explicit encoding so the output does not depend on the platform default
        with open(build_dir / f"{self['id']}.html", 'w', encoding='utf-8') as out_file:
            out_file.write(html)





[docs]
class Dashboard:
    """
    A collection of Pages built from a single set of dashboard metadata and a DataArchive.
    """

    def __init__(self, metadata: dict, archive: DataArchive):
        """
        Create a dashboard from a set of dashboard metadata

        Parameters
        ----------
        metadata: dict
            Dictionary containing the dashboard metadata. A Page is created for each entry
            of its 'pages' list.
        archive: DataArchive
            Metadata archive that will be used to populate the dashboard
        """
        self.metadata = metadata
        self.archive = archive
        self.data_dir = DATA_DIR

        self.pages = [Page(page_metadata) for page_metadata in self.metadata['pages']]

    @classmethod
    def from_json(cls, json_file: Path, archive_dir: Union[Path, List[Path]]):
        """
        Create a Dashboard from a json file and directory containing dataset metadata

        Parameters
        ----------
        json_file: Path
            Path to the json file
        archive_dir: Union[Path, List[Path]]
            Path of the directory which contains the dataset metadata, or a list of such
            paths. It is passed unchanged to DataArchive.from_directory.

        Returns
        -------
        Dashboard
        """
        # Explicit encoding so reading does not depend on the platform default
        with open(json_file, encoding='utf-8') as f:
            metadata = json.load(f)
        archive = DataArchive.from_directory(archive_dir)
        return cls(metadata, archive)

    def build(self, build_dir: Path, focus_year: int = 2021):
        """
        Build all the pages in the dashboard. This will create the html, the images,
        the formatted data in a chosen directory

        Parameters
        ----------
        build_dir: Path
            Path of the directory to build the web pages in
        focus_year: int
            Year to focus on. Usually, this will be the latest year

        Returns
        -------
        None
        """
        # Every page gets the same menu: one [id, name] pair per page in the dashboard
        page_ids = [[page['id'], page['name']] for page in self.pages]

        for page in self.pages:
            page.build(build_dir, self.data_dir, self.archive,
                       focus_year=focus_year,
                       menu_items=page_ids)