Source code for ismn.custom

"""
Module that handles custom, additional information that can be assigned to the
ismn data by the user.
Sometimes it is convenient to have additional information about a sensor,
a station, or the surroundings — information that is not directly provided
by the ISMN — assigned to the ISMN metadata.
This module contains a base class and implementations for certain metadata
formats that the ISMN_Interface class can then use to add additional values to
python_metadata during metadata collection.
"""

from abc import abstractmethod
from typing import Union
import numpy as np
from ismn.meta import MetaData, MetaVar, Depth
import pandas as pd


class CustomMetaReader:
    """
    Base template for readers that attach extra metadata to ismn sensors.

    Subclasses implement :meth:`read_metadata`, which receives the metadata
    already collected for a sensor and returns the additional values to
    attach — either as a :class:`ismn.meta.MetaData` object (allows depth
    information to be assigned to each variable) or as a plain dict (later
    converted to ``MetaData`` without depths).

    Instances of subclasses can be passed to
    :class:`ismn.interface.Ismn_Interface`, which calls them during
    metadata collection.
    """

    @abstractmethod
    def read_metadata(self, meta) -> Union[MetaData, dict]:
        """
        Collect metadata from sources not provided directly by the ISMN.

        The sensor's existing metadata (usually its lat/lon) is used to
        select the matching ancillary data.

        Parameters
        ----------
        meta: MetaData
            Existing metadata for a sensor, collected from csv and .stm
            files. Contains at least, shared by all sensors at a station:
            longitude, latitude, elevation, network, station, lc_2010,
            lc_insitu, climate_KG, climate_insitu; and sensor specific:
            instrument (with depth_from and depth_to), variable,
            clay_fraction, sand_fraction, organic_carbon, silt_fraction
            (and the depths of the dataset layer they were extracted from).

        Returns
        -------
        ancillary_meta: MetaData or dict
            Metadata found by this reader, assigned to the sensor. A dict
            also works but is converted to MetaData without depths
            assigned later on.
        """
        ...
class CustomStationMetadataCsv(CustomMetaReader):
    """
    Allows passing (static) metadata for ISMN stations as a csv file.
    E.g. if the station specific variables provided by the ISMN are not
    enough. The metadata must be stored in a csv file with the following
    structure:

    network;station;<var1>;<var1>_depth_from;<var1>_depth_to;<var2>;...

    - where network and station refer to existing names in the metadata.
    - where <var1> etc. are the names of the custom metadata variables
      that are transferred into the python metadata
    - where <var1>_depth_from and <var1>_depth_to etc. are the depths that
      are assigned to the metadata (if these columns exist)
    """

    def __init__(self, station_meta_csv, **kwargs):
        """
        Parameters
        ----------
        station_meta_csv: str
            Path to the csv file with the above described content
        kwargs:
            Additional kwargs as passed to :func:`pandas.read_csv`
            To use a different separator than the default semicolon, use
            `sep`
        """
        # default to semicolon-separated files unless the caller overrides
        sep = kwargs.pop('sep', ';')

        self.df = pd.read_csv(station_meta_csv, sep=sep, **kwargs)

    @staticmethod
    def _row2var(row: dict) -> list:
        """
        Convert one csv row into a list of MetaVar objects.

        For each column <name> (other than the *_depth_from / *_depth_to
        helper columns), a MetaVar is created; if matching depth columns
        exist, a Depth is attached, with missing bounds filled with
        -inf / +inf.

        Parameters
        ----------
        row: dict
            One record of the csv file, column name -> value.

        Returns
        -------
        meta_vars: list[MetaVar]
            Extracted metadata variables.
        """
        meta_vars = []

        for name, value in row.items():
            # depth columns only annotate other variables, never become
            # variables themselves
            if name.endswith('_depth_from') or name.endswith('_depth_to'):
                continue

            depth_from = row.get(f'{name}_depth_from')
            depth_to = row.get(f'{name}_depth_to')

            if (depth_from is None) and (depth_to is None):
                depth = None
            else:
                # one-sided depths are treated as open-ended intervals
                if depth_from is None:
                    depth_from = -np.inf
                if depth_to is None:
                    depth_to = np.inf
                depth = Depth(depth_from, depth_to)

            meta_vars.append(MetaVar(name, value, depth))

        return meta_vars

    def read_metadata(self, meta: MetaData):
        """
        Match passed metadata entries to the csv file to find common
        stations for which the csv metadata is then added. The network and
        station names must match between csv file and previously collected
        metadata.

        Parameters
        ----------
        meta: MetaData
            Metadata to which the values from the csv file are added when
            the station and network name matches.

        Returns
        -------
        meta: MetaData or None
            Additional metadata at the location, or None when the csv
            file contains no entry for this network/station.
        """
        cond = (self.df['network'] == meta['network'].val) & \
               (self.df['station'] == meta['station'].val)

        if not np.any(cond):
            return None

        df = self.df[cond].set_index(['network', 'station'])

        # drop potential duplicates, keep first
        df = df[~df.index.duplicated(keep='first')]

        meta_vars = []
        for row in df.to_dict('records'):
            meta_vars += self._row2var(row)

        return MetaData(meta_vars)
class CustomSensorMetadataCsv(CustomStationMetadataCsv):
    """
    Allows passing metadata for ISMN sensors as a csv file. E.g. if the
    sensor specific variables provided by the ISMN are not enough. The
    metadata must be stored in a csv file with the following structure:

    network;station;instrument;depth_from;depth_to;<var1>;<var1>_depth_from;<var1>_depth_to;<var2> ...

    - where network, station, instrument, depth_from and depth_to refer to
      an existing sensor in the metadata
    - where <var1> etc. are the names of the custom metadata variables
      that are transferred into the python metadata
    - where <var1>_depth_from and <var1>_depth_to etc. are the depths that
      are assigned to the metadata (if these columns exist)
    """

    def read_metadata(self, meta: MetaData):
        """
        Match passed metadata entries to the csv file to find common
        sensors for which the csv metadata is then added.

        Parameters
        ----------
        meta: MetaData
            Metadata that the csv values are added to for sensors where
            the network, station, instrument, and instrument depths match.

        Returns
        -------
        meta: MetaData or None
            Additional metadata for the sensor, or None when the csv file
            contains no entry for this sensor.
        """
        cond = (self.df['network'] == meta['network'].val) & \
               (self.df['station'] == meta['station'].val) & \
               (self.df['instrument'] == meta['instrument'].val) & \
               (self.df['depth_from'] == meta['instrument'].depth[0]) & \
               (self.df['depth_to'] == meta['instrument'].depth[1])

        # consistent with CustomStationMetadataCsv: no csv entry -> None
        # instead of an empty MetaData object
        if not np.any(cond):
            return None

        df = self.df[cond].set_index(
            ['network', 'station', 'instrument', 'depth_from', 'depth_to'])

        # drop potential duplicates, keep first
        df = df[~df.index.duplicated(keep='first')]

        meta_vars = []
        for row in df.to_dict('records'):
            meta_vars += self._row2var(row)

        return MetaData(meta_vars)