Source code for ismn.custom
"""
Module that handles custom, additional information that can be assigned to the
ismn data by the user.
Sometimes it is convenient to have additional information about a sensor,
a station, or its surroundings, which is not directly provided by the ISMN,
assigned to the ISMN metadata.
This module contains a base class and implementations for certain metadata
formats that the ISMN_Interface class can then use to add additional values to
python_metadata during metadata collection.
"""
from abc import abstractmethod
from typing import Union
import numpy as np
from ismn.meta import MetaData, MetaVar, Depth
import pandas as pd
class CustomMetaReader:
    """
    Base template for readers that attach user-provided metadata to ISMN
    sensors.

    Subclasses implement :meth:`read_metadata`. That method receives the
    metadata already collected for one sensor and uses it to look up
    further values from some other source. The result may be either

    - a ``MetaData`` object, which allows depth information to be attached
      to the individual variables, or
    - a plain ``dict``, which is converted to ``MetaData`` later on by the
      ISMN package, without any depth information.

    Instances of classes derived from `CustomMetaReader` can be passed to
    :class:`ismn.interface.Ismn_Interface`.
    """

    # NOTE: this class does not use an ABC metaclass, so the decorator only
    # marks the intent; instantiation is not blocked and an un-overridden
    # call simply returns None.
    @abstractmethod
    def read_metadata(self, meta) -> Union[MetaData, dict]:
        """
        Collect metadata from an additional source (i.e. one that is not
        provided directly by the ISMN) for a single sensor.

        The information already available for the sensor (usually its
        lat / lon) is used to select the matching data.

        Parameters
        ----------
        meta: MetaData
            Existing metadata of the sensor, as collected from the csv and
            .stm files. For each sensor it contains at least:

            Shared by all sensors at a station:
                longitude, latitude, elevation, network, station,
                lc_2010, lc_insitu, climate_KG, climate_insitu
            Sensor specific:
                instrument (with depth_from and depth_to),
                variable, clay_fraction, sand_fraction, organic_carbon,
                silt_fraction (and the depths of the dataset layer they
                were extracted from)

        Returns
        -------
        ancillary_meta: MetaData or dict
            Metadata collected by this reader, which is then assigned to
            the sensor. A dict is accepted as well, but is converted to
            MetaData without depths later on.
        """
        pass
class CustomStationMetadataCsv(CustomMetaReader):
    """
    Reader for (static) station metadata that is supplied as a csv file,
    e.g. when the station specific variables provided by the ISMN are not
    sufficient.

    The csv file must have the following structure::

        network;station;<var1>;<var1>_depth_from;<var1>_depth_to;<var2>;...

    - ``network`` and ``station`` refer to existing names in the metadata.
    - ``<var1>`` etc. are the names of the custom metadata variables that
      are transferred into the python metadata.
    - ``<var1>_depth_from`` and ``<var1>_depth_to`` etc. are the depths
      that are assigned to the respective variable (if the columns exist).
    """

    def __init__(self, station_meta_csv, **kwargs):
        """
        Load the csv file into a data frame (stored as ``self.df``).

        Parameters
        ----------
        station_meta_csv: str
            Path to the csv file with the content described in the class
            documentation.
        kwargs:
            Additional kwargs as passed to :func:`pandas.read_csv`.
            Pass `sep` to use a separator other than the default semicolon.
        """
        # Semicolon is used unless the caller explicitly supplies `sep`.
        delimiter = kwargs.pop('sep', ';')
        self.df = pd.read_csv(station_meta_csv, sep=delimiter, **kwargs)

    @staticmethod
    def _row2var(row: dict) -> list:
        """
        Turn one csv row into a list of MetaVar objects (name, value, depth).

        Keys ending in ``_depth_from`` / ``_depth_to`` are not variables
        themselves; they only provide the depth range of the variable they
        are named after.

        Parameters
        ----------
        row: dict
            Mapping of column name to value for a single row.

        Returns
        -------
        meta_vars: list
            One MetaVar per non-depth key in the row.
        """
        depth_suffixes = ('_depth_from', '_depth_to')
        meta_vars = []

        for name, value in row.items():
            if name.endswith(depth_suffixes):
                continue

            lower = row.get(f'{name}_depth_from')
            upper = row.get(f'{name}_depth_to')

            if lower is None and upper is None:
                # No depth columns for this variable at all.
                depth = None
            else:
                # Only one bound is given: leave the other side open-ended.
                lower = -np.inf if lower is None else lower
                upper = np.inf if upper is None else upper
                depth = Depth(lower, upper)

            meta_vars.append(MetaVar(name, value, depth))

        return meta_vars

    def read_metadata(self, meta: MetaData):
        """
        Look up the station described by the passed metadata in the csv
        file and return the csv values for it. Network and station name
        must be identical in the csv file and the passed metadata.

        Parameters
        ----------
        meta: MetaData
            Previously collected metadata; its ``network`` and ``station``
            values select the row of the csv file.

        Returns
        -------
        meta: MetaData or None
            Variables read from the first matching csv row (duplicated
            network/station rows are dropped), or None when no row of the
            csv file matches.
        """
        network_match = self.df['network'] == meta['network'].val
        station_match = self.df['station'] == meta['station'].val
        selected = network_match & station_match

        if not np.any(selected):
            return None

        matches = self.df[selected].set_index(['network', 'station'])
        # Several rows for the same station: only the first one is used.
        matches = matches[~matches.index.duplicated(keep='first')]

        collected = [
            var
            for record in matches.to_dict('records')
            for var in self._row2var(record)
        ]

        return MetaData(collected)
class CustomSensorMetadataCsv(CustomStationMetadataCsv):
    """
    Reader for sensor metadata that is supplied as a csv file, e.g. when
    the sensor specific variables provided by the ISMN are not sufficient.

    The csv file must have the following structure::

        network;station;instrument;depth_from;depth_to;<var1>;<var1>_depth_from;<var1>_depth_to;<var2> ...

    - ``network``, ``station``, ``instrument``, ``depth_from`` and
      ``depth_to`` identify the sensor a row belongs to.
    - ``<var1>`` etc. are the names of the custom metadata variables that
      are transferred into the python metadata.
    - ``<var1>_depth_from`` and ``<var1>_depth_to`` etc. are the depths
      that are assigned to the respective variable (if the columns exist).
    """

    def read_metadata(self, meta: MetaData):
        """
        Look up the sensor described by the passed metadata in the csv
        file and return the csv values for it.

        Parameters
        ----------
        meta: MetaData
            Previously collected metadata. A csv row is selected when its
            network, station, instrument and both instrument depths are
            equal to the values found here.

        Returns
        -------
        meta: MetaData
            Variables read from the first matching csv row (duplicated
            rows for the same sensor are dropped). Built from an empty
            list of variables when no row matches.
        """
        # Masks are built in the same order as the columns appear in the csv.
        network_match = self.df['network'] == meta['network'].val
        station_match = self.df['station'] == meta['station'].val
        instrument_match = self.df['instrument'] == meta['instrument'].val
        depth_from_match = \
            self.df['depth_from'] == meta['instrument'].depth[0]
        depth_to_match = \
            self.df['depth_to'] == meta['instrument'].depth[1]

        selected = network_match & station_match & instrument_match \
            & depth_from_match & depth_to_match

        sensor_keys = [
            'network', 'station', 'instrument', 'depth_from', 'depth_to']
        matches = self.df[selected].set_index(sensor_keys)
        # Several rows for the same sensor: only the first one is used.
        matches = matches[~matches.index.duplicated(keep='first')]

        collected = [
            var
            for record in matches.to_dict('records')
            for var in self._row2var(record)
        ]

        return MetaData(collected)