# dataset.py
import collections
import datetime
import re
import sys
import warnings
from enum import IntEnum

import numpy as np

# Module-wide defaults. The bare strings below each constant are attribute
# docstrings and are kept unchanged.

# Used as the default ``fmt`` argument of DataStore1001.write().
DEFAULT_NUM_FORMAT = "%g"
"""Default number format for output. Provides the `fmt` parameter of :func:`numpy.savetxt` internally."""

# Used as the default ``delimiter`` argument of DataStore1001.write().
DEFAULT_FIELD_DELIM = ","
"""Default field delimiter"""

# NOTE(review): not referenced in the visible part of this module; presumably
# applied when a variable definition gives no scale factor - confirm.
DEFAULT_SCALE_FACTOR = 1.0
"""Default variable scale factor"""

# NOTE(review): not referenced in the visible part of this module; presumably
# applied when a variable definition gives no missing value - confirm.
DEFAULT_MISSING_VALUE = -9999.0
"""Default variable missing value"""
class Formats(IntEnum):
    """ICARTT File Format Indices (FFI)"""
    # Each member's integer value equals the FFI number in its name.
    # Data model for this format: DataStore1001.
    FFI1001 = 1001
    # Data model for this format: DataStore2110.
    FFI2110 = 2110
class VariableType(IntEnum):
    """Roles a variable can have: independent, independent bounded, auxiliary or dependent."""
    IndependentVariable = 1
    IndependentBoundedVariable = 2
    AuxiliaryVariable = 3
    DependentVariable = 4

class DataStore1001:
    """Data model for FFI1001.

    All data lines are held in one structured :class:`numpy.ndarray`
    (``self.data``) with one field per variable: the independent variable
    first, followed by the dependent variables. ``self.data`` is ``None``
    until data has been added. Missing values are represented as
    :obj:`numpy.nan` in memory and are converted back to each variable's
    missing value only when writing.
    """

    def __init__(self, ivar, dvars):
        """Create an empty store for the given variables.

        :param ivar: independent variable, providing ``shortname`` and ``miss``
        :param dvars: mapping of short name to dependent variable, each
            providing ``miss``
        """
        self.ivarname = ivar.shortname
        self.varnames = [ivar.shortname] + list(dvars)

        self.missingValues = {name: dvars[name].miss for name in dvars}
        self.missingValues[self.ivarname] = ivar.miss

        self.default_dtype = np.float64
        self.dtypes = [(name, self.default_dtype) for name in self.varnames]

        self.data = None

    def __getitem__(self, s=slice(None)):
        """Return ``self.data[s]``, or ``None`` while the store is empty.

        :param s: any index accepted by the structured array (field name,
            slice, ...); defaults to the full slice
        """
        # we can only index if we have something
        if self.data is None:
            return None
        return self.data[s]

    def addFromTxt(self, f, delimiter, max_rows=None):
        """Parse delimited text with :func:`numpy.genfromtxt` and add the result.

        Values equal to a variable's missing value are masked while parsing
        and stored as :obj:`numpy.nan`.

        :param f: input passed on to :func:`numpy.genfromtxt`
        :param delimiter: field delimiter
        :param max_rows: maximum number of rows to read, ``None`` for all
        """
        # genfromtxt would warn if file is empty. We do not want that.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            newData = np.genfromtxt(
                f,
                names=self.varnames,
                dtype=self.dtypes,
                missing_values=self.missingValues,
                usemask=True,
                delimiter=delimiter,
                max_rows=max_rows,
                deletechars="",
            ).filled(fill_value=np.nan)
        self.add(newData)

    def add(self, newData):
        """(bulk) add data, providing a (structured) numpy array.

        Array has to have shape [ (ivar, dvar, dvar, ...), ... ],
        missing values have to be set to :obj:`numpy.nan`.

        :param newData: data to be added
        :type newData: numpy.ndarray

        :raises TypeError: if ``newData`` is not a numpy array
        :raises ValueError: if an unstructured array cannot be mapped onto
            the variable names of this store
        """
        if not isinstance(newData, np.ndarray):
            raise TypeError("Input data needs to be numpy ndarray.")

        if newData.dtype.names is None:
            # Plain array: reinterpret it as a structured array with one
            # field per variable. A view is used so that the dtype of the
            # caller's array object is not modified in place.
            named_dtype = [(name, newData.dtype) for name in self.varnames]
            try:
                newData = newData.view(named_dtype)
            except (TypeError, ValueError) as err:
                raise ValueError(
                    "Could not assign names to data structure, are you providing an array containing all variables?"
                ) from err

        if self.data is None:
            # first batch: nothing to append to yet
            self.data = newData
        else:
            self.data = np.append(self.data, newData)

    def denanify(self, d):
        """Return a copy of ``d`` with NaN replaced by each variable's missing value.

        :param d: structured array containing a field for every variable
        :type d: numpy.ndarray

        :return: modified copy; ``d`` itself is left untouched
        :rtype: numpy.ndarray
        """
        dd = d.copy()
        for k, miss in self.missingValues.items():
            dd[k][np.isnan(dd[k])] = miss
        return dd

    def write(
        self, f=sys.stdout, fmt=DEFAULT_NUM_FORMAT, delimiter=DEFAULT_FIELD_DELIM
    ):
        """Write all data lines with :func:`numpy.savetxt`.

        Nothing is written if the store holds no data.

        :param f: output passed on to :func:`numpy.savetxt`
        :param fmt: number format
        :param delimiter: field delimiter
        """
        # An empty store (e.g. zero dependent data lines in FFI2110) has no
        # lines to write.
        if self.data is None:
            return

        # TODO the fact that we need to clean before writing suggests we need to be more careful what to "add" in the first place!
        d = self.denanify(self.data)
        # single line data is 0D if passed as tuple, savetxt cannot work with 0D. Make 1D.
        if d.ndim == 0:
            d = np.array([d])
        # need to squeeze extra dimension added for one liners added as np.array
        if len(d.shape) == 2:
            d = np.squeeze(d, axis=1)

        np.savetxt(f, d, fmt=fmt, delimiter=delimiter)

class DataStore2110(collections.UserDict):
    """Data model for FFI2110.

    ``self.data`` maps each independent variable value (as ``float``) to a
    dict ``{"AUX": DataStore1001, "DEP": DataStore1001}`` holding the
    auxiliary data line and the dependent data line(s) for that value.
    """

    def __init__(self, ivar, ibvar, auxvars, dvars):
        """Create an empty store for the given variables.

        :param ivar: independent variable, providing ``shortname`` and ``miss``
        :param ibvar: independent bounded variable, providing ``shortname``
            and ``miss``
        :param auxvars: mapping of short name to auxiliary variable, each
            providing ``miss``; must not be empty
        :param dvars: mapping of short name to dependent variable, each
            providing ``miss``
        """
        # UserDict.__init__ creates the empty ``self.data`` dict
        super().__init__()

        self.ivar = ivar
        self.ibvar = ibvar
        self.auxvars = auxvars
        self.dvars = dvars

        self.ivarname = ivar.shortname
        self.ibvarname = ibvar.shortname
        self.auxvarnames = list(auxvars)
        self.dvarnames = list(dvars)

        self.missingValues = {name: dvars[name].miss for name in dvars}
        self.missingValues.update({name: auxvars[name].miss for name in auxvars})
        self.missingValues[self.ibvarname] = ibvar.miss
        self.missingValues[self.ivarname] = ivar.miss

        # convention: the first auxiliary variable holds the number of
        # dependent data lines that follow the auxiliary line
        self.nauxvarname = self.auxvarnames[0]

    def __getitem__(self, s=slice(None)):
        """Return ``self.data[s]``, or ``None`` if ``self.data`` is ``None``.

        :param s: key into ``self.data`` (an independent variable value)
        """
        # we can only index if we have something
        if self.data is None:
            return None
        return self.data[s]

    def addFromTxt(self, f, delimiter):
        """Read blocks of one auxiliary line plus its dependent lines from ``f``.

        Reading stops as soon as no further auxiliary line can be parsed.

        :param f: input handed to :meth:`DataStore1001.addFromTxt`
        :param delimiter: field delimiter

        :raises IOError: if the dependent lines announced by an auxiliary
            line cannot be read
        """
        while f:
            auxds = DataStore1001(self.ivar, self.auxvars)
            depds = DataStore1001(self.ibvar, self.dvars)
            try:
                auxds.addFromTxt(f, delimiter, max_rows=1)
            except Exception:
                # we are at the end of the file if this happens
                break
            ndeprows = int(auxds[self.nauxvarname])
            # it is indeed possible to have zero dependent data lines
            if ndeprows > 0:
                try:
                    depds.addFromTxt(f, delimiter, max_rows=ndeprows)
                except Exception as err:
                    raise IOError("Could not read dependent data lines.") from err
            ivarValue = float(auxds[self.ivar.shortname])
            self.data[ivarValue] = {"AUX": auxds, "DEP": depds}

    def add(self, newAuxData, newDepData):
        """(bulk) add data, providing (structured) numpy arrays for both the auxiliary and dependent data line(s)
        for a given ivar value.

        Arrays have to have shape [ (ivar, auxvar, auxvar, ...) ] and
        [ (ibvar, depvar, depvar, ...), ... ] for auxiliary and dependent data line(s), respectively.
        missing values have to be set to :obj:`numpy.nan`.

        :param newAuxData: auxiliary data line to be added
        :type newAuxData: numpy.ndarray

        :param newDepData: dependent data line(s) to be added
        :type newDepData: numpy.ndarray
        """
        auxds = DataStore1001(self.ivar, self.auxvars)
        depds = DataStore1001(self.ibvar, self.dvars)

        # the stores must be filled before the ivar value can be read back
        auxds.add(newAuxData)
        depds.add(newDepData)

        ivarValue = float(auxds[self.ivar.shortname])
        self.data[ivarValue] = {"AUX": auxds, "DEP": depds}
# NOTE: the listing is truncated here; the remainder of dataset.py was not captured.