Cleanup and extend docs, describe output format

4472d96b · Christoph Knote · 3a96ab2b · 4472d96b · 4472d96b · 4472d96b
Commit 4472d96b authored 3 years ago by Christoph Knote
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -30,7 +30,7 @@ release = "2.0"
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = ["sphinx.ext.autodoc", "enum_tools.autoenum"]
+extensions = ["sphinx.ext.autodoc", "enum_tools.autoenum", "sphinx.ext.intersphinx"]

 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
@@ -40,6 +40,12 @@ templates_path = ["_templates"]
 # This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = []

+# use class header doc as well as __init__ for parameters
+# (https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#confval-autoclass_content)
+autoclass_content = "both"
+
+# do not resolve constants in function arguments
+autodoc_preserve_defaults = True

 # -- Options for HTML output -------------------------------------------------

@@ -52,3 +58,10 @@ html_theme = "classic"
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = []
+
+# -- intersphinx --
+
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3", None),
+    "numpy": ("https://numpy.org/doc/stable/", None),
+}
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -7,7 +7,7 @@
 icartt
 ******

-icartt is an ICARTT file format reader and writer for Python
+``icartt`` is an ICARTT file format reader and writer for Python

 The ICARTT data format is described here: https://www-air.larc.nasa.gov/missions/etc/IcarttDataFormat.htm

@@ -15,8 +15,8 @@ The ICARTT data format is described here: https://www-air.larc.nasa.gov/missions
   :maxdepth: 2
   :caption: Contents:

-Example
-#######
+Examples
+########

 .. include:: usage.rst

@@ -25,11 +25,18 @@ API

 .. module:: icartt

-Variable
-********
+Constants
+**********

-.. autoclass:: Variable
-   :members:
+These constants are reported here for completeness; they do not normally need to be changed by the user. Different values can be supplied by the user upon :class:`Dataset` / :class:`Variable` creation and when writing to output.
+
+.. autodata:: icartt.dataset.DEFAULT_NUM_FORMAT
+
+.. autodata:: icartt.dataset.DEFAULT_FIELD_DELIM
+
+.. autodata:: icartt.dataset.DEFAULT_SCALE_FACTOR
+
+.. autodata:: icartt.dataset.DEFAULT_MISSING_VALUE

 Dataset
 ********
@@ -40,19 +47,25 @@ Dataset
 DataStore
 *********

-The .data attribute of an ``icartt.Dataset`` is a DataStore, which can be accessed to add data as follows:
+The ``.data`` attribute of a Dataset is a DataStore, which can be accessed to add data as follows:

 .. autoclass:: DataStore1001
-   :members:
+   :members: add

 .. autoclass:: DataStore2110
-   :members:
+   :members: add

 Formats
 ********

 .. autoenum:: Formats

+Variable
+********
+
+.. autoclass:: Variable
+   :members:
+
 Variable types
 ***************


--- a/docs/usage.rst
+++ b/docs/usage.rst
 Reading an existing dataset
-############################
+***************************

 Simple format (FFI 1001)
-*************************
+========================

 .. literalinclude:: ../tests/usage_examples/read_ffi1001.py

 More complex (FFI 2110)
-*************************
+========================

-Identical to FFI1001, only the data structure is more complex:
+Identical to FFI1001, only the data structure is more complex.

 .. literalinclude:: ../tests/usage_examples/read_ffi2110.py

 Creating a new dataset
-############################
+**********************

 Simple format (FFI 1001)
-*************************
+========================

 .. literalinclude:: ../tests/usage_examples/create_ffi1001.py

 More complex (FFI 2110)
-*************************
+========================

-Again, like for FFI 1001 but more complex data structure
+Again, like for FFI 1001, but using a more complex data structure.

 .. literalinclude:: ../tests/usage_examples/create_ffi2110.py
--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -8,13 +8,21 @@ from enum import IntEnum

 import numpy as np

-
 DEFAULT_NUM_FORMAT = "%g"
-DEFAULT_FIELD_DELIM = ", "
+"""Default number format for output. Provides the `fmt` parameter of :func:`numpy.savetxt` internally."""
+
+DEFAULT_FIELD_DELIM = ","
+"""Default field delimiter"""
+
+DEFAULT_SCALE_FACTOR = 1.0
+"""Default variable scale factor"""
+
+DEFAULT_MISSING_VALUE = -9999.0
+"""Default variable missing value"""


 class Formats(IntEnum):
-    """File Format Indices (FFI)"""
+    """ICARTT File Format Indices (FFI)"""

    FFI1001 = 1001
    FFI2110 = 2110
@@ -33,6 +41,7 @@ class DataStore1001:
    """Data model for FFI1001"""

    def __init__(self, ivar, dvars):
+
        self.ivarname = ivar.shortname

        self.varnames = [ivar.shortname] + [x for x in dvars]
@@ -51,7 +60,7 @@ class DataStore1001:
            return self.data[s]
        # returns None implicitly if self.data is None

-    def addFromTxt(self, f, splitChar, max_rows=None):
+    def addFromTxt(self, f, delimiter, max_rows=None):
        # genfromtxt would warn if file is empty. We do not want that.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
@@ -61,7 +70,7 @@ class DataStore1001:
                dtype=self.dtypes,
                missing_values=self.missingValues,
                usemask=True,
-                delimiter=splitChar,
+                delimiter=delimiter,
                max_rows=max_rows,
                deletechars="",
            ).filled(fill_value=np.nan)
@@ -71,7 +80,7 @@ class DataStore1001:
        """(bulk) add data, providing a (structured) numpy array.

        Array has to have shape [ (ivar, dvar, dvar, ...), ... ],
-        missing values have to be set to np.nan.
+        missing values have to be set to :obj:`numpy.nan`.

        :param newData: data to be added
        :type newData: numpy.ndarray
@@ -116,6 +125,7 @@ class DataStore2110(collections.UserDict):
    """Data model for FFI2110"""

    def __init__(self, ivar, ibvar, auxvars, dvars):
+
        self.ivarname = ivar.shortname
        self.ibvarname = ibvar.shortname

@@ -142,12 +152,12 @@ class DataStore2110(collections.UserDict):
            return self.data[s]
        # returns None implicitly if self.data is None

-    def addFromTxt(self, f, splitChar):
+    def addFromTxt(self, f, delimiter):
        while f:
            auxds = DataStore1001(self.ivar, self.auxvars)
            depds = DataStore1001(self.ibvar, self.dvars)
            try:
-                auxds.addFromTxt(f, splitChar, max_rows=1)
+                auxds.addFromTxt(f, delimiter, max_rows=1)
            except:
                # we are at the end of the file if this happens
                break
@@ -157,7 +167,7 @@ class DataStore2110(collections.UserDict):
            # it is indeed possible to have zero dependent data lines
            if ndeprows > 0:
                try:
-                    depds.addFromTxt(f, splitChar, max_rows=ndeprows)
+                    depds.addFromTxt(f, delimiter, max_rows=ndeprows)
                except:
                    raise IOError("Could not read dependent data lines.")

@@ -171,7 +181,7 @@ class DataStore2110(collections.UserDict):

        Arrays have to have shape [ (ivar, auxvar, auxvar, ...) ] and
        [ (ibvar, depvar, depvar, ...), ... ] for auxiliary and dependent data line(s), respectively.
-        missing values have to be set to np.nan.
+        missing values have to be set to :obj:`numpy.nan`.

        :param newAuxData: auxiliary data line to be added
        :type newAuxData: numpy.ndarray
@@ -190,11 +200,16 @@ class DataStore2110(collections.UserDict):
        self.data[ivarValue] = {"AUX": auxds, "DEP": depds}

    def write(
-        self, f=sys.stdout, fmt=DEFAULT_NUM_FORMAT, delimiter=DEFAULT_FIELD_DELIM
+        self,
+        f=sys.stdout,
+        fmt_aux=DEFAULT_NUM_FORMAT,
+        fmt_dep=DEFAULT_NUM_FORMAT,
+        delimiter_aux=DEFAULT_FIELD_DELIM,
+        delimiter_dep=DEFAULT_FIELD_DELIM,
    ):
        for ivarvalue in self.data:
-            self.data[ivarvalue]["AUX"].write(f, fmt=fmt, delimiter=delimiter)
-            self.data[ivarvalue]["DEP"].write(f, fmt=fmt, delimiter=delimiter)
+            self.data[ivarvalue]["AUX"].write(f, fmt=fmt_aux, delimiter=delimiter_aux)
+            self.data[ivarvalue]["DEP"].write(f, fmt=fmt_dep, delimiter=delimiter_dep)


 class KeywordComment:
@@ -302,33 +317,14 @@ class StandardNormalComments(collections.UserList):


 class Variable:
-    """An ICARTT variable description with name, units, scale and missing value.
-
-    :param shortname: Short name of the variable
-    :type shortname: str
-
-    :param units: Units of the variable
-    :type units: str
-
-    :param standardname: Standard name of the variable
-    :type standardname: str
-
-    :param longname: Long name of the variable
-    :type longname: str
-
-    :param vartype: Variable type (unbounded/bounded independent or dependent)
-    :type vartype: enum:`icartt.Formats`, defaults to VariableType.dependentVariable
-
-    :param scale: Scaling factor for the variable
-    :type scale: float, defaults to 1.0
-
-    :param miss: Missing value for the variable
-    :type miss: float, defaults to -99999.0
-    """
+    """An ICARTT variable description with name, units, scale and missing value."""

-    def desc(self, splitChar=", "):
+    def desc(self, delimiter=DEFAULT_FIELD_DELIM):
        """Variable description string as it appears in an ICARTT file

+        :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
+        :type delimiter: str, optional
+
        :return: description string
        :rtype: str
        """
@@ -337,7 +333,7 @@ class Variable:
            descstr += [str(self.standardname)]
        if self.longname is not None:
            descstr += [str(self.longname)]
-        return splitChar.join(descstr)
+        return delimiter.join(descstr)

    def isValidVariablename(self, name):  # TODO: this could be a 'utils' function
        # ICARTT Standard v2 2.1.1 2)
@@ -362,10 +358,31 @@ class Variable:
        standardname,
        longname,
        vartype=VariableType.DependentVariable,
-        scale=1.0,
-        miss=-99999.0,
+        scale=DEFAULT_SCALE_FACTOR,
+        miss=DEFAULT_MISSING_VALUE,
    ):
-        """Constructor method"""
+        """
+        :param shortname: Short name of the variable
+        :type shortname: str
+
+        :param units: Units of the variable
+        :type units: str
+
+        :param standardname: Standard name of the variable
+        :type standardname: str
+
+        :param longname: Long name of the variable
+        :type longname: str
+
+        :param vartype: Variable type (unbounded/bounded independent or dependent), defaults to `VariableType.DependentVariable`
+        :type vartype: VariableType, optional
+
+        :param scale: Scaling factor for the variable, defaults to DEFAULT_SCALE_FACTOR
+        :type scale: float, optional
+
+        :param miss: Missing value for the variable, defaults to DEFAULT_MISSING_VALUE
+        :type miss: float, optional
+        """
        if not self.isValidVariablename(shortname):
            warnings.warn(
                f"Variable short name {str(shortname)} does not comply with ICARTT standard v2"
@@ -390,17 +407,6 @@ class Variable:
 class Dataset:
    """An ICARTT dataset that can be created from scratch or read from a file,
    manipulated, and then written to a file.
-
-    :param f: file path or file handle to use
-    :type f: str or file handle or stream object, defaults to None
-
-    :param loadData: load data as well (or only header if False)?
-    :type loadData: bool, defaults to "True"
-
-    :param splitChar: splitting character used to separate fields in a line
-    :type splitChar: str, defaults to ","
-
-    :param format:
    """

    @property
@@ -434,7 +440,7 @@ class Dataset:
    def times(self):
        """Time steps of the data

-        :return: numpy array of time steps
+        :return: array of time steps
        :rtype: numpy.ndarray
        """

@@ -470,7 +476,7 @@ class Dataset:
        """Variables (independent + dependent + auxiliary)

        :return: dictionary of all variables
-        :rtype: dict of Variable(s)
+        :rtype: dict
        """
        variables = {}

@@ -485,27 +491,31 @@ class Dataset:

        return variables

-    def readHeader(self, splitChar=","):
-        """Read the ICARTT header (from file)"""
+    def readHeader(self, delimiter=DEFAULT_FIELD_DELIM):
+        """Read the ICARTT header (from file)
+
+        :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
+        :type delimiter: str, optional
+        """

        class FilehandleWithLinecounter:  # TODO: this could be a 'utils' class
-            def __init__(self, f, splitChar):
+            def __init__(self, f, delimiter):
                self.f = f
                self.line = 0
-                self.splitChar = splitChar
+                self.delimiter = delimiter

            def readline(self, doSplit=True):
                self.line += 1
                dmp = self.f.readline().replace("\n", "").replace("\r", "")
                if doSplit:
-                    dmp = [word.strip(" ") for word in dmp.split(self.splitChar)]
+                    dmp = [word.strip(" ") for word in dmp.split(self.delimiter)]
                return dmp

        if self.inputFhandle:
            if self.inputFhandle.closed:
                self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8")

-            f = FilehandleWithLinecounter(self.inputFhandle, splitChar)
+            f = FilehandleWithLinecounter(self.inputFhandle, delimiter)
            self._readHeader(f)
            self.inputFhandle.close()

@@ -697,8 +707,12 @@ class Dataset:
                f"Number of header lines suggested in line 1 ({int(nHeaderSuggested)}) do not match actual header lines read ({int(self.nHeader)})"
            )

-    def readData(self, splitChar=","):
-        """Read ICARTT data (from file)"""
+    def readData(self, delimiter=DEFAULT_FIELD_DELIM):
+        """Read ICARTT data (from file)
+
+        :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
+        :type delimiter: str, optional
+        """
        if self.inputFhandle:
            if self.inputFhandle.closed:
                self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8")
@@ -706,23 +720,27 @@ class Dataset:
            for _ in range(self.nHeaderFile):
                self.inputFhandle.readline()

-            self.data.addFromTxt(self.inputFhandle, splitChar)
+            self.data.addFromTxt(self.inputFhandle, delimiter)
            self.inputFhandle.close()

-    def read(self, splitChar=","):
-        """Read ICARTT data and header"""
-        self.readHeader(splitChar)
+    def read(self, delimiter=DEFAULT_FIELD_DELIM):
+        """Read ICARTT data and header
+
+        :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
+        :type delimiter: str, optional
+        """
+        self.readHeader(delimiter)
        self.endDefineMode()
-        self.readData(splitChar)
+        self.readData(delimiter)

    def makeFileName(self, dateFormat="%Y%m%d"):
        """Create ICARTT-compliant file name based on the information contained in the dataset

-        :param dateFormat: date format to use when parsing
-        :type dateFormat: str, defaults to '%Y%m%d'
+        :param dateFormat: date format to use when parsing, defaults to '%Y%m%d'
+        :type dateFormat: str, optional

        :return: file name generated
-        :rtype: string
+        :rtype: str
        """
        fn = (
            self.dataID
@@ -744,11 +762,19 @@ class Dataset:
        return fn + ".ict"

    def isValidFileName(self, name):  # TODO: this could be a 'utils' function
-        # ICARTT standard v2 2.1.1 3)
-        # Filename: Uppercase and lowercase ASCII alphanumeric
-        # characters (i.e. A-Z, a-z, 0-9), underscore, period,
-        # and hyphen. File names can be a maximum 127
-        # characters in length.
+        """test whether file name complies with ICARTT standard:
+
+        ICARTT standard v2 2.1.1 3)
+
+        Filename: Uppercase and lowercase ASCII alphanumeric characters (i.e. A-Z, a-z, 0-9), underscore, period, and hyphen. File names can be a maximum 127 characters in length.
+
+        :param name: file name
+        :type name: str
+
+        :return: is file name valid according to ICARTT standard?
+        :rtype: bool
+        """
+
        def isAsciiAlpha(x):  # TODO: this could be a 'utils' function
            return re.match("[a-zA-Z0-9-_.]", x)

@@ -760,8 +786,11 @@ class Dataset:
    def writeHeader(self, f=sys.stdout, delimiter=DEFAULT_FIELD_DELIM):
        """Write header

-        :param f: handle to write to
-        :type f: file handle or StringIO stream, defaults to sys.stdout
+        :param f: `file object <https://docs.python.org/3/glossary.html#term-file-object>`_ to write to, defaults to sys.stdout
+        :type f: handle, optional
+
+        :param delimiter: field delimiter character(s) for output, defaults to DEFAULT_FIELD_DELIM
+        :type delimiter: str, optional
        """

        def write_to_file(txt):
@@ -859,18 +888,41 @@ class Dataset:
    ):
        """Write data

-        :param f: handle to write to
-        :type f: file handle or StringIO stream, defaults to sys.stdout
+        :param f: `file object <https://docs.python.org/3/glossary.html#term-file-object>`_ to write to, defaults to sys.stdout
+        :type f: handle, optional
+
+        :param fmt: format string for output, accepts anything :func:`numpy.savetxt` would, defaults to DEFAULT_NUM_FORMAT
+        :type fmt: str or sequence of str, optional
+
+        :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
+        :type delimiter: str, optional
        """
-        self.data.write(f=f, fmt=fmt, delimiter=delimiter)
+        if self.format == Formats.FFI1001:
+            self.data.write(f=f, fmt=fmt, delimiter=delimiter)
+        elif self.format == Formats.FFI2110:
+            self.data.write(
+                f=f,
+                fmt_aux=fmt,
+                delimiter_aux=delimiter,
+                fmt_dep=fmt,
+                delimiter_dep=delimiter,
+            )
+        else:
+            raise NotImplementedError("Unknown FFI!")

    def write(
        self, f=sys.stdout, fmt=DEFAULT_NUM_FORMAT, delimiter=DEFAULT_FIELD_DELIM
    ):
        """Write header and data

-        :param f: handle to write to
-        :type f: file handle or StringIO stream, defaults to sys.stdout
+        :param f: `file object <https://docs.python.org/3/glossary.html#term-file-object>`_ to write to, defaults to sys.stdout
+        :type f: handle, optional
+
+        :param fmt: format string for output, accepts anything :func:`numpy.savetxt` would, defaults to DEFAULT_NUM_FORMAT
+        :type fmt: str or sequence of str, optional
+
+        :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
+        :type delimiter: str, optional
        """
        self.writeHeader(f=f, delimiter=delimiter)
        self.writeData(f=f, fmt=fmt, delimiter=delimiter)
@@ -905,8 +957,27 @@ class Dataset:
    def __str__(self):
        return f"ICARTT Dataset {self.makeFileName()}"

-    def __init__(self, f=None, loadData=True, splitChar=",", format=Formats.FFI1001):
-        """Constructor method"""
+    def __init__(
+        self,
+        f=None,
+        loadData=True,
+        delimiter=DEFAULT_FIELD_DELIM,
+        format=Formats.FFI1001,
+    ):
+        """
+        :param f: file path or file handle to use, defaults to None
+        :type f: str or `file object <https://docs.python.org/3/glossary.html#term-file-object>`_, optional
+
+        :param loadData: whether to load data as well (or only header if False), defaults to `True`
+        :type loadData: bool, optional
+
+        :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
+        :type delimiter: str, optional
+
+        :param format: ICARTT file format to create, defaults to 1001
+        :type format: Formats, optional
+        """
+
        self.format = format
        self.version = None

@@ -956,7 +1027,7 @@ class Dataset:
            if not self.isValidFileName(pathlib.Path(f).name):
                warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename")

-            self.readHeader(splitChar)
+            self.readHeader(delimiter)
            if loadData:
                self.endDefineMode()
-                self.readData(splitChar)
+                self.readData(delimiter)
--- a/tests/read_and_write.py
+++ b/tests/read_and_write.py
-from argparse import ArgumentParser
-
-parser = ArgumentParser()
-parser.add_argument("input_file", type=str)
-parser.add_argument("output_file", type=str)
-
-args = parser.parse_args()
-
-import icartt
-
-ict = icartt.Dataset(args.input_file)
-ict.splitChar = ", "
-
-with open(args.output_file, "w") as f:
-    ict.write(f)
--- a/tests/test_1001.py
+++ b/tests/test_1001.py
@@ -47,7 +47,7 @@ class Simple1001TestCase(unittest.TestCase):
        )
        self.assertEqual(ict.independentVariable.longname, None)
        self.assertEqual(ict.independentVariable.scale, 1.0)
-        self.assertEqual(ict.independentVariable.miss, -99999.0)
+        self.assertEqual(ict.independentVariable.miss, -9999.0)

    def testDvar(self):
        ict = icartt.Dataset(self.fn, loadData=False)

--- a/tests/test_2110.py
+++ b/tests/test_2110.py
@@ -48,7 +48,7 @@ class Simple2110TestCase(unittest.TestCase):
            ict.independentVariable.longname, "number of seconds from 00:00 UTC"
        )
        self.assertEqual(ict.independentVariable.scale, 1.0)
-        self.assertEqual(ict.independentVariable.miss, -99999.0)
+        self.assertEqual(ict.independentVariable.miss, -9999.0)

    def testAuxvar(self):
        ict = icartt.Dataset(self.fn, loadData=False)