Skip to content
Snippets Groups Projects
Commit 4472d96b authored by Christoph Knote's avatar Christoph Knote
Browse files

Cleanup and extend docs, describe output format

parent 3a96ab2b
No related branches found
No related tags found
No related merge requests found
......@@ -30,7 +30,7 @@ release = "2.0"
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ["sphinx.ext.autodoc", "enum_tools.autoenum"]
extensions = ["sphinx.ext.autodoc", "enum_tools.autoenum", "sphinx.ext.intersphinx"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
......@@ -40,6 +40,12 @@ templates_path = ["_templates"]
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []
# use class header doc as well as __init__ for parameters
# (https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#confval-autoclass_content)
autoclass_content = "both"
# do not resolve constants in function arguments
autodoc_preserve_defaults = True
# -- Options for HTML output -------------------------------------------------
......@@ -52,3 +58,10 @@ html_theme = "classic"
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = []
# -- intersphinx --
intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
"numpy": ("https://numpy.org/doc/stable/", None),
}
......@@ -7,7 +7,7 @@
icartt
******
icartt is an ICARTT file format reader and writer for Python
`icartt` is an ICARTT file format reader and writer for Python.
The ICARTT data format is described here: https://www-air.larc.nasa.gov/missions/etc/IcarttDataFormat.htm
......@@ -15,8 +15,8 @@ The ICARTT data format is described here: https://www-air.larc.nasa.gov/missions
:maxdepth: 2
:caption: Contents:
Example
#######
Examples
########
.. include:: usage.rst
......@@ -25,11 +25,18 @@ API
.. module:: icartt
Variable
********
Constants
**********
.. autoclass:: Variable
:members:
Reported here for completeness; these defaults do not normally need to be changed by the user. They can be overridden upon :class:`Dataset` / :class:`Variable` creation and when writing to output.
.. autodata:: icartt.dataset.DEFAULT_NUM_FORMAT
.. autodata:: icartt.dataset.DEFAULT_FIELD_DELIM
.. autodata:: icartt.dataset.DEFAULT_SCALE_FACTOR
.. autodata:: icartt.dataset.DEFAULT_MISSING_VALUE
Dataset
********
......@@ -40,19 +47,25 @@ Dataset
DataStore
*********
The .data attribute of an ``icartt.Dataset`` is a DataStore, which can be accessed to add data as follows:
The `.data` attribute of a Dataset is a DataStore, which can be accessed to add data as follows:
.. autoclass:: DataStore1001
:members:
:members: add
.. autoclass:: DataStore2110
:members:
:members: add
Formats
********
.. autoenum:: Formats
Variable
********
.. autoclass:: Variable
:members:
Variable types
***************
......
Reading an existing dataset
############################
***************************
Simple format (FFI 1001)
*************************
========================
.. literalinclude:: ../tests/usage_examples/read_ffi1001.py
More complex (FFI 2110)
*************************
========================
Identical to FFI1001, only the data structure is more complex:
Identical to FFI1001, only the data structure is more complex.
.. literalinclude:: ../tests/usage_examples/read_ffi2110.py
Creating a new dataset
############################
**********************
Simple format (FFI 1001)
*************************
========================
.. literalinclude:: ../tests/usage_examples/create_ffi1001.py
More complex (FFI 2110)
*************************
========================
Again, like for FFI 1001 but more complex data structure
Again, like for FFI 1001, but using a more complex data structure.
.. literalinclude:: ../tests/usage_examples/create_ffi2110.py
......@@ -8,13 +8,21 @@ from enum import IntEnum
import numpy as np
DEFAULT_NUM_FORMAT = "%g"
DEFAULT_FIELD_DELIM = ", "
"""Default number format for output. Provides the `fmt` parameter of :func:`numpy.savetxt` internally."""
DEFAULT_FIELD_DELIM = ","
"""Default field delimiter"""
DEFAULT_SCALE_FACTOR = 1.0
"""Default variable scale factor"""
DEFAULT_MISSING_VALUE = -9999.0
"""Default variable missing value"""
class Formats(IntEnum):
"""File Format Indices (FFI)"""
"""ICARTT File Format Indices (FFI)"""
FFI1001 = 1001
FFI2110 = 2110
......@@ -33,6 +41,7 @@ class DataStore1001:
"""Data model for FFI1001"""
def __init__(self, ivar, dvars):
self.ivarname = ivar.shortname
self.varnames = [ivar.shortname] + [x for x in dvars]
......@@ -51,7 +60,7 @@ class DataStore1001:
return self.data[s]
# returns None implicitly if self.data is None
def addFromTxt(self, f, splitChar, max_rows=None):
def addFromTxt(self, f, delimiter, max_rows=None):
# genfromtxt would warn if file is empty. We do not want that.
with warnings.catch_warnings():
warnings.simplefilter("ignore")
......@@ -61,7 +70,7 @@ class DataStore1001:
dtype=self.dtypes,
missing_values=self.missingValues,
usemask=True,
delimiter=splitChar,
delimiter=delimiter,
max_rows=max_rows,
deletechars="",
).filled(fill_value=np.nan)
......@@ -71,7 +80,7 @@ class DataStore1001:
"""(bulk) add data, providing a (structured) numpy array.
Array has to have shape [ (ivar, dvar, dvar, ...), ... ],
missing values have to be set to np.nan.
missing values have to be set to :obj:`numpy.nan`.
:param newData: data to be added
:type newData: numpy.ndarray
......@@ -116,6 +125,7 @@ class DataStore2110(collections.UserDict):
"""Data model for FFI2110"""
def __init__(self, ivar, ibvar, auxvars, dvars):
self.ivarname = ivar.shortname
self.ibvarname = ibvar.shortname
......@@ -142,12 +152,12 @@ class DataStore2110(collections.UserDict):
return self.data[s]
# returns None implicitly if self.data is None
def addFromTxt(self, f, splitChar):
def addFromTxt(self, f, delimiter):
while f:
auxds = DataStore1001(self.ivar, self.auxvars)
depds = DataStore1001(self.ibvar, self.dvars)
try:
auxds.addFromTxt(f, splitChar, max_rows=1)
auxds.addFromTxt(f, delimiter, max_rows=1)
except:
# we are at the end of the file if this happens
break
......@@ -157,7 +167,7 @@ class DataStore2110(collections.UserDict):
# it is indeed possible to have zero dependent data lines
if ndeprows > 0:
try:
depds.addFromTxt(f, splitChar, max_rows=ndeprows)
depds.addFromTxt(f, delimiter, max_rows=ndeprows)
except:
raise IOError("Could not read dependent data lines.")
......@@ -171,7 +181,7 @@ class DataStore2110(collections.UserDict):
Arrays have to have shape [ (ivar, auxvar, auxvar, ...) ] and
[ (ibvar, depvar, depvar, ...), ... ] for auxiliary and dependent data line(s), respectively.
missing values have to be set to np.nan.
missing values have to be set to :obj:`numpy.nan`.
:param newAuxData: auxiliary data line to be added
:type newAuxData: numpy.ndarray
......@@ -190,11 +200,16 @@ class DataStore2110(collections.UserDict):
self.data[ivarValue] = {"AUX": auxds, "DEP": depds}
def write(
self, f=sys.stdout, fmt=DEFAULT_NUM_FORMAT, delimiter=DEFAULT_FIELD_DELIM
self,
f=sys.stdout,
fmt_aux=DEFAULT_NUM_FORMAT,
fmt_dep=DEFAULT_NUM_FORMAT,
delimiter_aux=DEFAULT_FIELD_DELIM,
delimiter_dep=DEFAULT_FIELD_DELIM,
):
for ivarvalue in self.data:
self.data[ivarvalue]["AUX"].write(f, fmt=fmt, delimiter=delimiter)
self.data[ivarvalue]["DEP"].write(f, fmt=fmt, delimiter=delimiter)
self.data[ivarvalue]["AUX"].write(f, fmt=fmt_aux, delimiter=delimiter_aux)
self.data[ivarvalue]["DEP"].write(f, fmt=fmt_dep, delimiter=delimiter_dep)
class KeywordComment:
......@@ -302,33 +317,14 @@ class StandardNormalComments(collections.UserList):
class Variable:
"""An ICARTT variable description with name, units, scale and missing value.
:param shortname: Short name of the variable
:type shortname: str
:param units: Units of the variable
:type units: str
:param standardname: Standard name of the variable
:type standardname: str
:param longname: Long name of the variable
:type longname: str
:param vartype: Variable type (unbounded/bounded independent or dependent)
:type vartype: enum:`icartt.Formats`, defaults to VariableType.dependentVariable
:param scale: Scaling factor for the variable
:type scale: float, defaults to 1.0
:param miss: Missing value for the variable
:type miss: float, defaults to -99999.0
"""
"""An ICARTT variable description with name, units, scale and missing value."""
def desc(self, splitChar=", "):
def desc(self, delimiter=DEFAULT_FIELD_DELIM):
"""Variable description string as it appears in an ICARTT file
:param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
:type delimiter: str, optional
:return: description string
:rtype: str
"""
......@@ -337,7 +333,7 @@ class Variable:
descstr += [str(self.standardname)]
if self.longname is not None:
descstr += [str(self.longname)]
return splitChar.join(descstr)
return delimiter.join(descstr)
def isValidVariablename(self, name): # TODO: this could be a 'utils' function
# ICARTT Standard v2 2.1.1 2)
......@@ -362,10 +358,31 @@ class Variable:
standardname,
longname,
vartype=VariableType.DependentVariable,
scale=1.0,
miss=-99999.0,
scale=DEFAULT_SCALE_FACTOR,
miss=DEFAULT_MISSING_VALUE,
):
"""Constructor method"""
"""
:param shortname: Short name of the variable
:type shortname: str
:param units: Units of the variable
:type units: str
:param standardname: Standard name of the variable
:type standardname: str
:param longname: Long name of the variable
:type longname: str
:param vartype: Variable type (unbounded/bounded independent or dependent), defaults to `VariableType.DependentVariable`
:type vartype: VariableType, optional
:param scale: Scaling factor for the variable, defaults to DEFAULT_SCALE_FACTOR
:type scale: float, optional
:param miss: Missing value for the variable, defaults to DEFAULT_MISSING_VALUE
:type miss: float, optional
"""
if not self.isValidVariablename(shortname):
warnings.warn(
f"Variable short name {str(shortname)} does not comply with ICARTT standard v2"
......@@ -390,17 +407,6 @@ class Variable:
class Dataset:
"""An ICARTT dataset that can be created from scratch or read from a file,
manipulated, and then written to a file.
:param f: file path or file handle to use
:type f: str or file handle or stream object, defaults to None
:param loadData: load data as well (or only header if False)?
:type loadData: bool, defaults to "True"
:param splitChar: splitting character used to separate fields in a line
:type splitChar: str, defaults to ","
:param format:
"""
@property
......@@ -434,7 +440,7 @@ class Dataset:
def times(self):
"""Time steps of the data
:return: numpy array of time steps
:return: array of time steps
:rtype: numpy.ndarray
"""
......@@ -470,7 +476,7 @@ class Dataset:
"""Variables (independent + dependent + auxiliary)
:return: dictionary of all variables
:rtype: dict of Variable(s)
:rtype: dict
"""
variables = {}
......@@ -485,27 +491,31 @@ class Dataset:
return variables
def readHeader(self, splitChar=","):
"""Read the ICARTT header (from file)"""
def readHeader(self, delimiter=DEFAULT_FIELD_DELIM):
"""Read the ICARTT header (from file)
:param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
:type delimiter: str, optional
"""
class FilehandleWithLinecounter: # TODO: this could be a 'utils' class
def __init__(self, f, splitChar):
def __init__(self, f, delimiter):
self.f = f
self.line = 0
self.splitChar = splitChar
self.delimiter = delimiter
def readline(self, doSplit=True):
self.line += 1
dmp = self.f.readline().replace("\n", "").replace("\r", "")
if doSplit:
dmp = [word.strip(" ") for word in dmp.split(self.splitChar)]
dmp = [word.strip(" ") for word in dmp.split(self.delimiter)]
return dmp
if self.inputFhandle:
if self.inputFhandle.closed:
self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8")
f = FilehandleWithLinecounter(self.inputFhandle, splitChar)
f = FilehandleWithLinecounter(self.inputFhandle, delimiter)
self._readHeader(f)
self.inputFhandle.close()
......@@ -697,8 +707,12 @@ class Dataset:
f"Number of header lines suggested in line 1 ({int(nHeaderSuggested)}) do not match actual header lines read ({int(self.nHeader)})"
)
def readData(self, splitChar=","):
"""Read ICARTT data (from file)"""
def readData(self, delimiter=DEFAULT_FIELD_DELIM):
"""Read ICARTT data (from file)
:param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
:type delimiter: str, optional
"""
if self.inputFhandle:
if self.inputFhandle.closed:
self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8")
......@@ -706,23 +720,27 @@ class Dataset:
for _ in range(self.nHeaderFile):
self.inputFhandle.readline()
self.data.addFromTxt(self.inputFhandle, splitChar)
self.data.addFromTxt(self.inputFhandle, delimiter)
self.inputFhandle.close()
def read(self, splitChar=","):
"""Read ICARTT data and header"""
self.readHeader(splitChar)
def read(self, delimiter=DEFAULT_FIELD_DELIM):
"""Read ICARTT data and header
:param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
:type delimiter: str, optional
"""
self.readHeader(delimiter)
self.endDefineMode()
self.readData(splitChar)
self.readData(delimiter)
def makeFileName(self, dateFormat="%Y%m%d"):
"""Create ICARTT-compliant file name based on the information contained in the dataset
:param dateFormat: date format to use when parsing
:type dateFormat: str, defaults to '%Y%m%d'
:param dateFormat: date format to use when parsing, defaults to '%Y%m%d'
:type dateFormat: str, optional
:return: file name generated
:rtype: string
:rtype: str
"""
fn = (
self.dataID
......@@ -744,11 +762,19 @@ class Dataset:
return fn + ".ict"
def isValidFileName(self, name): # TODO: this could be a 'utils' function
# ICARTT standard v2 2.1.1 3)
# Filename: Uppercase and lowercase ASCII alphanumeric
# characters (i.e. A-Z, a-z, 0-9), underscore, period,
# and hyphen. File names can be a maximum 127
# characters in length.
"""test whether file name complies with ICARTT standard:
ICARTT standard v2 2.1.1 3)
Filename: Uppercase and lowercase ASCII alphanumeric characters (i.e. A-Z, a-z, 0-9), underscore, period, and hyphen. File names can be a maximum 127 characters in length.
:param name: file name
:type name: str
:return: is file name valid according to ICARTT standard?
:rtype: bool
"""
def isAsciiAlpha(x): # TODO: this could be a 'utils' function
return re.match("[a-zA-Z0-9-_.]", x)
......@@ -760,8 +786,11 @@ class Dataset:
def writeHeader(self, f=sys.stdout, delimiter=DEFAULT_FIELD_DELIM):
"""Write header
:param f: handle to write to
:type f: file handle or StringIO stream, defaults to sys.stdout
:param f: `file object <https://docs.python.org/3/glossary.html#term-file-object>`_ to write to, defaults to sys.stdout
:type f: handle, optional
:param delimiter: field delimiter character(s) for output, defaults to DEFAULT_FIELD_DELIM
:type delimiter: str, optional
"""
def write_to_file(txt):
......@@ -859,18 +888,41 @@ class Dataset:
):
"""Write data
:param f: handle to write to
:type f: file handle or StringIO stream, defaults to sys.stdout
:param f: `file object <https://docs.python.org/3/glossary.html#term-file-object>`_ to write to, defaults to sys.stdout
:type f: handle, optional
:param fmt: format string for output, accepts anything :func:`numpy.savetxt` would, defaults to DEFAULT_NUM_FORMAT
:type fmt: str or sequence of str, optional
:param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
:type delimiter: str, optional
"""
self.data.write(f=f, fmt=fmt, delimiter=delimiter)
if self.format == Formats.FFI1001:
self.data.write(f=f, fmt=fmt, delimiter=delimiter)
elif self.format == Formats.FFI2110:
self.data.write(
f=f,
fmt_aux=fmt,
delimiter_aux=delimiter,
fmt_dep=fmt,
delimiter_dep=delimiter,
)
else:
raise NotImplementedError("Unknown FFI!")
def write(
self, f=sys.stdout, fmt=DEFAULT_NUM_FORMAT, delimiter=DEFAULT_FIELD_DELIM
):
"""Write header and data
:param f: handle to write to
:type f: file handle or StringIO stream, defaults to sys.stdout
:param f: `file object <https://docs.python.org/3/glossary.html#term-file-object>`_ to write to, defaults to sys.stdout
:type f: handle, optional
:param fmt: format string for output, accepts anything :func:`numpy.savetxt` would, defaults to DEFAULT_NUM_FORMAT
:type fmt: str or sequence of str, optional
:param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
:type delimiter: str, optional
"""
self.writeHeader(f=f, delimiter=delimiter)
self.writeData(f=f, fmt=fmt, delimiter=delimiter)
......@@ -905,8 +957,27 @@ class Dataset:
def __str__(self):
return f"ICARTT Dataset {self.makeFileName()}"
def __init__(self, f=None, loadData=True, splitChar=",", format=Formats.FFI1001):
"""Constructor method"""
def __init__(
self,
f=None,
loadData=True,
delimiter=DEFAULT_FIELD_DELIM,
format=Formats.FFI1001,
):
"""
:param f: file path or file handle to use, defaults to None
:type f: str or `file object <https://docs.python.org/3/glossary.html#term-file-object>`_, optional
:param loadData: whether to load data as well (or only header if False), defaults to `True`
:type loadData: bool, optional
:param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
:type delimiter: str, optional
:param format: ICARTT file format to create, defaults to 1001
:type format: Formats, optional
"""
self.format = format
self.version = None
......@@ -956,7 +1027,7 @@ class Dataset:
if not self.isValidFileName(pathlib.Path(f).name):
warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename")
self.readHeader(splitChar)
self.readHeader(delimiter)
if loadData:
self.endDefineMode()
self.readData(splitChar)
self.readData(delimiter)
"""Read an ICARTT file and write it back out to a new file.

Usage: <script> input_file output_file
"""
from argparse import ArgumentParser

import icartt

# Both paths are required positional command-line arguments.
parser = ArgumentParser()
parser.add_argument("input_file", type=str)
parser.add_argument("output_file", type=str)
args = parser.parse_args()

# Load the input file into a Dataset using the constructor's defaults.
ict = icartt.Dataset(args.input_file)

# NOTE(review): nothing visible in this change reads a ``splitChar`` attribute
# (the reader/writer methods take an explicit ``delimiter`` argument), so this
# assignment may have no effect on the output -- confirm, and if a ", "
# separator is intended, pass ``delimiter=", "`` to ``ict.write`` instead.
ict.splitChar = ", "

# Write with an explicit encoding, matching the utf-8 encoding the library
# uses when it opens input files, so output does not depend on the locale.
with open(args.output_file, "w", encoding="utf-8") as f:
    ict.write(f)
......@@ -47,7 +47,7 @@ class Simple1001TestCase(unittest.TestCase):
)
self.assertEqual(ict.independentVariable.longname, None)
self.assertEqual(ict.independentVariable.scale, 1.0)
self.assertEqual(ict.independentVariable.miss, -99999.0)
self.assertEqual(ict.independentVariable.miss, -9999.0)
def testDvar(self):
ict = icartt.Dataset(self.fn, loadData=False)
......
......@@ -48,7 +48,7 @@ class Simple2110TestCase(unittest.TestCase):
ict.independentVariable.longname, "number of seconds from 00:00 UTC"
)
self.assertEqual(ict.independentVariable.scale, 1.0)
self.assertEqual(ict.independentVariable.miss, -99999.0)
self.assertEqual(ict.independentVariable.miss, -9999.0)
def testAuxvar(self):
ict = icartt.Dataset(self.fn, loadData=False)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment