From 4472d96bb5dc3be57a02a2e6c5742bce1631c8a9 Mon Sep 17 00:00:00 2001 From: Christoph Knote Date: Fri, 1 Apr 2022 13:44:31 +0200 Subject: [PATCH] Cleanup and extend docs, describe output format --- docs/conf.py | 15 ++- docs/index.rst | 33 ++++-- docs/usage.rst | 16 +-- src/icartt/dataset.py | 243 ++++++++++++++++++++++++++-------------- tests/read_and_write.py | 15 --- tests/test_1001.py | 2 +- tests/test_2110.py | 2 +- 7 files changed, 204 insertions(+), 122 deletions(-) delete mode 100644 tests/read_and_write.py diff --git a/docs/conf.py b/docs/conf.py index 2ab4cac..c923536 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,7 @@ release = "2.0" # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["sphinx.ext.autodoc", "enum_tools.autoenum"] +extensions = ["sphinx.ext.autodoc", "enum_tools.autoenum", "sphinx.ext.intersphinx"] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -40,6 +40,12 @@ templates_path = ["_templates"] # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] +# use class header doc as well as __init__ for parameters +# (https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#confval-autoclass_content) +autoclass_content = "both" + +# do not resolve constants in function arguments +autodoc_preserve_defaults = True # -- Options for HTML output ------------------------------------------------- @@ -52,3 +58,10 @@ html_theme = "classic" # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
html_static_path = [] + +# -- intersphinx -- + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable/", None), +} diff --git a/docs/index.rst b/docs/index.rst index 88055f5..adb8613 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,7 +7,7 @@ icartt ****** -icartt is an ICARTT file format reader and writer for Python +`icartt` is an ICARTT file format reader and writer for Python The ICARTT data format is described here: https://www-air.larc.nasa.gov/missions/etc/IcarttDataFormat.htm @@ -15,8 +15,8 @@ The ICARTT data format is described here: https://www-air.larc.nasa.gov/missions :maxdepth: 2 :caption: Contents: -Example -####### +Examples +######## .. include:: usage.rst @@ -25,11 +25,18 @@ API .. module:: icartt -Variable -******** +Constants +********** -.. autoclass:: Variable - :members: +Reported here for completeness; these defaults do not need to be changed by the user. Other values can be supplied upon :class:`Dataset` / :class:`Variable` creation and when writing to output. + +.. autodata:: icartt.dataset.DEFAULT_NUM_FORMAT + +.. autodata:: icartt.dataset.DEFAULT_FIELD_DELIM + +.. autodata:: icartt.dataset.DEFAULT_SCALE_FACTOR + +.. autodata:: icartt.dataset.DEFAULT_MISSING_VALUE Dataset ******** @@ -40,19 +47,25 @@ Dataset DataStore ********* -The .data attribute of an ``icartt.Dataset`` is a DataStore, which can be accessed to add data as follows: +The `.data` attribute of a Dataset is a DataStore, which can be accessed to add data as follows: .. autoclass:: DataStore1001 - :members: + :members: add .. autoclass:: DataStore2110 - :members: + :members: add Formats ******** .. autoenum:: Formats +Variable +******** + +.. 
autoclass:: Variable + :members: + Variable types *************** diff --git a/docs/usage.rst b/docs/usage.rst index ff9b3aa..b15f18a 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -1,29 +1,29 @@ Reading an existing dataset -############################ +*************************** Simple format (FFI 1001) -************************* +======================== .. literalinclude:: ../tests/usage_examples/read_ffi1001.py More complex (FFI 2110) -************************* +======================== -Identical to FFI1001, only the data structure is more complex: +Identical to FFI1001, only the data structure is more complex. .. literalinclude:: ../tests/usage_examples/read_ffi2110.py Creating a new dataset -############################ +********************** Simple format (FFI 1001) -************************* +======================== .. literalinclude:: ../tests/usage_examples/create_ffi1001.py More complex (FFI 2110) -************************* +======================== -Again, like for FFI 1001 but more complex data structure +Again, like for FFI 1001, but using a more complex data structure. .. literalinclude:: ../tests/usage_examples/create_ffi2110.py diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index 7fb9ec5..81e5a8c 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -8,13 +8,21 @@ from enum import IntEnum import numpy as np - DEFAULT_NUM_FORMAT = "%g" -DEFAULT_FIELD_DELIM = ", " +"""Default number format for output. 
Provides the `fmt` parameter of :func:`numpy.savetxt` internally.""" + +DEFAULT_FIELD_DELIM = "," +"""Default field delimiter""" + +DEFAULT_SCALE_FACTOR = 1.0 +"""Default variable scale factor""" + +DEFAULT_MISSING_VALUE = -9999.0 +"""Default variable missing value""" class Formats(IntEnum): - """File Format Indices (FFI)""" + """ICARTT File Format Indices (FFI)""" FFI1001 = 1001 FFI2110 = 2110 @@ -33,6 +41,7 @@ class DataStore1001: """Data model for FFI1001""" def __init__(self, ivar, dvars): + self.ivarname = ivar.shortname self.varnames = [ivar.shortname] + [x for x in dvars] @@ -51,7 +60,7 @@ class DataStore1001: return self.data[s] # returns None implicitly if self.data is None - def addFromTxt(self, f, splitChar, max_rows=None): + def addFromTxt(self, f, delimiter, max_rows=None): # genfromtxt would warn if file is empty. We do not want that. with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -61,7 +70,7 @@ class DataStore1001: dtype=self.dtypes, missing_values=self.missingValues, usemask=True, - delimiter=splitChar, + delimiter=delimiter, max_rows=max_rows, deletechars="", ).filled(fill_value=np.nan) @@ -71,7 +80,7 @@ class DataStore1001: """(bulk) add data, providing a (structured) numpy array. Array has to have shape [ (ivar, dvar, dvar, ...), ... ], - missing values have to be set to np.nan. + missing values have to be set to :obj:`numpy.nan`. 
:param newData: data to be added :type newData: numpy.ndarray @@ -116,6 +125,7 @@ class DataStore2110(collections.UserDict): """Data model for FFI2110""" def __init__(self, ivar, ibvar, auxvars, dvars): + self.ivarname = ivar.shortname self.ibvarname = ibvar.shortname @@ -142,12 +152,12 @@ class DataStore2110(collections.UserDict): return self.data[s] # returns None implicitly if self.data is None - def addFromTxt(self, f, splitChar): + def addFromTxt(self, f, delimiter): while f: auxds = DataStore1001(self.ivar, self.auxvars) depds = DataStore1001(self.ibvar, self.dvars) try: - auxds.addFromTxt(f, splitChar, max_rows=1) + auxds.addFromTxt(f, delimiter, max_rows=1) except: # we are at the end of the file if this happens break @@ -157,7 +167,7 @@ class DataStore2110(collections.UserDict): # it is indeed possible to have zero dependent data lines if ndeprows > 0: try: - depds.addFromTxt(f, splitChar, max_rows=ndeprows) + depds.addFromTxt(f, delimiter, max_rows=ndeprows) except: raise IOError("Could not read dependent data lines.") @@ -171,7 +181,7 @@ class DataStore2110(collections.UserDict): Arrays have to have shape [ (ivar, auxvar, auxvar, ...) ] and [ (ibvar, depvar, depvar, ...), ... ] for auxiliary and dependent data line(s), respectively. - missing values have to be set to np.nan. + missing values have to be set to :obj:`numpy.nan`. 
:param newAuxData: auxiliary data line to be added :type newAuxData: numpy.ndarray @@ -190,11 +200,16 @@ class DataStore2110(collections.UserDict): self.data[ivarValue] = {"AUX": auxds, "DEP": depds} def write( - self, f=sys.stdout, fmt=DEFAULT_NUM_FORMAT, delimiter=DEFAULT_FIELD_DELIM + self, + f=sys.stdout, + fmt_aux=DEFAULT_NUM_FORMAT, + fmt_dep=DEFAULT_NUM_FORMAT, + delimiter_aux=DEFAULT_FIELD_DELIM, + delimiter_dep=DEFAULT_FIELD_DELIM, ): for ivarvalue in self.data: - self.data[ivarvalue]["AUX"].write(f, fmt=fmt, delimiter=delimiter) - self.data[ivarvalue]["DEP"].write(f, fmt=fmt, delimiter=delimiter) + self.data[ivarvalue]["AUX"].write(f, fmt=fmt_aux, delimiter=delimiter_aux) + self.data[ivarvalue]["DEP"].write(f, fmt=fmt_dep, delimiter=delimiter_dep) class KeywordComment: @@ -302,33 +317,14 @@ class StandardNormalComments(collections.UserList): class Variable: - """An ICARTT variable description with name, units, scale and missing value. - - :param shortname: Short name of the variable - :type shortname: str - - :param units: Units of the variable - :type units: str - - :param standardname: Standard name of the variable - :type standardname: str - - :param longname: Long name of the variable - :type longname: str - - :param vartype: Variable type (unbounded/bounded independent or dependent) - :type vartype: enum:`icartt.Formats`, defaults to VariableType.dependentVariable - - :param scale: Scaling factor for the variable - :type scale: float, defaults to 1.0 - - :param miss: Missing value for the variable - :type miss: float, defaults to -99999.0 - """ + """An ICARTT variable description with name, units, scale and missing value.""" - def desc(self, splitChar=", "): + def desc(self, delimiter=DEFAULT_FIELD_DELIM): """Variable description string as it appears in an ICARTT file + :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM + :type delimiter: str, optional + :return: description string :rtype: str """ @@ -337,7 +333,7 @@ 
class Variable: descstr += [str(self.standardname)] if self.longname is not None: descstr += [str(self.longname)] - return splitChar.join(descstr) + return delimiter.join(descstr) def isValidVariablename(self, name): # TODO: this could be a 'utils' function # ICARTT Standard v2 2.1.1 2) @@ -362,10 +358,31 @@ class Variable: standardname, longname, vartype=VariableType.DependentVariable, - scale=1.0, - miss=-99999.0, + scale=DEFAULT_SCALE_FACTOR, + miss=DEFAULT_MISSING_VALUE, ): - """Constructor method""" + """ + :param shortname: Short name of the variable + :type shortname: str + + :param units: Units of the variable + :type units: str + + :param standardname: Standard name of the variable + :type standardname: str + + :param longname: Long name of the variable + :type longname: str + + :param vartype: Variable type (unbounded/bounded independent or dependent), defaults to `VariableType.DependentVariable` + :type vartype: VariableType, optional + + :param scale: Scaling factor for the variable, defaults to DEFAULT_SCALE_FACTOR + :type scale: float, optional + + :param miss: Missing value for the variable, defaults to DEFAULT_MISSING_VALUE + :type miss: float, optional + """ if not self.isValidVariablename(shortname): warnings.warn( f"Variable short name {str(shortname)} does not comply with ICARTT standard v2" @@ -390,17 +407,6 @@ class Variable: class Dataset: """An ICARTT dataset that can be created from scratch or read from a file, manipulated, and then written to a file. - - :param f: file path or file handle to use - :type f: str or file handle or stream object, defaults to None - - :param loadData: load data as well (or only header if False)? 
- :type loadData: bool, defaults to "True" - - :param splitChar: splitting character used to separate fields in a line - :type splitChar: str, defaults to "," - - :param format: """ @property @@ -434,7 +440,7 @@ class Dataset: def times(self): """Time steps of the data - :return: numpy array of time steps + :return: array of time steps :rtype: numpy.ndarray """ @@ -470,7 +476,7 @@ class Dataset: """Variables (independent + dependent + auxiliary) :return: dictionary of all variables - :rtype: dict of Variable(s) + :rtype: dict """ variables = {} @@ -485,27 +491,31 @@ class Dataset: return variables - def readHeader(self, splitChar=","): - """Read the ICARTT header (from file)""" + def readHeader(self, delimiter=DEFAULT_FIELD_DELIM): + """Read the ICARTT header (from file) + + :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM + :type delimiter: str, optional + """ class FilehandleWithLinecounter: # TODO: this could be a 'utils' class - def __init__(self, f, splitChar): + def __init__(self, f, delimiter): self.f = f self.line = 0 - self.splitChar = splitChar + self.delimiter = delimiter def readline(self, doSplit=True): self.line += 1 dmp = self.f.readline().replace("\n", "").replace("\r", "") if doSplit: - dmp = [word.strip(" ") for word in dmp.split(self.splitChar)] + dmp = [word.strip(" ") for word in dmp.split(self.delimiter)] return dmp if self.inputFhandle: if self.inputFhandle.closed: self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8") - f = FilehandleWithLinecounter(self.inputFhandle, splitChar) + f = FilehandleWithLinecounter(self.inputFhandle, delimiter) self._readHeader(f) self.inputFhandle.close() @@ -697,8 +707,12 @@ class Dataset: f"Number of header lines suggested in line 1 ({int(nHeaderSuggested)}) do not match actual header lines read ({int(self.nHeader)})" ) - def readData(self, splitChar=","): - """Read ICARTT data (from file)""" + def readData(self, delimiter=DEFAULT_FIELD_DELIM): + """Read ICARTT data 
(from file) + + :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM + :type delimiter: str, optional + """ if self.inputFhandle: if self.inputFhandle.closed: self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8") @@ -706,23 +720,27 @@ class Dataset: for _ in range(self.nHeaderFile): self.inputFhandle.readline() - self.data.addFromTxt(self.inputFhandle, splitChar) + self.data.addFromTxt(self.inputFhandle, delimiter) self.inputFhandle.close() - def read(self, splitChar=","): - """Read ICARTT data and header""" - self.readHeader(splitChar) + def read(self, delimiter=DEFAULT_FIELD_DELIM): + """Read ICARTT data and header + + :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM + :type delimiter: str, optional + """ + self.readHeader(delimiter) self.endDefineMode() - self.readData(splitChar) + self.readData(delimiter) def makeFileName(self, dateFormat="%Y%m%d"): """Create ICARTT-compliant file name based on the information contained in the dataset - :param dateFormat: date format to use when parsing - :type dateFormat: str, defaults to '%Y%m%d' + :param dateFormat: date format to use when parsing, defaults to '%Y%m%d' + :type dateFormat: str, optional :return: file name generated - :rtype: string + :rtype: str """ fn = ( self.dataID @@ -744,11 +762,19 @@ class Dataset: return fn + ".ict" def isValidFileName(self, name): # TODO: this could be a 'utils' function - # ICARTT standard v2 2.1.1 3) - # Filename: Uppercase and lowercase ASCII alphanumeric - # characters (i.e. A-Z, a-z, 0-9), underscore, period, - # and hyphen. File names can be a maximum 127 - # characters in length. + """test whether file name complies with ICARTT standard: + + ICARTT standard v2 2.1.1 3) + + Filename: Uppercase and lowercase ASCII alphanumeric characters (i.e. A-Z, a-z, 0-9), underscore, period, and hyphen. File names can be a maximum 127 characters in length. 
+ + :param name: file name + :type name: str + + :return: is file name valid according to ICARTT standard? + :rtype: bool + """ + def isAsciiAlpha(x): # TODO: this could be a 'utils' function return re.match("[a-zA-Z0-9-_.]", x) @@ -760,8 +786,11 @@ class Dataset: def writeHeader(self, f=sys.stdout, delimiter=DEFAULT_FIELD_DELIM): """Write header - :param f: handle to write to - :type f: file handle or StringIO stream, defaults to sys.stdout + :param f: `file object <https://docs.python.org/3/glossary.html#term-file-object>`_ to write to, defaults to sys.stdout + :type f: handle, optional + + :param delimiter: field delimiter character(s) for output, defaults to DEFAULT_FIELD_DELIM + :type delimiter: str, optional """ def write_to_file(txt): @@ -859,18 +888,41 @@ class Dataset: ): """Write data - :param f: handle to write to - :type f: file handle or StringIO stream, defaults to sys.stdout + :param f: `file object <https://docs.python.org/3/glossary.html#term-file-object>`_ to write to, defaults to sys.stdout + :type f: handle, optional + + :param fmt: format string for output, accepts anything :func:`numpy.savetxt` would, defaults to DEFAULT_NUM_FORMAT + :type fmt: str or sequence of str, optional + + :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM + :type delimiter: str, optional """ - self.data.write(f=f, fmt=fmt, delimiter=delimiter) + if self.format == Formats.FFI1001: + self.data.write(f=f, fmt=fmt, delimiter=delimiter) + elif self.format == Formats.FFI2110: + self.data.write( + f=f, + fmt_aux=fmt, + delimiter_aux=delimiter, + fmt_dep=fmt, + delimiter_dep=delimiter, + ) + else: + raise NotImplementedError("Unknown FFI!") def write( self, f=sys.stdout, fmt=DEFAULT_NUM_FORMAT, delimiter=DEFAULT_FIELD_DELIM ): """Write header and data - :param f: handle to write to - :type f: file handle or StringIO stream, defaults to sys.stdout + :param f: `file object <https://docs.python.org/3/glossary.html#term-file-object>`_ to write to, defaults to sys.stdout + :type f: handle, optional + + :param fmt: format string for output, accepts anything :func:`numpy.savetxt` would, defaults to DEFAULT_NUM_FORMAT + :type 
fmt: str or sequence of str, optional + + :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM + :type delimiter: str, optional """ self.writeHeader(f=f, delimiter=delimiter) self.writeData(f=f, fmt=fmt, delimiter=delimiter) @@ -905,8 +957,27 @@ class Dataset: def __str__(self): return f"ICARTT Dataset {self.makeFileName()}" - def __init__(self, f=None, loadData=True, splitChar=",", format=Formats.FFI1001): - """Constructor method""" + def __init__( + self, + f=None, + loadData=True, + delimiter=DEFAULT_FIELD_DELIM, + format=Formats.FFI1001, + ): + """ + :param f: file path or file handle to use, defaults to None + :type f: str or `file object <https://docs.python.org/3/glossary.html#term-file-object>`_, optional + + :param loadData: whether to load data as well (or only header if False), defaults to `True` + :type loadData: bool, optional + + :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM + :type delimiter: str, optional + + :param format: ICARTT file format to create, defaults to 1001 + :type format: Formats, optional + """ + self.format = format self.version = None @@ -956,7 +1027,7 @@ class Dataset: if not self.isValidFileName(pathlib.Path(f).name): warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename") - self.readHeader(splitChar) + self.readHeader(delimiter) if loadData: self.endDefineMode() - self.readData(splitChar) + self.readData(delimiter) diff --git a/tests/read_and_write.py b/tests/read_and_write.py deleted file mode 100644 index 954322d..0000000 --- a/tests/read_and_write.py +++ /dev/null @@ -1,15 +0,0 @@ -from argparse import ArgumentParser - -parser = ArgumentParser() -parser.add_argument("input_file", type=str) -parser.add_argument("output_file", type=str) - -args = parser.parse_args() - -import icartt - -ict = icartt.Dataset(args.input_file) -ict.splitChar = ", " - -with open(args.output_file, "w") as f: - ict.write(f) diff --git a/tests/test_1001.py b/tests/test_1001.py index 572186a..7a19fcb 100644 --- a/tests/test_1001.py +++ 
b/tests/test_1001.py @@ -47,7 +47,7 @@ class Simple1001TestCase(unittest.TestCase): ) self.assertEqual(ict.independentVariable.longname, None) self.assertEqual(ict.independentVariable.scale, 1.0) - self.assertEqual(ict.independentVariable.miss, -99999.0) + self.assertEqual(ict.independentVariable.miss, -9999.0) def testDvar(self): ict = icartt.Dataset(self.fn, loadData=False) diff --git a/tests/test_2110.py b/tests/test_2110.py index 6b92a62..d225000 100644 --- a/tests/test_2110.py +++ b/tests/test_2110.py @@ -48,7 +48,7 @@ class Simple2110TestCase(unittest.TestCase): ict.independentVariable.longname, "number of seconds from 00:00 UTC" ) self.assertEqual(ict.independentVariable.scale, 1.0) - self.assertEqual(ict.independentVariable.miss, -99999.0) + self.assertEqual(ict.independentVariable.miss, -9999.0) def testAuxvar(self): ict = icartt.Dataset(self.fn, loadData=False) -- GitLab