More mini-refactorings: explicit for loops instead of throwaway list comprehensions, direct iteration over dicts instead of .keys(), etc.; more TODOs

201348d9 · Florian Obersteiner · 4cae8366 · 201348d9
Commit 201348d9 authored 3 years ago by Florian Obersteiner
--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -4,9 +4,11 @@ import pathlib
 import collections
 import re
 import warnings
+from enum import IntEnum
+
 import numpy as np

-from enum import IntEnum
+

 DEFAULT_NUM_FORMAT = "%f"
 DEFAULT_FIELD_DELIM = ", "
@@ -67,7 +69,7 @@ class DataStore1001:
        ivarvalue = vmiss_to_npnan(kwargs[self.ivarname], self.missvals[self.ivarname])

        newline = np.array(np.NaN, dtype=[(v, "f8") for v in self.varnames])
-        for key in kwargs.keys():
+        for key in kwargs:
            if key in self.varnames:
                newline[key] = vmiss_to_npnan(kwargs[key], self.missvals[key])

@@ -77,8 +79,7 @@ class DataStore1001:
        else:
            if ivarvalue in self.data[self.ivarname]:
                raise Exception("Cannot replace data (yet).")
-            else:
-                self.data = np.append(self.data, newline)
+            self.data = np.append(self.data, newline)

    def denanify(self, d):
        dd = d.copy()
@@ -125,7 +126,7 @@ class DataStore2110(collections.UserDict):
        self.add(**newdata)

    def addBulkDep(self, ivar, raw):
-        nlines, nvars = raw.shape # nvars not used
+        nlines, _ = raw.shape # _ : nvars not used
        self._addDeplines(ivar, raw, nlines)

    def _addDeplines(self, ivar, raw, n):
@@ -163,7 +164,7 @@ class DataStore2110(collections.UserDict):
        ivarvalue = vmiss_to_npnan(kwargs[self.ivarname], self.missvals[self.ivarname])

        # this is an AUX line
-        if any(x in self.auxvarnames for x in kwargs.keys()):
+        if any(x in self.auxvarnames for x in kwargs):
            # and we create the whole dataset if needed
            if not ivarvalue in self.data.keys():
                self.data[ivarvalue] = {
@@ -173,7 +174,7 @@ class DataStore2110(collections.UserDict):
            self.data[ivarvalue]["AUX"].add(**kwargs)

        # this is a DEP line
-        if any(x in self.dvarnames for x in kwargs.keys()):
+        if any(x in self.dvarnames for x in kwargs):
            if not self.ibvarname in kwargs.keys():
                raise Exception("Need independent (bounded) variable data.")

@@ -321,15 +322,15 @@ class Variable:
            descstr += [str(self.longname)]
        return splitChar.join(descstr)

-    def isValidVariablename(self, name):
+    def isValidVariablename(self, name): # TODO: this could be a 'utils' function
        # ICARTT Standard v2 2.1.1 2)
        # Variable short names and variable standard names:
        # Uppercase and lowercase ASCII alphanumeric characters
        # and underscores.
-        def isAsciiAlphaOrUnderscore(x):
+        def isAsciiAlphaOrUnderscore(x): # TODO: this could be a 'utils' function
            return re.match("[a-zA-Z0-9_]", x)

-        allAreAlphaOrUnderscore = all([isAsciiAlphaOrUnderscore(x) for x in name])
+        allAreAlphaOrUnderscore = all(isAsciiAlphaOrUnderscore(x) for x in name)
        # The first character must be a letter,
        firstIsAlpha = bool(re.match("[a-zA-Z]", name[0]))
        # and the name can be at most 31 characters in length.
@@ -449,7 +450,7 @@ class Dataset:
    def readHeader(self, splitChar=","):
        """Read the ICARTT header (from file)"""

-        class FilehandleWithLinecounter:
+        class FilehandleWithLinecounter: # TODO: this could be a 'utils' class
            def __init__(self, f, splitChar):
                self.f = f
                self.line = 0
@@ -667,10 +668,11 @@ class Dataset:
            if self.inputFhandle.closed:
                self.inputFhandle = open(self.inputFhandle.name, encoding='utf-8')

-            _ = [self.inputFhandle.readline() for i in range(self.nHeaderFile)]
-            raw = [line.split(splitChar) for line in self.inputFhandle]
-            _ = self.data.addBulkFromTxt(raw)
+            for _ in range(self.nHeaderFile):
+                self.inputFhandle.readline()

+            raw = [line.split(splitChar) for line in self.inputFhandle]
+            self.data.addBulkFromTxt(raw)
            self.inputFhandle.close()

    def read(self, splitChar=","):
@@ -705,13 +707,13 @@ class Dataset:

        return fn + ".ict"

-    def isValidFileName(self, name):
+    def isValidFileName(self, name): # TODO: this could be a 'utils' function
        # ICARTT standard v2 2.1.1 3)
        # Filename: Uppercase and lowercase ASCII alphanumeric
        # characters (i.e. A-Z, a-z, 0-9), underscore, period,
        # and hyphen. File names can be a maximum 127
        # characters in length.
-        def isAsciiAlpha(x):
+        def isAsciiAlpha(x): # TODO: this could be a 'utils' function
            return re.match("[a-zA-Z0-9-_.]", x)

        allAsciiAlpha = all(isAsciiAlpha(x) for x in name)
@@ -754,6 +756,7 @@ class Dataset:
        write_to_file(
            delimiter.join(
                [
+                    # TODO: if we use anything other than datetime.datetime, we'll have to ensure this still works...
                    datetime.datetime.strftime(x, delimiter.join(["%Y", "%m", "%d"]))
                    for x in [self.dateOfCollection, self.dateOfRevision]
                ]
@@ -781,7 +784,8 @@ class Dataset:
            )
        )
        # Variable names and units (Short variable name and units are required, and optional long descriptive name, in that order, and separated by commas. If the variable is unitless, enter the keyword "none" for its units. Each short variable name and units (and optional long name) are entered on one line. The short variable name must correspond exactly to the name used for that variable as a column header, i.e., the last header line prior to start of data.).
-        _ = [write_to_file(DVAR.desc(delimiter)) for DVAR in self.dependentVariables.values()]
+        for DVAR in self.dependentVariables.values():
+            write_to_file(DVAR.desc(delimiter))
        if self.format == Formats.FFI2110:
            # Number of variables (Integer value showing the number of dependent variables: the total number of columns of data is this value plus one.).
            write_to_file(len(self.auxiliaryVariables))
@@ -798,24 +802,25 @@ class Dataset:
                )
            )
            # Variable names and units (Short variable name and units are required, and optional long descriptive name, in that order, and separated by commas. If the variable is unitless, enter the keyword "none" for its units. Each short variable name and units (and optional long name) are entered on one line. The short variable name must correspond exactly to the name used for that variable as a column header, i.e., the last header line prior to start of data.).
-            _ = [
+            for AUXVAR in self.auxiliaryVariables.values():
                write_to_file(AUXVAR.desc(delimiter))
-                for AUXVAR in self.auxiliaryVariables.values()
-            ]
+

        # Number of SPECIAL comment lines (Integer value indicating the number of lines of special comments, NOT including this line.).
        write_to_file(f"{len(self.specialComments)}")
        # Special comments (Notes of problems or special circumstances unique to this file. An example would be comments/problems associated with a particular flight.).
-        _ = [write_to_file(x) for x in self.specialComments]
+        for x in self.specialComments:
+            write_to_file(x)
        # Number of Normal comments (i.e., number of additional lines of SUPPORTING information: Integer value indicating the number of lines of additional information, NOT including this line.).
        write_to_file(f"{self.normalComments.nlines}")
        # Normal comments (SUPPORTING information: This is the place for investigators to more completely describe the data and measurement parameters. The supporting information structure is described below as a list of key word: value pairs. Specifically include here information on the platform used, the geo-location of data, measurement technique, and data revision comments. Note the non-optional information regarding uncertainty, the upper limit of detection (ULOD) and the lower limit of detection (LLOD) for each measured variable. The ULOD and LLOD are the values, in the same units as the measurements that correspond to the flags -7777s and -8888s within the data, respectively. The last line of this section should contain all the short variable names on one line. The key words in this section are written in BOLD below and must appear in this section of the header along with the relevant data listed after the colon. For key words where information is not needed or applicable, simply enter N/A.).
        # re-create last line out of actual data if missing...
-        if self.normalComments.shortnames == []:
+        if not self.normalComments.shortnames:
            self.normalComments.shortnames = delimiter.join(
                [self.variables[x].shortname for x in self.variables]
            )
-        _ = [write_to_file(x) for x in self.normalComments]
+        for x in self.normalComments:
+            write_to_file(x)

    def writeData(
        self, f=sys.stdout, fmt=DEFAULT_NUM_FORMAT, delimiter=DEFAULT_FIELD_DELIM