Skip to content
Snippets Groups Projects
Commit 201348d9 authored by Florian Obersteiner
Browse files

more mini-refactorings; explicit for loops, loops over dict keys etc, more TODOs

parent 4cae8366
Branches
Tags
1 merge request: !6 "code refactor"
This commit is part of merge request !6. Comments created here will be created in the context of that merge request.
......@@ -4,9 +4,11 @@ import pathlib
import collections
import re
import warnings
from enum import IntEnum
import numpy as np
from enum import IntEnum
DEFAULT_NUM_FORMAT = "%f"
DEFAULT_FIELD_DELIM = ", "
......@@ -67,7 +69,7 @@ class DataStore1001:
ivarvalue = vmiss_to_npnan(kwargs[self.ivarname], self.missvals[self.ivarname])
newline = np.array(np.NaN, dtype=[(v, "f8") for v in self.varnames])
for key in kwargs.keys():
for key in kwargs:
if key in self.varnames:
newline[key] = vmiss_to_npnan(kwargs[key], self.missvals[key])
......@@ -77,8 +79,7 @@ class DataStore1001:
else:
if ivarvalue in self.data[self.ivarname]:
raise Exception("Cannot replace data (yet).")
else:
self.data = np.append(self.data, newline)
self.data = np.append(self.data, newline)
def denanify(self, d):
dd = d.copy()
......@@ -125,7 +126,7 @@ class DataStore2110(collections.UserDict):
self.add(**newdata)
def addBulkDep(self, ivar, raw):
nlines, nvars = raw.shape # nvars not used
nlines, _ = raw.shape # _ : nvars not used
self._addDeplines(ivar, raw, nlines)
def _addDeplines(self, ivar, raw, n):
......@@ -163,7 +164,7 @@ class DataStore2110(collections.UserDict):
ivarvalue = vmiss_to_npnan(kwargs[self.ivarname], self.missvals[self.ivarname])
# this is an AUX line
if any(x in self.auxvarnames for x in kwargs.keys()):
if any(x in self.auxvarnames for x in kwargs):
# and we create the whole dataset if needed
if not ivarvalue in self.data.keys():
self.data[ivarvalue] = {
......@@ -173,7 +174,7 @@ class DataStore2110(collections.UserDict):
self.data[ivarvalue]["AUX"].add(**kwargs)
# this is a DEP line
if any(x in self.dvarnames for x in kwargs.keys()):
if any(x in self.dvarnames for x in kwargs):
if not self.ibvarname in kwargs.keys():
raise Exception("Need independent (bounded) variable data.")
......@@ -321,15 +322,15 @@ class Variable:
descstr += [str(self.longname)]
return splitChar.join(descstr)
def isValidVariablename(self, name):
def isValidVariablename(self, name): # TODO: this could be a 'utils' function
# ICARTT Standard v2 2.1.1 2)
# Variable short names and variable standard names:
# Uppercase and lowercase ASCII alphanumeric characters
# and underscores.
def isAsciiAlphaOrUnderscore(x):
def isAsciiAlphaOrUnderscore(x): # TODO: this could be a 'utils' function
return re.match("[a-zA-Z0-9_]", x)
allAreAlphaOrUnderscore = all([isAsciiAlphaOrUnderscore(x) for x in name])
allAreAlphaOrUnderscore = all(isAsciiAlphaOrUnderscore(x) for x in name)
# The first character must be a letter,
firstIsAlpha = bool(re.match("[a-zA-Z]", name[0]))
# and the name can be at most 31 characters in length.
......@@ -449,7 +450,7 @@ class Dataset:
def readHeader(self, splitChar=","):
"""Read the ICARTT header (from file)"""
class FilehandleWithLinecounter:
class FilehandleWithLinecounter: # TODO: this could be a 'utils' class
def __init__(self, f, splitChar):
self.f = f
self.line = 0
......@@ -667,10 +668,11 @@ class Dataset:
if self.inputFhandle.closed:
self.inputFhandle = open(self.inputFhandle.name, encoding='utf-8')
_ = [self.inputFhandle.readline() for i in range(self.nHeaderFile)]
raw = [line.split(splitChar) for line in self.inputFhandle]
_ = self.data.addBulkFromTxt(raw)
for _ in range(self.nHeaderFile):
self.inputFhandle.readline()
raw = [line.split(splitChar) for line in self.inputFhandle]
self.data.addBulkFromTxt(raw)
self.inputFhandle.close()
def read(self, splitChar=","):
......@@ -705,13 +707,13 @@ class Dataset:
return fn + ".ict"
def isValidFileName(self, name):
def isValidFileName(self, name): # TODO: this could be a 'utils' function
# ICARTT standard v2 2.1.1 3)
# Filename: Uppercase and lowercase ASCII alphanumeric
# characters (i.e. A-Z, a-z, 0-9), underscore, period,
# and hyphen. File names can be a maximum 127
# characters in length.
def isAsciiAlpha(x):
def isAsciiAlpha(x): # TODO: this could be a 'utils' function
return re.match("[a-zA-Z0-9-_.]", x)
allAsciiAlpha = all(isAsciiAlpha(x) for x in name)
......@@ -754,6 +756,7 @@ class Dataset:
write_to_file(
delimiter.join(
[
# TODO: if we use anything other than datetime.datetime, we'll have to ensure this still works...
datetime.datetime.strftime(x, delimiter.join(["%Y", "%m", "%d"]))
for x in [self.dateOfCollection, self.dateOfRevision]
]
......@@ -781,7 +784,8 @@ class Dataset:
)
)
# Variable names and units (Short variable name and units are required, and optional long descriptive name, in that order, and separated by commas. If the variable is unitless, enter the keyword "none" for its units. Each short variable name and units (and optional long name) are entered on one line. The short variable name must correspond exactly to the name used for that variable as a column header, i.e., the last header line prior to start of data.).
_ = [write_to_file(DVAR.desc(delimiter)) for DVAR in self.dependentVariables.values()]
for DVAR in self.dependentVariables.values():
write_to_file(DVAR.desc(delimiter))
if self.format == Formats.FFI2110:
# Number of auxiliary variables (Integer value showing the number of auxiliary variables: the total number of columns in an auxiliary data line is this value plus one.).
write_to_file(len(self.auxiliaryVariables))
......@@ -798,24 +802,25 @@ class Dataset:
)
)
# Variable names and units (Short variable name and units are required, and optional long descriptive name, in that order, and separated by commas. If the variable is unitless, enter the keyword "none" for its units. Each short variable name and units (and optional long name) are entered on one line. The short variable name must correspond exactly to the name used for that variable as a column header, i.e., the last header line prior to start of data.).
_ = [
for AUXVAR in self.auxiliaryVariables.values():
write_to_file(AUXVAR.desc(delimiter))
for AUXVAR in self.auxiliaryVariables.values()
]
# Number of SPECIAL comment lines (Integer value indicating the number of lines of special comments, NOT including this line.).
write_to_file(f"{len(self.specialComments)}")
# Special comments (Notes of problems or special circumstances unique to this file. An example would be comments/problems associated with a particular flight.).
_ = [write_to_file(x) for x in self.specialComments]
for x in self.specialComments:
write_to_file(x)
# Number of Normal comments (i.e., number of additional lines of SUPPORTING information: Integer value indicating the number of lines of additional information, NOT including this line.).
write_to_file(f"{self.normalComments.nlines}")
# Normal comments (SUPPORTING information: This is the place for investigators to more completely describe the data and measurement parameters. The supporting information structure is described below as a list of key word: value pairs. Specifically include here information on the platform used, the geo-location of data, measurement technique, and data revision comments. Note the non-optional information regarding uncertainty, the upper limit of detection (ULOD) and the lower limit of detection (LLOD) for each measured variable. The ULOD and LLOD are the values, in the same units as the measurements that correspond to the flags -7777s and -8888s within the data, respectively. The last line of this section should contain all the short variable names on one line. The key words in this section are written in BOLD below and must appear in this section of the header along with the relevant data listed after the colon. For key words where information is not needed or applicable, simply enter N/A.).
# re-create last line out of actual data if missing...
if self.normalComments.shortnames == []:
if not self.normalComments.shortnames:
self.normalComments.shortnames = delimiter.join(
[self.variables[x].shortname for x in self.variables]
)
_ = [write_to_file(x) for x in self.normalComments]
for x in self.normalComments:
write_to_file(x)
def writeData(
self, f=sys.stdout, fmt=DEFAULT_NUM_FORMAT, delimiter=DEFAULT_FIELD_DELIM
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment