From 3e55b6198ef44026790a5756abbc634338104809 Mon Sep 17 00:00:00 2001 From: Florian Obersteiner <florian.obersteiner@kit.edu> Date: Thu, 7 Apr 2022 11:54:00 +0200 Subject: [PATCH] utility functions / classes done --- src/icartt/dataset.py | 42 +++++++----------------------------------- src/icartt/ictutils.py | 31 +++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index 789231d..660a2e9 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -498,25 +498,10 @@ class Dataset: :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM :type delimiter: str, optional """ - - class FilehandleWithLinecounter: # TODO: this could be a 'utils' class - def __init__(self, f, delimiter): - self.f = f - self.line = 0 - self.delimiter = delimiter - - def readline(self, doSplit=True): - self.line += 1 - dmp = self.f.readline().replace("\n", "").replace("\r", "") - if doSplit: - dmp = [word.strip(" ") for word in dmp.split(self.delimiter)] - return dmp - if self.inputFhandle: if self.inputFhandle.closed: self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8") - - f = FilehandleWithLinecounter(self.inputFhandle, delimiter) + f = utl.FilehandleWithLinecounter(self.inputFhandle, delimiter) self._readHeader(f) self.inputFhandle.close() @@ -579,18 +564,9 @@ class Dataset: # here that the independent variable should monotonically increase even when # crossing over to a second day. 
- def extractVardesc(dmp): # TODO: could be a 'utils' function or one line, - shortname = dmp[ - 0 - ] # shortname, units, standardname, longname, *_ = dmp + [None] * 3 - units = dmp[1] - standardname = dmp[2] if len(dmp) > 2 else None - longname = dmp[3] if len(dmp) > 3 else None - return shortname, units, standardname, longname - if self.format == Formats.FFI2110: dmp = f.readline() - shortname, units, standardname, longname = extractVardesc(dmp) + shortname, units, standardname, longname = utl.extractVardesc(dmp) self.independentBoundedVariable = Variable( shortname, units, @@ -600,7 +576,7 @@ class Dataset: ) dmp = f.readline() - shortname, units, standardname, longname = extractVardesc(dmp) + shortname, units, standardname, longname = utl.extractVardesc(dmp) self.independentVariable = Variable( shortname, units, @@ -633,7 +609,7 @@ class Dataset: # the name used for that variable as a column header, i.e., the last header # line prior to start of data.). dmp = f.readline() - shortname, units, standardname, longname = extractVardesc(dmp) + shortname, units, standardname, longname = utl.extractVardesc(dmp) vshortname = [shortname] vunits = [units] vstandardname = [standardname] @@ -641,7 +617,7 @@ class Dataset: for _ in range(1, nvar): dmp = f.readline() - shortname, units, standardname, longname = extractVardesc(dmp) + shortname, units, standardname, longname = utl.extractVardesc(dmp) vshortname += [shortname] vunits += [units] vstandardname += [standardname] @@ -768,7 +744,7 @@ class Dataset: return fn + ".ict" - def isValidFileName(self, name): # TODO: this could be a 'utils' function + def isValidFileName(self, name): """test whether file name complies with ICARTT standard: ICARTT standard v2 2.1.1 3) @@ -781,11 +757,7 @@ class Dataset: :return: is file name valid according to ICARTT standard? 
:rtype: bool """ - - def isAsciiAlpha(x): # TODO: this could be a 'utils' function - return re.match("[a-zA-Z0-9-_.]", x) - - allAsciiAlpha = all(isAsciiAlpha(x) for x in name) + allAsciiAlpha = utl.isAsciiAlpha(name) lessThan128Characters = len(name) < 128 return allAsciiAlpha and lessThan128Characters and name.endswith(".ict") diff --git a/src/icartt/ictutils.py b/src/icartt/ictutils.py index afedb84..0f2066c 100644 --- a/src/icartt/ictutils.py +++ b/src/icartt/ictutils.py @@ -3,6 +3,33 @@ import re -def isAsciiAlphaOrUnderscore(x: str, _only="[a-zA-Z0-9_]") -> bool: +class FilehandleWithLinecounter: + """a file handle that counts the number of lines that were read""" + + def __init__(self, f, delimiter): + self.f = f + self.line = 0 + self.delimiter = delimiter + + def readline(self, doSplit=True): + self.line += 1 + dmp = self.f.readline().replace("\n", "").replace("\r", "") + if doSplit: + dmp = [word.strip(" ") for word in dmp.split(self.delimiter)] + return dmp + + +def isAsciiAlphaOrUnderscore(x: str) -> bool: """check if string x contains only characters from [a-zA-Z0-9_] regex""" - return re.match(_only, x) + return re.match("[a-zA-Z0-9_]", x) + + +def isAsciiAlpha(x): + """check if string x contains only characters from [a-zA-Z0-9-_.] regex""" + return re.match("[a-zA-Z0-9-_.]", x) + + +def extractVardesc(line_parts: list) -> str: + """extract variable description from ict header line parts (split line)""" + shortname, units, standardname, longname, *_ = line_parts + [None] * 3 + return shortname, units, standardname, longname -- GitLab