Skip to content
Snippets Groups Projects
Commit 3e55b619 authored by Florian Obersteiner
Browse files

utility functions / classes done

parent 933855d9
Branches
No related tags found
No related merge requests found
...@@ -498,25 +498,10 @@ class Dataset: ...@@ -498,25 +498,10 @@ class Dataset:
:param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
:type delimiter: str, optional :type delimiter: str, optional
""" """
class FilehandleWithLinecounter: # TODO: this could be a 'utils' class
def __init__(self, f, delimiter):
self.f = f
self.line = 0
self.delimiter = delimiter
def readline(self, doSplit=True):
self.line += 1
dmp = self.f.readline().replace("\n", "").replace("\r", "")
if doSplit:
dmp = [word.strip(" ") for word in dmp.split(self.delimiter)]
return dmp
if self.inputFhandle: if self.inputFhandle:
if self.inputFhandle.closed: if self.inputFhandle.closed:
self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8") self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8")
f = utl.FilehandleWithLinecounter(self.inputFhandle, delimiter)
f = FilehandleWithLinecounter(self.inputFhandle, delimiter)
self._readHeader(f) self._readHeader(f)
self.inputFhandle.close() self.inputFhandle.close()
...@@ -579,18 +564,9 @@ class Dataset: ...@@ -579,18 +564,9 @@ class Dataset:
# here that the independent variable should monotonically increase even when # here that the independent variable should monotonically increase even when
# crossing over to a second day. # crossing over to a second day.
def extractVardesc(dmp): # TODO: could be a 'utils' function or one line,
shortname = dmp[
0
] # shortname, units, standardname, longname, *_ = dmp + [None] * 3
units = dmp[1]
standardname = dmp[2] if len(dmp) > 2 else None
longname = dmp[3] if len(dmp) > 3 else None
return shortname, units, standardname, longname
if self.format == Formats.FFI2110: if self.format == Formats.FFI2110:
dmp = f.readline() dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp) shortname, units, standardname, longname = utl.extractVardesc(dmp)
self.independentBoundedVariable = Variable( self.independentBoundedVariable = Variable(
shortname, shortname,
units, units,
...@@ -600,7 +576,7 @@ class Dataset: ...@@ -600,7 +576,7 @@ class Dataset:
) )
dmp = f.readline() dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp) shortname, units, standardname, longname = utl.extractVardesc(dmp)
self.independentVariable = Variable( self.independentVariable = Variable(
shortname, shortname,
units, units,
...@@ -633,7 +609,7 @@ class Dataset: ...@@ -633,7 +609,7 @@ class Dataset:
# the name used for that variable as a column header, i.e., the last header # the name used for that variable as a column header, i.e., the last header
# line prior to start of data.). # line prior to start of data.).
dmp = f.readline() dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp) shortname, units, standardname, longname = utl.extractVardesc(dmp)
vshortname = [shortname] vshortname = [shortname]
vunits = [units] vunits = [units]
vstandardname = [standardname] vstandardname = [standardname]
...@@ -641,7 +617,7 @@ class Dataset: ...@@ -641,7 +617,7 @@ class Dataset:
for _ in range(1, nvar): for _ in range(1, nvar):
dmp = f.readline() dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp) shortname, units, standardname, longname = utl.extractVardesc(dmp)
vshortname += [shortname] vshortname += [shortname]
vunits += [units] vunits += [units]
vstandardname += [standardname] vstandardname += [standardname]
...@@ -768,7 +744,7 @@ class Dataset: ...@@ -768,7 +744,7 @@ class Dataset:
return fn + ".ict" return fn + ".ict"
def isValidFileName(self, name): # TODO: this could be a 'utils' function def isValidFileName(self, name):
"""test whether file name complies with ICARTT standard: """test whether file name complies with ICARTT standard:
ICARTT standard v2 2.1.1 3) ICARTT standard v2 2.1.1 3)
...@@ -781,11 +757,7 @@ class Dataset: ...@@ -781,11 +757,7 @@ class Dataset:
:return: is file name valid according to ICARTT standard? :return: is file name valid according to ICARTT standard?
:rtype: bool :rtype: bool
""" """
allAsciiAlpha = utl.isAsciiAlpha(name)
def isAsciiAlpha(x): # TODO: this could be a 'utils' function
return re.match("[a-zA-Z0-9-_.]", x)
allAsciiAlpha = all(isAsciiAlpha(x) for x in name)
lessThan128Characters = len(name) < 128 lessThan128Characters = len(name) < 128
return allAsciiAlpha and lessThan128Characters and name.endswith(".ict") return allAsciiAlpha and lessThan128Characters and name.endswith(".ict")
......
...@@ -3,6 +3,33 @@ ...@@ -3,6 +3,33 @@
import re import re
class FilehandleWithLinecounter:
    """a file handle wrapper that counts the number of lines that were read

    :param f: open, readable text file handle (anything providing ``readline()``)
    :param delimiter: field delimiter character(s) used to split a line
    :type delimiter: str
    """

    def __init__(self, f, delimiter):
        self.f = f
        self.line = 0  # number of readline() calls made so far
        self.delimiter = delimiter

    def readline(self, doSplit=True):
        """read the next line, with all line-break characters removed

        :param doSplit: if True, split the line at the delimiter and strip
            surrounding blanks from each field; if False, return the line as is
        :type doSplit: bool, optional
        :return: list of fields if doSplit is True, else the line as one string
        :rtype: list or str
        """
        # counted on every call, including calls made after the end of the file
        self.line += 1
        dmp = self.f.readline().replace("\n", "").replace("\r", "")
        if doSplit:
            # only blanks are stripped; other whitespace (e.g. tabs) is kept
            dmp = [word.strip(" ") for word in dmp.split(self.delimiter)]
        return dmp
def isAsciiAlphaOrUnderscore(x: str) -> bool:
    """check if string x contains only characters from [a-zA-Z0-9_] regex

    :param x: string to test; may be a single character or a longer string
    :type x: str
    :return: True if x is non-empty and every character is in [a-zA-Z0-9_]
    :rtype: bool
    """
    # fullmatch with "+" tests the whole string; re.match with a bare character
    # class would only test the first character. bool() honours the annotation.
    return bool(re.fullmatch(r"[a-zA-Z0-9_]+", x))
def isAsciiAlpha(x: str) -> bool:
    """check if string x contains only characters from [a-zA-Z0-9-_.] regex

    :param x: string to test; may be a single character or a longer string
    :type x: str
    :return: True if x is non-empty and every character is an ASCII letter,
        an ASCII digit, a hyphen, an underscore or a dot
    :rtype: bool
    """
    # fullmatch with "+" tests the whole string, so the check is correct for a
    # single character as well as for a complete name; re.match with a bare
    # character class would only test the first character.
    # The hyphen is placed last in the class so it cannot be read as a range.
    return bool(re.fullmatch(r"[a-zA-Z0-9_.-]+", x))
def extractVardesc(line_parts: list) -> tuple:
    """extract variable description from ict header line parts (split line)

    :param line_parts: fields of one variable description line, in the order
        shortname, units, standardname, longname; additional fields are ignored
    :type line_parts: list or tuple
    :return: (shortname, units, standardname, longname); fields missing at the
        end of line_parts are returned as None
    :rtype: tuple
    :raises ValueError: if line_parts is empty
    """
    # pad with None so that up to three trailing fields may be absent;
    # list() makes the concatenation work for tuples and other sequences too
    shortname, units, standardname, longname, *_ = list(line_parts) + [None] * 3
    return shortname, units, standardname, longname
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment