Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mbees/icartt_pypackage
  • FObersteiner/icartt-dev
  • zssherman/icartt_pypackage
3 results
Show changes
Commits on Source (7)
......@@ -8,6 +8,8 @@ from enum import IntEnum
import numpy as np
from . import ictutils as utl
DEFAULT_NUM_FORMAT = "%g"
"""Default number format for output. Provides the `fmt` parameter of :func:`numpy.savetxt` internally."""
......@@ -317,6 +319,9 @@ class StandardNormalComments(collections.UserList):
self.keywords["UNCERTAINTY"].naAllowed = False
self.keywords["REVISION"].naAllowed = False
def __str__(self):
return "\n".join(f"{str(v)}" for v in self.keywords.values())
class Variable:
"""An ICARTT variable description with name, units, scale and missing value."""
......@@ -337,15 +342,13 @@ class Variable:
descstr += [str(self.longname)]
return delimiter.join(descstr)
def isValidVariablename(self, name): # TODO: this could be a 'utils' function
def isValidVariablename(self, name):
# ICARTT Standard v2 2.1.1 2)
# Variable short names and variable standard names:
# Uppercase and lowercase ASCII alphanumeric characters
# and underscores.
def isAsciiAlphaOrUnderscore(x): # TODO: this could be a 'utils' function
return re.match("[a-zA-Z0-9_]", x)
allAreAlphaOrUnderscore = all(isAsciiAlphaOrUnderscore(x) for x in name)
allAreAlphaOrUnderscore = all(re.match("[a-zA-Z0-9_]", c) for c in name)
# The first character must be a letter,
firstIsAlpha = bool(re.match("[a-zA-Z]", name[0]))
# and the name can be at most 31 characters in length.
......@@ -398,10 +401,6 @@ class Variable:
self.scale = scale
self.miss = miss
def __repr__(self):
# TODO: this should be something other than __str__?
return self.desc()
def __str__(self):
return self.desc()
......@@ -499,25 +498,10 @@ class Dataset:
:param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
:type delimiter: str, optional
"""
class FilehandleWithLinecounter: # TODO: this could be a 'utils' class
def __init__(self, f, delimiter):
self.f = f
self.line = 0
self.delimiter = delimiter
def readline(self, doSplit=True):
self.line += 1
dmp = self.f.readline().replace("\n", "").replace("\r", "")
if doSplit:
dmp = [word.strip(" ") for word in dmp.split(self.delimiter)]
return dmp
if self.inputFhandle:
if self.inputFhandle.closed:
self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8")
f = FilehandleWithLinecounter(self.inputFhandle, delimiter)
f = utl.FilehandleWithLinecounter(self.inputFhandle, delimiter)
self._readHeader(f)
self.inputFhandle.close()
......@@ -580,18 +564,9 @@ class Dataset:
# here that the independent variable should monotonically increase even when
# crossing over to a second day.
def extractVardesc(dmp): # TODO: could be a 'utils' function or one line,
shortname = dmp[
0
] # shortname, units, standardname, longname, *_ = dmp + [None] * 3
units = dmp[1]
standardname = dmp[2] if len(dmp) > 2 else None
longname = dmp[3] if len(dmp) > 3 else None
return shortname, units, standardname, longname
if self.format == Formats.FFI2110:
dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp)
shortname, units, standardname, longname = utl.extractVardesc(dmp)
self.independentBoundedVariable = Variable(
shortname,
units,
......@@ -601,7 +576,7 @@ class Dataset:
)
dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp)
shortname, units, standardname, longname = utl.extractVardesc(dmp)
self.independentVariable = Variable(
shortname,
units,
......@@ -634,7 +609,7 @@ class Dataset:
# the name used for that variable as a column header, i.e., the last header
# line prior to start of data.).
dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp)
shortname, units, standardname, longname = utl.extractVardesc(dmp)
vshortname = [shortname]
vunits = [units]
vstandardname = [standardname]
......@@ -642,7 +617,7 @@ class Dataset:
for _ in range(1, nvar):
dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp)
shortname, units, standardname, longname = utl.extractVardesc(dmp)
vshortname += [shortname]
vunits += [units]
vstandardname += [standardname]
......@@ -702,8 +677,14 @@ class Dataset:
rawNcom = [f.readline(doSplit=False) for _ in range(nncom)]
self.normalComments.ingest(rawNcom)
r = self.normalComments.keywords["REVISION"].data
r = "0" if not r else r[0].strip("R")
self.revision = r
self.nHeaderFile = f.line
# TODO: this warning might be misleading since it assumes all normalComment keywords
# have been defined - which is not guaranteed.
if self.nHeader != nHeaderSuggested:
warnings.warn(
f"Number of header lines suggested in line 1 ({int(nHeaderSuggested)}) do not match actual header lines read ({int(self.nHeader)})"
......@@ -763,7 +744,7 @@ class Dataset:
return fn + ".ict"
def isValidFileName(self, name): # TODO: this could be a 'utils' function
def isValidFileName(self, name):
"""test whether file name complies with ICARTT standard:
ICARTT standard v2 2.1.1 3)
......@@ -776,11 +757,7 @@ class Dataset:
:return: is file name valid according to ICARTT standard?
:rtype: bool
"""
def isAsciiAlpha(x): # TODO: this could be a 'utils' function
return re.match("[a-zA-Z0-9-_.]", x)
allAsciiAlpha = all(isAsciiAlpha(x) for x in name)
allAsciiAlpha = all(re.match("[a-zA-Z0-9-_.]", c) for c in name)
lessThan128Characters = len(name) < 128
return allAsciiAlpha and lessThan128Characters and name.endswith(".ict")
......@@ -952,12 +929,18 @@ class Dataset:
if not self.inputFhandle.closed:
self.inputFhandle.close()
def __repr__(self):
# TODO: this could be more meaningful
return "icartt.Dataset()"
def __str__(self):
return f"ICARTT Dataset {self.makeFileName()}"
s = [
f"ICARTT Dataset {self.makeFileName()}, format index {self.format.value}",
f"data ID: {self.dataID}",
f"location ID: {self.locationID}",
f"PI: {self.PIName}",
f"Affiliation: {self.PIAffiliation}",
f"Mission: {self.missionName}",
f"Collection date, Revision date: {self.dateOfCollection}, {self.dateOfRevision}",
f"Variables ({len(self.variables)}):\n{', '.join(x for x in self.variables)}",
]
return "\n".join(s)
def __init__(
self,
......@@ -981,7 +964,7 @@ class Dataset:
"""
self.format = format
self.version = None # TODO: should this be 2.0 by default?
self.version = None # TODO: should this be 2.0 by default?
self.dataID = "dataID"
self.locationID = "locationID"
......@@ -1028,7 +1011,11 @@ class Dataset:
if not self.isValidFileName(pathlib.Path(f).name):
warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename")
# TODO: else -> split on "_", then the first part should be dataID, second part locationID
else: # try to obtain dataID and locationID from file name
parts = pathlib.Path(f).name.split("_")
if len(parts) > 2:
self.dataID = parts[0]
self.locationID = parts[1]
self.readHeader(delimiter)
if loadData:
......
# -*- coding: utf-8 -*-
class FilehandleWithLinecounter:
    """A thin wrapper around a file handle that counts the lines read through it.

    :param f: object providing a ``readline()`` method that returns a string
    :param delimiter: separator used to split a line into fields
    :type delimiter: str
    """

    def __init__(self, f, delimiter):
        self.f = f
        self.delimiter = delimiter
        # number of readline() calls made so far through this wrapper
        self.line = 0

    def readline(self, doSplit=True):
        """Read the next line, drop line-break characters and optionally split it.

        The line counter is incremented on every call.

        :param doSplit: if True, split the line at the delimiter and strip
            blanks from each field; if False, return the line as one string
        :type doSplit: bool, optional
        :return: list of fields if doSplit is True, otherwise the line itself
        :rtype: list or str
        """
        self.line += 1
        raw = self.f.readline()
        # all newline / carriage return characters are removed, wherever they occur
        text = raw.replace("\n", "").replace("\r", "")
        if not doSplit:
            return text
        return [field.strip(" ") for field in text.split(self.delimiter)]
def extractVardesc(line_parts: list) -> tuple:
    """Extract a variable description from the parts of a split ICARTT header line.

    :param line_parts: fields of one header line in the order shortname, units,
        standardname, longname; trailing fields may be absent
    :type line_parts: list
    :raises ValueError: if line_parts is empty
    :return: ``(shortname, units, standardname, longname)``; fields that are not
        present in line_parts are returned as None, surplus fields are ignored
    :rtype: tuple
    """
    # Copy into a list so that tuples (or other sequences) can be padded as well;
    # the padding guarantees that the optional trailing fields always unpack.
    padded = list(line_parts) + [None] * 3
    shortname, units, standardname, longname, *_ = padded
    return shortname, units, standardname, longname