From 82b65e53945bd85c69dc593c2bed8078f5920ad8 Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 09:40:31 +0200 Subject: [PATCH 01/14] todo updated --- src/icartt/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index 6d3c7ce..862adda 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -981,7 +981,7 @@ class Dataset: """ self.format = format - self.version = None + self.version = None # TODO: should this be 2.0 by default? self.dataID = "dataID" self.locationID = "locationID" @@ -1028,6 +1028,7 @@ class Dataset: if not self.isValidFileName(pathlib.Path(f).name): warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename") + # TODO: else -> split on "_", then the first part should be dataID, second part locationID self.readHeader(delimiter) if loadData: -- GitLab From 18210b27a446c305881f55e771362d394356c42c Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 09:43:05 +0200 Subject: [PATCH 02/14] readme updated --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f074f4b..d507a6e 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ pip install -e . # Changelog -## 2.0.0 (2022-02-x) +## 2.0.0 (2022-04-x) - Compatible with ICARTT v2 standard - Formats 1001 and 2110 -- GitLab From 4b49058cb1ec9443503fc5af398a88023c9decc3 Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 09:48:17 +0200 Subject: [PATCH 03/14] readme updated #2 --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d507a6e..9ec7098 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ make your changes and then [submit a merge request](https://mbees.med.uni-augsbu ## Installation of the development version -Clone this repository / or your fork and install as "editable": +Clone this repository / or your fork, then install e.g. as "editable": ``` git clone https://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage.git or @@ -24,6 +24,8 @@ cd icartt_pypackage pip install -e . ``` +Note: the package is managed with [poetry](https://python-poetry.org/). + # Changelog ## 2.0.0 (2022-04-x) -- GitLab From f1def13ade94e0597bf14f1168cd412a5adc2349 Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 09:49:35 +0200 Subject: [PATCH 04/14] version to 2.0.0-rc1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c99cea8..11198af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "icartt" -version = "1.9.1" +version = "2.0.0-rc1" description = "ICARTT format reader and writer" license = "GPL-3.0-or-later" authors = ["Christoph Knote "] -- GitLab From dbac459a818190b51f8b611f41326a15560a66cf Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 09:51:32 +0200 Subject: [PATCH 05/14] keyword icarttt added --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 11198af..e84c2ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ authors = ["Christoph Knote "] readme = "README.md" homepage = "https://mbees.med.uni-augsburg.de/" repository = "https://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage" -keywords = [ "atmosphere", "file format", "ames", "nasa" ] +keywords = [ "atmosphere", "file format", "icartt", "ames", "nasa" ] classifiers = [ "Programming Language :: Python :: 3", "Development Status :: 5 - Production/Stable", -- GitLab From 172fb3b68f6c171968531787617e3477f8073168 Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 10:24:34 +0200 Subject: [PATCH 06/14] added __str__ for normalComments --- src/icartt/dataset.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index 862adda..cf625dc 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -318,6 +318,11 @@ class StandardNormalComments(collections.UserList): self.keywords["REVISION"].naAllowed = False + def __str__(self): + s = "\n".join(f"{str(v)}" for _, v in self.keywords.items()) + return s + + class Variable: """An ICARTT variable description with name, units, scale and missing value.""" @@ -1028,6 +1033,11 @@ class Dataset: if not self.isValidFileName(pathlib.Path(f).name): warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename") + else: # try to obtain dataID and locationID from file name + parts = pathlib.Path(f).name.split("_") + if len(parts) > 2: + self.dataID = parts[0] + self.dataID = parts[1] # TODO: else -> split on "_", then the first part should be dataID, second part locationID self.readHeader(delimiter) -- GitLab From b42d23aca3529fe691eef5532e88e9e6fdaa01da Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 10:49:29 +0200 Subject: [PATCH 07/14] added parser for REVISION keyword to Dataset.revision property --- src/icartt/dataset.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index cf625dc..a193dae 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -319,8 +319,7 @@ class StandardNormalComments(collections.UserList): def __str__(self): - s = "\n".join(f"{str(v)}" for _, v in self.keywords.items()) - return s + return "\n".join(f"{str(v)}" for v in self.keywords.values()) class Variable: @@ -707,6 +706,10 @@ class Dataset: rawNcom = [f.readline(doSplit=False) for _ in range(nncom)] self.normalComments.ingest(rawNcom) + r = self.normalComments.keywords["REVISION"].data + r = "0" if not r else r[0].strip("R") + self.revision = r + self.nHeaderFile = f.line if self.nHeader != nHeaderSuggested: -- GitLab From 4f5c45d69c2963c1c2d1dde5e3970dac919a9d90 Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 10:52:26 +0200 Subject: [PATCH 08/14] comment on nHeader != nHeaderSuggested warning --- src/icartt/dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index a193dae..811c500 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -712,6 +712,8 @@ class Dataset: self.nHeaderFile = f.line + # TODO this warning might be missleading since it assumes all normalComment keywords + # had been defined - which is not guaranteed. if self.nHeader != nHeaderSuggested: warnings.warn( f"Number of header lines suggested in line 1 ({int(nHeaderSuggested)}) do not match actual header lines read ({int(self.nHeader)})" -- GitLab From 405c44f7422ea38d5aae669f672a7f169a5a61ff Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 11:14:14 +0200 Subject: [PATCH 09/14] __str__ for Dataset --- src/icartt/dataset.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index 811c500..a20d933 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -317,7 +317,6 @@ class StandardNormalComments(collections.UserList): self.keywords["UNCERTAINTY"].naAllowed = False self.keywords["REVISION"].naAllowed = False - def __str__(self): return "\n".join(f"{str(v)}" for v in self.keywords.values()) @@ -962,12 +961,18 @@ class Dataset: if not self.inputFhandle.closed: self.inputFhandle.close() - def __repr__(self): - # TODO: this could be more meaningful - return "icartt.Dataset()" - def __str__(self): - return f"ICARTT Dataset {self.makeFileName()}" + s = [ + f"ICARTT Dataset {self.makeFileName()}, format index {self.format.value}", + f"data ID: {self.dataID}", + f"location ID: {self.locationID}", + f"PI: {self.PIName}", + f"Affiliation: {self.PIAffiliation}", + f"Mission: {self.missionName}", + f"Collection date, Revision date: {self.dateOfCollection}, {self.dateOfRevision}", + f"Variables ({len(self.variables)}):\n{', '.join(x for x in self.variables)}", + ] + return "\n".join(s) def __init__( self, @@ -991,7 +996,7 @@ class Dataset: """ self.format = format - self.version = None # TODO: should this be 2.0 by default? + self.version = None # TODO: should this be 2.0 by default? self.dataID = "dataID" self.locationID = "locationID" @@ -1038,12 +1043,11 @@ class Dataset: if not self.isValidFileName(pathlib.Path(f).name): warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename") - else: # try to obtain dataID and locationID from file name + else: # try to obtain dataID and locationID from file name parts = pathlib.Path(f).name.split("_") if len(parts) > 2: self.dataID = parts[0] - self.dataID = parts[1] - # TODO: else -> split on "_", then the first part should be dataID, second part locationID + self.locationID = parts[1] self.readHeader(delimiter) if loadData: -- GitLab From 933855d914a4487eaf4e9c70bc870f2631c38b1c Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 11:38:25 +0200 Subject: [PATCH 10/14] created .py for utility functions --- src/icartt/dataset.py | 12 ++++-------- src/icartt/ictutils.py | 8 ++++++++ 2 files changed, 12 insertions(+), 8 deletions(-) create mode 100644 src/icartt/ictutils.py diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index a20d933..789231d 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -8,6 +8,8 @@ from enum import IntEnum import numpy as np +from . import ictutils as utl + DEFAULT_NUM_FORMAT = "%g" """Default number format for output. Provides the `fmt` parameter of :func:`numpy.savetxt` internally.""" @@ -340,15 +342,13 @@ class Variable: descstr += [str(self.longname)] return delimiter.join(descstr) - def isValidVariablename(self, name): # TODO: this could be a 'utils' function + def isValidVariablename(self, name): # ICARTT Standard v2 2.1.1 2) # Variable short names and variable standard names: # Uppercase and lowercase ASCII alphanumeric characters # and underscores. - def isAsciiAlphaOrUnderscore(x): # TODO: this could be a 'utils' function - return re.match("[a-zA-Z0-9_]", x) - allAreAlphaOrUnderscore = all(isAsciiAlphaOrUnderscore(x) for x in name) + allAreAlphaOrUnderscore = all(utl.isAsciiAlphaOrUnderscore(x) for x in name) # The first character must be a letter, firstIsAlpha = bool(re.match("[a-zA-Z]", name[0])) # and the name can be at most 31 characters in length. @@ -401,10 +401,6 @@ class Variable: self.scale = scale self.miss = miss - def __repr__(self): - # TODO: this sould be something else than __str__ ? - return self.desc() - def __str__(self): return self.desc() diff --git a/src/icartt/ictutils.py b/src/icartt/ictutils.py new file mode 100644 index 0000000..afedb84 --- /dev/null +++ b/src/icartt/ictutils.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- + +import re + + +def isAsciiAlphaOrUnderscore(x: str, _only="[a-zA-Z0-9_]") -> bool: + """check if string x contains only characters from [a-zA-Z0-9_] regex""" + return re.match(_only, x) -- GitLab From 3e55b6198ef44026790a5756abbc634338104809 Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 11:54:00 +0200 Subject: [PATCH 11/14] utility functions / classes done --- src/icartt/dataset.py | 42 +++++++----------------------------------- src/icartt/ictutils.py | 31 +++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index 789231d..660a2e9 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -498,25 +498,10 @@ class Dataset: :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM :type delimiter: str, optional """ - - class FilehandleWithLinecounter: # TODO: this could be a 'utils' class - def __init__(self, f, delimiter): - self.f = f - self.line = 0 - self.delimiter = delimiter - - def readline(self, doSplit=True): - self.line += 1 - dmp = self.f.readline().replace("\n", "").replace("\r", "") - if doSplit: - dmp = [word.strip(" ") for word in dmp.split(self.delimiter)] - return dmp - if self.inputFhandle: if self.inputFhandle.closed: self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8") - - f = FilehandleWithLinecounter(self.inputFhandle, delimiter) + f = utl.FilehandleWithLinecounter(self.inputFhandle, delimiter) self._readHeader(f) self.inputFhandle.close() @@ -579,18 +564,9 @@ class Dataset: # here that the independent variable should monotonically increase even when # crossing over to a second day. - def extractVardesc(dmp): # TODO: could be a 'utils' function or one line, - shortname = dmp[ - 0 - ] # shortname, units, standardname, longname, *_ = dmp + [None] * 3 - units = dmp[1] - standardname = dmp[2] if len(dmp) > 2 else None - longname = dmp[3] if len(dmp) > 3 else None - return shortname, units, standardname, longname - if self.format == Formats.FFI2110: dmp = f.readline() - shortname, units, standardname, longname = extractVardesc(dmp) + shortname, units, standardname, longname = utl.extractVardesc(dmp) self.independentBoundedVariable = Variable( shortname, units, @@ -600,7 +576,7 @@ class Dataset: ) dmp = f.readline() - shortname, units, standardname, longname = extractVardesc(dmp) + shortname, units, standardname, longname = utl.extractVardesc(dmp) self.independentVariable = Variable( shortname, units, @@ -633,7 +609,7 @@ class Dataset: # the name used for that variable as a column header, i.e., the last header # line prior to start of data.). dmp = f.readline() - shortname, units, standardname, longname = extractVardesc(dmp) + shortname, units, standardname, longname = utl.extractVardesc(dmp) vshortname = [shortname] vunits = [units] vstandardname = [standardname] @@ -641,7 +617,7 @@ class Dataset: for _ in range(1, nvar): dmp = f.readline() - shortname, units, standardname, longname = extractVardesc(dmp) + shortname, units, standardname, longname = utl.extractVardesc(dmp) vshortname += [shortname] vunits += [units] vstandardname += [standardname] @@ -768,7 +744,7 @@ class Dataset: return fn + ".ict" - def isValidFileName(self, name): # TODO: this could be a 'utils' function + def isValidFileName(self, name): """test whether file name complies with ICARTT standard: ICARTT standard v2 2.1.1 3) @@ -781,11 +757,7 @@ class Dataset: :return: is file name valid according to ICARTT standard? :rtype: bool """ - - def isAsciiAlpha(x): # TODO: this could be a 'utils' function - return re.match("[a-zA-Z0-9-_.]", x) - - allAsciiAlpha = all(isAsciiAlpha(x) for x in name) + allAsciiAlpha = utl.isAsciiAlpha(name) lessThan128Characters = len(name) < 128 return allAsciiAlpha and lessThan128Characters and name.endswith(".ict") diff --git a/src/icartt/ictutils.py b/src/icartt/ictutils.py index afedb84..0f2066c 100644 --- a/src/icartt/ictutils.py +++ b/src/icartt/ictutils.py @@ -3,6 +3,33 @@ import re -def isAsciiAlphaOrUnderscore(x: str, _only="[a-zA-Z0-9_]") -> bool: +class FilehandleWithLinecounter: + """a file handle that counts the number of files that were read""" + + def __init__(self, f, delimiter): + self.f = f + self.line = 0 + self.delimiter = delimiter + + def readline(self, doSplit=True): + self.line += 1 + dmp = self.f.readline().replace("\n", "").replace("\r", "") + if doSplit: + dmp = [word.strip(" ") for word in dmp.split(self.delimiter)] + return dmp + + +def isAsciiAlphaOrUnderscore(x: str) -> bool: """check if string x contains only characters from [a-zA-Z0-9_] regex""" - return re.match(_only, x) + return re.match("[a-zA-Z0-9_]", x) + + +def isAsciiAlpha(x): + """check if string x contains only characters from [a-zA-Z0-9-_.] regex""" + return re.match("[a-zA-Z0-9-_.]", x) + + +def extractVardesc(line_parts: list) -> str: + """extract variable description from ict header line parts (splitted line)""" + shortname, units, standardname, longname, *_ = line_parts + [None] * 3 + return shortname, units, standardname, longname -- GitLab From 96da4f77f156deb98d0df7108f8fddf3b221b3cc Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 12:09:25 +0200 Subject: [PATCH 12/14] revised utils / simplified --- src/icartt/dataset.py | 4 ++-- src/icartt/ictutils.py | 12 ------------ 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index 660a2e9..1528158 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -348,7 +348,7 @@ class Variable: # Uppercase and lowercase ASCII alphanumeric characters # and underscores. - allAreAlphaOrUnderscore = all(utl.isAsciiAlphaOrUnderscore(x) for x in name) + allAreAlphaOrUnderscore = all(re.match("[a-zA-Z0-9_]", c) for c in name) # The first character must be a letter, firstIsAlpha = bool(re.match("[a-zA-Z]", name[0])) # and the name can be at most 31 characters in length. @@ -757,7 +757,7 @@ class Dataset: :return: is file name valid according to ICARTT standard? :rtype: bool """ - allAsciiAlpha = utl.isAsciiAlpha(name) + allAsciiAlpha = all(re.match("[a-zA-Z0-9-_.]", c) for c in name) lessThan128Characters = len(name) < 128 return allAsciiAlpha and lessThan128Characters and name.endswith(".ict") diff --git a/src/icartt/ictutils.py b/src/icartt/ictutils.py index 0f2066c..f7f666c 100644 --- a/src/icartt/ictutils.py +++ b/src/icartt/ictutils.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -import re - class FilehandleWithLinecounter: """a file handle that counts the number of files that were read""" @@ -19,16 +17,6 @@ class FilehandleWithLinecounter: return dmp -def isAsciiAlphaOrUnderscore(x: str) -> bool: - """check if string x contains only characters from [a-zA-Z0-9_] regex""" - return re.match("[a-zA-Z0-9_]", x) - - -def isAsciiAlpha(x): - """check if string x contains only characters from [a-zA-Z0-9-_.] regex""" - return re.match("[a-zA-Z0-9-_.]", x) - - def extractVardesc(line_parts: list) -> str: """extract variable description from ict header line parts (splitted line)""" shortname, units, standardname, longname, *_ = line_parts + [None] * 3 -- GitLab From 26c104277d959f7fff284e8a30c6a9176ed5ad2c Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 12:50:29 +0200 Subject: [PATCH 13/14] cosmetics --- src/icartt/dataset.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index 1528158..d49ceaf 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -352,9 +352,9 @@ class Variable: # The first character must be a letter, firstIsAlpha = bool(re.match("[a-zA-Z]", name[0])) # and the name can be at most 31 characters in length. - lessThan31Chars = len(name) <= 31 + le31Chars = len(name) <= 31 - return allAreAlphaOrUnderscore and firstIsAlpha and lessThan31Chars + return allAreAlphaOrUnderscore and firstIsAlpha and le31Chars def __init__( self, @@ -401,6 +401,9 @@ class Variable: self.scale = scale self.miss = miss + def __repr__(self): + return f"[{self.units}], {self.vartype.name}" + def __str__(self): return self.desc() @@ -448,7 +451,7 @@ class Dataset: if self.defineMode: return np.datetime64("NaT") - # for 1001, its an array, for 2110 a dict + # for 1001 it's an array, for 2110 a dict if not isinstance(self.data.data, (np.ndarray, dict)): return np.datetime64("NaT") @@ -1011,8 +1014,10 @@ class Dataset: if not self.isValidFileName(pathlib.Path(f).name): warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename") - else: # try to obtain dataID and locationID from file name + else: + # try to obtain dataID and locationID from file name parts = pathlib.Path(f).name.split("_") + # there should be at least 3 parts; data ID, location ID and revision date + file name extension if len(parts) > 2: self.dataID = parts[0] self.locationID = parts[1] -- GitLab From 0cb5da37e7224ffe4663d9cfb1ac4057890680e7 Mon Sep 17 00:00:00 2001 From: Florian Obersteiner Date: Thu, 7 Apr 2022 14:31:22 +0200 Subject: [PATCH 14/14] added test for revision parser --- tests/test_1001.py | 1 - tests/test_bulkIO.py | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_1001.py b/tests/test_1001.py index 7a19fcb..738eb6b 100644 --- a/tests/test_1001.py +++ b/tests/test_1001.py @@ -163,7 +163,6 @@ class Simple1001TestCase(unittest.TestCase): ["Use of these data requires PRIOR OK from the PI"], ) self.assertEqual(ict.normalComments.keywords["OTHER_COMMENTS"].data, ["N/A"]) - # TODO test revision information def testReadData(self): ict = icartt.Dataset(self.fn, loadData=True) diff --git a/tests/test_bulkIO.py b/tests/test_bulkIO.py index 9cbc816..3d8ef77 100644 --- a/tests/test_bulkIO.py +++ b/tests/test_bulkIO.py @@ -1,8 +1,8 @@ import unittest import pathlib import io +import re -# import pytest import icartt @@ -44,7 +44,7 @@ fileinfo = { } -# TODO: dataset -> close file pointer after read ?! +# TODO? dataset -> close file pointer after read class BulkIOTestCase(unittest.TestCase): @@ -75,6 +75,9 @@ class BulkIOTestCase(unittest.TestCase): with self.subTest(msg=f"Reading data from test file {str(fn)}"): ict = icartt.Dataset(fn, loadData=True) self.assertEqual(type(ict), icartt.Dataset) + m = re.search("R([a-zA-Z0-9]).ict", fn.name) + if m: + self.assertEqual(m.groups()[0], ict.revision) def testWriteHeader(self): for fn in self.files_ok: -- GitLab