From 82b65e53945bd85c69dc593c2bed8078f5920ad8 Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 09:40:31 +0200
Subject: [PATCH 01/14] todo  updated

---
 src/icartt/dataset.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py
index 6d3c7ce..862adda 100644
--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -981,7 +981,7 @@ class Dataset:
         """
 
         self.format = format
-        self.version = None
+        self.version = None # TODO: should this be 2.0 by default?
 
         self.dataID = "dataID"
         self.locationID = "locationID"
@@ -1028,6 +1028,7 @@ class Dataset:
 
             if not self.isValidFileName(pathlib.Path(f).name):
                 warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename")
+            # TODO: else -> split on "_", then the first part should be dataID, second part locationID
 
             self.readHeader(delimiter)
             if loadData:
-- 
GitLab


From 18210b27a446c305881f55e771362d394356c42c Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 09:43:05 +0200
Subject: [PATCH 02/14] readme updated

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f074f4b..d507a6e 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ pip install -e .
 
 # Changelog
 
-## 2.0.0 (2022-02-x)
+## 2.0.0 (2022-04-x)
 
 - Compatible with ICARTT v2 standard
 - Formats 1001 and 2110
-- 
GitLab


From 4b49058cb1ec9443503fc5af398a88023c9decc3 Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 09:48:17 +0200
Subject: [PATCH 03/14] readme updated #2

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d507a6e..9ec7098 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ make your changes and then [submit a merge request](https://mbees.med.uni-augsbu
 
 ## Installation of the development version
 
-Clone this repository / or your fork and install as "editable":
+Clone this repository (or your fork), then install it, e.g. as "editable":
 
 ```
 git clone https://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage.git or <URL of your fork>
@@ -24,6 +24,8 @@ cd icartt_pypackage
 pip install -e .
 ```
 
+Note: the package is managed with [poetry](https://python-poetry.org/).
+
 # Changelog
 
 ## 2.0.0 (2022-04-x)
-- 
GitLab


From f1def13ade94e0597bf14f1168cd412a5adc2349 Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 09:49:35 +0200
Subject: [PATCH 04/14] version to 2.0.0-rc1

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index c99cea8..11198af 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "icartt"
-version = "1.9.1"
+version = "2.0.0-rc1"
 description = "ICARTT format reader and writer"
 license = "GPL-3.0-or-later"
 authors = ["Christoph Knote <christoph.knote@med.uni-augsburg.de>"]
-- 
GitLab


From dbac459a818190b51f8b611f41326a15560a66cf Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 09:51:32 +0200
Subject: [PATCH 05/14] keyword icartt added

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 11198af..e84c2ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ authors = ["Christoph Knote <christoph.knote@med.uni-augsburg.de>"]
 readme = "README.md"
 homepage = "https://mbees.med.uni-augsburg.de/"
 repository = "https://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage"
-keywords = [ "atmosphere", "file format", "ames", "nasa" ]
+keywords = [ "atmosphere", "file format", "icartt", "ames", "nasa" ]
 classifiers = [
     "Programming Language :: Python :: 3",
     "Development Status :: 5 - Production/Stable",
-- 
GitLab


From 172fb3b68f6c171968531787617e3477f8073168 Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 10:24:34 +0200
Subject: [PATCH 06/14] added __str__ for normalComments

---
 src/icartt/dataset.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py
index 862adda..cf625dc 100644
--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -318,6 +318,11 @@ class StandardNormalComments(collections.UserList):
         self.keywords["REVISION"].naAllowed = False
 
 
+    def __str__(self):
+        s = "\n".join(f"{str(v)}" for _, v in self.keywords.items())
+        return s
+
+
 class Variable:
     """An ICARTT variable description with name, units, scale and missing value."""
 
@@ -1028,6 +1033,11 @@ class Dataset:
 
             if not self.isValidFileName(pathlib.Path(f).name):
                 warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename")
+            else: # try to obtain dataID and locationID from file name
+                parts = pathlib.Path(f).name.split("_")
+                if len(parts) > 2:
+                    self.dataID = parts[0]
+                    self.dataID = parts[1]
             # TODO: else -> split on "_", then the first part should be dataID, second part locationID
 
             self.readHeader(delimiter)
-- 
GitLab


From b42d23aca3529fe691eef5532e88e9e6fdaa01da Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 10:49:29 +0200
Subject: [PATCH 07/14] added parser for REVISION keyword to Dataset.revision
 property

---
 src/icartt/dataset.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py
index cf625dc..a193dae 100644
--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -319,8 +319,7 @@ class StandardNormalComments(collections.UserList):
 
 
     def __str__(self):
-        s = "\n".join(f"{str(v)}" for _, v in self.keywords.items())
-        return s
+        return "\n".join(f"{str(v)}" for v in self.keywords.values())
 
 
 class Variable:
@@ -707,6 +706,10 @@ class Dataset:
         rawNcom = [f.readline(doSplit=False) for _ in range(nncom)]
         self.normalComments.ingest(rawNcom)
 
+        r = self.normalComments.keywords["REVISION"].data
+        r = "0" if not r else r[0].strip("R")
+        self.revision = r
+
         self.nHeaderFile = f.line
 
         if self.nHeader != nHeaderSuggested:
-- 
GitLab


From 4f5c45d69c2963c1c2d1dde5e3970dac919a9d90 Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 10:52:26 +0200
Subject: [PATCH 08/14] comment on nHeader != nHeaderSuggested warning

---
 src/icartt/dataset.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py
index a193dae..811c500 100644
--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -712,6 +712,8 @@ class Dataset:
 
         self.nHeaderFile = f.line
 
+        # TODO: this warning might be misleading since it assumes all normalComment keywords
+        #       have been defined - which is not guaranteed.
         if self.nHeader != nHeaderSuggested:
             warnings.warn(
                 f"Number of header lines suggested in line 1 ({int(nHeaderSuggested)}) do not match actual header lines read ({int(self.nHeader)})"
-- 
GitLab


From 405c44f7422ea38d5aae669f672a7f169a5a61ff Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 11:14:14 +0200
Subject: [PATCH 09/14] __str__ for Dataset

---
 src/icartt/dataset.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py
index 811c500..a20d933 100644
--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -317,7 +317,6 @@ class StandardNormalComments(collections.UserList):
         self.keywords["UNCERTAINTY"].naAllowed = False
         self.keywords["REVISION"].naAllowed = False
 
-
     def __str__(self):
         return "\n".join(f"{str(v)}" for v in self.keywords.values())
 
@@ -962,12 +961,18 @@ class Dataset:
             if not self.inputFhandle.closed:
                 self.inputFhandle.close()
 
-    def __repr__(self):
-        # TODO: this could be more meaningful
-        return "icartt.Dataset()"
-
     def __str__(self):
-        return f"ICARTT Dataset {self.makeFileName()}"
+        s = [
+            f"ICARTT Dataset {self.makeFileName()}, format index {self.format.value}",
+            f"data ID: {self.dataID}",
+            f"location ID: {self.locationID}",
+            f"PI: {self.PIName}",
+            f"Affiliation: {self.PIAffiliation}",
+            f"Mission: {self.missionName}",
+            f"Collection date, Revision date: {self.dateOfCollection}, {self.dateOfRevision}",
+            f"Variables ({len(self.variables)}):\n{', '.join(x for x in self.variables)}",
+        ]
+        return "\n".join(s)
 
     def __init__(
         self,
@@ -991,7 +996,7 @@ class Dataset:
         """
 
         self.format = format
-        self.version = None # TODO: should this be 2.0 by default?
+        self.version = None  # TODO: should this be 2.0 by default?
 
         self.dataID = "dataID"
         self.locationID = "locationID"
@@ -1038,12 +1043,11 @@ class Dataset:
 
             if not self.isValidFileName(pathlib.Path(f).name):
                 warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename")
-            else: # try to obtain dataID and locationID from file name
+            else:  # try to obtain dataID and locationID from file name
                 parts = pathlib.Path(f).name.split("_")
                 if len(parts) > 2:
                     self.dataID = parts[0]
-                    self.dataID = parts[1]
-            # TODO: else -> split on "_", then the first part should be dataID, second part locationID
+                    self.locationID = parts[1]
 
             self.readHeader(delimiter)
             if loadData:
-- 
GitLab


From 933855d914a4487eaf4e9c70bc870f2631c38b1c Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 11:38:25 +0200
Subject: [PATCH 10/14] created .py for utility functions

---
 src/icartt/dataset.py  | 12 ++++--------
 src/icartt/ictutils.py |  8 ++++++++
 2 files changed, 12 insertions(+), 8 deletions(-)
 create mode 100644 src/icartt/ictutils.py

diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py
index a20d933..789231d 100644
--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -8,6 +8,8 @@ from enum import IntEnum
 
 import numpy as np
 
+from . import ictutils as utl
+
 DEFAULT_NUM_FORMAT = "%g"
 """Default number format for output. Provides the `fmt` parameter of :func:`numpy.savetxt` internally."""
 
@@ -340,15 +342,13 @@ class Variable:
             descstr += [str(self.longname)]
         return delimiter.join(descstr)
 
-    def isValidVariablename(self, name):  # TODO: this could be a 'utils' function
+    def isValidVariablename(self, name):
         # ICARTT Standard v2 2.1.1 2)
         # Variable short names and variable standard names:
         # Uppercase and lowercase ASCII alphanumeric characters
         # and underscores.
-        def isAsciiAlphaOrUnderscore(x):  # TODO: this could be a 'utils' function
-            return re.match("[a-zA-Z0-9_]", x)
 
-        allAreAlphaOrUnderscore = all(isAsciiAlphaOrUnderscore(x) for x in name)
+        allAreAlphaOrUnderscore = all(utl.isAsciiAlphaOrUnderscore(x) for x in name)
         # The first character must be a letter,
         firstIsAlpha = bool(re.match("[a-zA-Z]", name[0]))
         # and the name can be at most 31 characters in length.
@@ -401,10 +401,6 @@ class Variable:
         self.scale = scale
         self.miss = miss
 
-    def __repr__(self):
-        # TODO: this sould be something else than __str__ ?
-        return self.desc()
-
     def __str__(self):
         return self.desc()
 
diff --git a/src/icartt/ictutils.py b/src/icartt/ictutils.py
new file mode 100644
index 0000000..afedb84
--- /dev/null
+++ b/src/icartt/ictutils.py
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+
+def isAsciiAlphaOrUnderscore(x: str, _only="[a-zA-Z0-9_]") -> bool:
+    """check if string x contains only characters from [a-zA-Z0-9_] regex"""
+    return re.match(_only, x)
-- 
GitLab


From 3e55b6198ef44026790a5756abbc634338104809 Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 11:54:00 +0200
Subject: [PATCH 11/14] utility functions / classes done

---
 src/icartt/dataset.py  | 42 +++++++-----------------------------------
 src/icartt/ictutils.py | 31 +++++++++++++++++++++++++++++--
 2 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py
index 789231d..660a2e9 100644
--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -498,25 +498,10 @@ class Dataset:
         :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
         :type delimiter: str, optional
         """
-
-        class FilehandleWithLinecounter:  # TODO: this could be a 'utils' class
-            def __init__(self, f, delimiter):
-                self.f = f
-                self.line = 0
-                self.delimiter = delimiter
-
-            def readline(self, doSplit=True):
-                self.line += 1
-                dmp = self.f.readline().replace("\n", "").replace("\r", "")
-                if doSplit:
-                    dmp = [word.strip(" ") for word in dmp.split(self.delimiter)]
-                return dmp
-
         if self.inputFhandle:
             if self.inputFhandle.closed:
                 self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8")
-
-            f = FilehandleWithLinecounter(self.inputFhandle, delimiter)
+            f = utl.FilehandleWithLinecounter(self.inputFhandle, delimiter)
             self._readHeader(f)
             self.inputFhandle.close()
 
@@ -579,18 +564,9 @@ class Dataset:
         # here that the independent variable should monotonically increase even when
         # crossing over to a second day.
 
-        def extractVardesc(dmp):  # TODO: could be a 'utils' function or one line,
-            shortname = dmp[
-                0
-            ]  # shortname, units, standardname, longname, *_ = dmp + [None] * 3
-            units = dmp[1]
-            standardname = dmp[2] if len(dmp) > 2 else None
-            longname = dmp[3] if len(dmp) > 3 else None
-            return shortname, units, standardname, longname
-
         if self.format == Formats.FFI2110:
             dmp = f.readline()
-            shortname, units, standardname, longname = extractVardesc(dmp)
+            shortname, units, standardname, longname = utl.extractVardesc(dmp)
             self.independentBoundedVariable = Variable(
                 shortname,
                 units,
@@ -600,7 +576,7 @@ class Dataset:
             )
 
         dmp = f.readline()
-        shortname, units, standardname, longname = extractVardesc(dmp)
+        shortname, units, standardname, longname = utl.extractVardesc(dmp)
         self.independentVariable = Variable(
             shortname,
             units,
@@ -633,7 +609,7 @@ class Dataset:
             # the name used for that variable as a column header, i.e., the last header
             # line prior to start of data.).
             dmp = f.readline()
-            shortname, units, standardname, longname = extractVardesc(dmp)
+            shortname, units, standardname, longname = utl.extractVardesc(dmp)
             vshortname = [shortname]
             vunits = [units]
             vstandardname = [standardname]
@@ -641,7 +617,7 @@ class Dataset:
 
             for _ in range(1, nvar):
                 dmp = f.readline()
-                shortname, units, standardname, longname = extractVardesc(dmp)
+                shortname, units, standardname, longname = utl.extractVardesc(dmp)
                 vshortname += [shortname]
                 vunits += [units]
                 vstandardname += [standardname]
@@ -768,7 +744,7 @@ class Dataset:
 
         return fn + ".ict"
 
-    def isValidFileName(self, name):  # TODO: this could be a 'utils' function
+    def isValidFileName(self, name):
         """test whether file name complies with ICARTT standard:
 
         ICARTT standard v2 2.1.1 3)
@@ -781,11 +757,7 @@ class Dataset:
         :return: is file name valid according to ICARTT standard?
         :rtype: bool
         """
-
-        def isAsciiAlpha(x):  # TODO: this could be a 'utils' function
-            return re.match("[a-zA-Z0-9-_.]", x)
-
-        allAsciiAlpha = all(isAsciiAlpha(x) for x in name)
+        allAsciiAlpha = utl.isAsciiAlpha(name)
         lessThan128Characters = len(name) < 128
 
         return allAsciiAlpha and lessThan128Characters and name.endswith(".ict")
diff --git a/src/icartt/ictutils.py b/src/icartt/ictutils.py
index afedb84..0f2066c 100644
--- a/src/icartt/ictutils.py
+++ b/src/icartt/ictutils.py
@@ -3,6 +3,33 @@
 import re
 
 
-def isAsciiAlphaOrUnderscore(x: str, _only="[a-zA-Z0-9_]") -> bool:
+class FilehandleWithLinecounter:
+    """a file handle that counts the number of files that were read"""
+
+    def __init__(self, f, delimiter):
+        self.f = f
+        self.line = 0
+        self.delimiter = delimiter
+
+    def readline(self, doSplit=True):
+        self.line += 1
+        dmp = self.f.readline().replace("\n", "").replace("\r", "")
+        if doSplit:
+            dmp = [word.strip(" ") for word in dmp.split(self.delimiter)]
+        return dmp
+
+
+def isAsciiAlphaOrUnderscore(x: str) -> bool:
     """check if string x contains only characters from [a-zA-Z0-9_] regex"""
-    return re.match(_only, x)
+    return re.match("[a-zA-Z0-9_]", x)
+
+
+def isAsciiAlpha(x):
+    """check if string x contains only characters from [a-zA-Z0-9-_.] regex"""
+    return re.match("[a-zA-Z0-9-_.]", x)
+
+
+def extractVardesc(line_parts: list) -> str:
+    """extract variable description from ict header line parts (splitted line)"""
+    shortname, units, standardname, longname, *_ = line_parts + [None] * 3
+    return shortname, units, standardname, longname
-- 
GitLab


From 96da4f77f156deb98d0df7108f8fddf3b221b3cc Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 12:09:25 +0200
Subject: [PATCH 12/14] revised utils / simplified

---
 src/icartt/dataset.py  |  4 ++--
 src/icartt/ictutils.py | 12 ------------
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py
index 660a2e9..1528158 100644
--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -348,7 +348,7 @@ class Variable:
         # Uppercase and lowercase ASCII alphanumeric characters
         # and underscores.
 
-        allAreAlphaOrUnderscore = all(utl.isAsciiAlphaOrUnderscore(x) for x in name)
+        allAreAlphaOrUnderscore = all(re.match("[a-zA-Z0-9_]", c) for c in name)
         # The first character must be a letter,
         firstIsAlpha = bool(re.match("[a-zA-Z]", name[0]))
         # and the name can be at most 31 characters in length.
@@ -757,7 +757,7 @@ class Dataset:
         :return: is file name valid according to ICARTT standard?
         :rtype: bool
         """
-        allAsciiAlpha = utl.isAsciiAlpha(name)
+        allAsciiAlpha = all(re.match("[a-zA-Z0-9-_.]", c) for c in name)
         lessThan128Characters = len(name) < 128
 
         return allAsciiAlpha and lessThan128Characters and name.endswith(".ict")
diff --git a/src/icartt/ictutils.py b/src/icartt/ictutils.py
index 0f2066c..f7f666c 100644
--- a/src/icartt/ictutils.py
+++ b/src/icartt/ictutils.py
@@ -1,7 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import re
-
 
 class FilehandleWithLinecounter:
     """a file handle that counts the number of files that were read"""
@@ -19,16 +17,6 @@ class FilehandleWithLinecounter:
         return dmp
 
 
-def isAsciiAlphaOrUnderscore(x: str) -> bool:
-    """check if string x contains only characters from [a-zA-Z0-9_] regex"""
-    return re.match("[a-zA-Z0-9_]", x)
-
-
-def isAsciiAlpha(x):
-    """check if string x contains only characters from [a-zA-Z0-9-_.] regex"""
-    return re.match("[a-zA-Z0-9-_.]", x)
-
-
 def extractVardesc(line_parts: list) -> str:
     """extract variable description from ict header line parts (splitted line)"""
     shortname, units, standardname, longname, *_ = line_parts + [None] * 3
-- 
GitLab


From 26c104277d959f7fff284e8a30c6a9176ed5ad2c Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 12:50:29 +0200
Subject: [PATCH 13/14] cosmetics

---
 src/icartt/dataset.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py
index 1528158..d49ceaf 100644
--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -352,9 +352,9 @@ class Variable:
         # The first character must be a letter,
         firstIsAlpha = bool(re.match("[a-zA-Z]", name[0]))
         # and the name can be at most 31 characters in length.
-        lessThan31Chars = len(name) <= 31
+        le31Chars = len(name) <= 31
 
-        return allAreAlphaOrUnderscore and firstIsAlpha and lessThan31Chars
+        return allAreAlphaOrUnderscore and firstIsAlpha and le31Chars
 
     def __init__(
         self,
@@ -401,6 +401,9 @@ class Variable:
         self.scale = scale
         self.miss = miss
 
+    def __repr__(self):
+        return f"[{self.units}], {self.vartype.name}"
+
     def __str__(self):
         return self.desc()
 
@@ -448,7 +451,7 @@ class Dataset:
         if self.defineMode:
             return np.datetime64("NaT")
 
-        # for 1001, its an array, for 2110 a dict
+        # for 1001 it's an array, for 2110 a dict
         if not isinstance(self.data.data, (np.ndarray, dict)):
             return np.datetime64("NaT")
 
@@ -1011,8 +1014,10 @@ class Dataset:
 
             if not self.isValidFileName(pathlib.Path(f).name):
                 warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename")
-            else:  # try to obtain dataID and locationID from file name
+            else:
+                # try to obtain dataID and locationID from file name
                 parts = pathlib.Path(f).name.split("_")
+                # there should be at least 3 parts; data ID, location ID and revision date + file name extension
                 if len(parts) > 2:
                     self.dataID = parts[0]
                     self.locationID = parts[1]
-- 
GitLab


From 0cb5da37e7224ffe4663d9cfb1ac4057890680e7 Mon Sep 17 00:00:00 2001
From: Florian Obersteiner <florian.obersteiner@kit.edu>
Date: Thu, 7 Apr 2022 14:31:22 +0200
Subject: [PATCH 14/14] added test for revision parser

---
 tests/test_1001.py   | 1 -
 tests/test_bulkIO.py | 7 +++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_1001.py b/tests/test_1001.py
index 7a19fcb..738eb6b 100644
--- a/tests/test_1001.py
+++ b/tests/test_1001.py
@@ -163,7 +163,6 @@ class Simple1001TestCase(unittest.TestCase):
             ["Use of these data requires PRIOR OK from the PI"],
         )
         self.assertEqual(ict.normalComments.keywords["OTHER_COMMENTS"].data, ["N/A"])
-        # TODO test revision information
 
     def testReadData(self):
         ict = icartt.Dataset(self.fn, loadData=True)
diff --git a/tests/test_bulkIO.py b/tests/test_bulkIO.py
index 9cbc816..3d8ef77 100644
--- a/tests/test_bulkIO.py
+++ b/tests/test_bulkIO.py
@@ -1,8 +1,8 @@
 import unittest
 import pathlib
 import io
+import re
 
-# import pytest
 
 import icartt
 
@@ -44,7 +44,7 @@ fileinfo = {
 }
 
 
-# TODO: dataset -> close file pointer after read ?!
+# TODO? dataset -> close file pointer after read
 
 
 class BulkIOTestCase(unittest.TestCase):
@@ -75,6 +75,9 @@ class BulkIOTestCase(unittest.TestCase):
             with self.subTest(msg=f"Reading data from test file {str(fn)}"):
                 ict = icartt.Dataset(fn, loadData=True)
                 self.assertEqual(type(ict), icartt.Dataset)
+                m = re.search("R([a-zA-Z0-9]).ict", fn.name)
+                if m:
+                    self.assertEqual(m.groups()[0], ict.revision)
 
     def testWriteHeader(self):
         for fn in self.files_ok:
-- 
GitLab