From f18c55a2abc426b3040c43285f82ead13da2550a Mon Sep 17 00:00:00 2001
From: Christoph Knote <christoph.knote@med.uni-augsburg.de>
Date: Tue, 15 Feb 2022 13:02:21 +0100
Subject: [PATCH] All tests pass

---
 icartt/dataset.py                          | 103 ++++++++++++++-------
 tests/examples/PAVE-AR_DC8_20050203_R0.ict |   4 +-
 tests/test_dataset.py                      |  19 ++--
 3 files changed, 82 insertions(+), 44 deletions(-)

diff --git a/icartt/dataset.py b/icartt/dataset.py
index 98cf5bb..6cd8cb4 100644
--- a/icartt/dataset.py
+++ b/icartt/dataset.py
@@ -33,6 +33,11 @@ class KeywordComment():
 
 
 class StandardNormalComments(collections.UserList):
+    @property
+    def nlines(self):
+        """Number of header lines taken by the normal comments: the freeform lines, one per entry in each keyword's data (keywords may be multiline), plus 1 for the shortnames line."""
+        return len(self.freeform) + 1 + sum(len(k.data) for k in self.keywords.values())
+
     @property
     def data(self):
         return self.freeform + [ str(s) for s in self.keywords.values()  ] + [ self.shortnames ]
@@ -50,7 +55,7 @@ class StandardNormalComments(collections.UserList):
 
         current_keyword = None
         for l in raw:
-            possible_keyword = l.split(":")[0]
+            possible_keyword = l.split(":")[0].strip()
 #            import pdb; pdb.set_trace()
             if possible_keyword in self.keywords or re.match("R[a-zA-Z0-9]{1,2}[ ]*", possible_keyword):
                 current_keyword = possible_keyword
@@ -60,7 +65,11 @@ class StandardNormalComments(collections.UserList):
             if current_keyword is None:
                 self.freeform.append(l)
             else:
-                self.keywords[current_keyword].append( l.replace(current_keyword + ":", "").strip() )
+                # Strip the "KEY:" prefix only on the line that names the keyword; continuation lines may contain colons and are kept whole.
+                self.keywords[current_keyword].append( (l.split(":", 1)[1] if possible_keyword == current_keyword and ":" in l else l).strip() )
+        for key, comment in self.keywords.items():
+            if not comment.data:
+                warnings.warn("Normal comments: required keyword {:s} is missing.".format(key))
 
     def __init__(self, contents=[]):
         self.freeform   = []
@@ -97,12 +106,15 @@ class StandardNormalComments(collections.UserList):
 class Variable(collections.UserList):
     '''An ICARTT variable description with name, units, scale and missing value.
 
-    :param name: Name of the variable
-    :type name: str
+    :param shortname: Short name of the variable
+    :type shortname: str
 
     :param units: Units of the variable
     :type units: str
 
+    :param standardname: Standard name of the variable
+    :type standardname: str
+
     :param longname: Long name of the variable
     :type longname: str
 
@@ -125,7 +137,12 @@ class Variable(collections.UserList):
         :return: description string
         :rtype: str
         '''
-        return splitChar.join([str(self.name), str(self.units), str(self.longname)])
+        # shortname and units are always emitted; standardname and longname only when they are set.
+        # NOTE(review): if standardname is None but longname is set, longname becomes the 3rd field, which extract_vardesc reads back as standardname - confirm this is intended.
+        optional = [ self.standardname, self.longname ]
+        descstr = [ str(self.shortname), str(self.units) ]
+        descstr += [ str(x) for x in optional if x is not None ]
+        return splitChar.join(descstr)
 
     def append(self, *argv):
         '''Append data to a variable. Depending on type (independent, dependent variable),
@@ -172,20 +189,21 @@ class Variable(collections.UserList):
 
         return (all_are_alpha_or_underscore and first_is_alpha and less_than_31_chars)
 
-    def __init__(self, name, units, longname, vartype=VariableType.DVAR, scale=1.0, miss=-99999.0, splitChar=","):
+    def __init__(self, shortname, units, standardname, longname, vartype=VariableType.DVAR, scale=1.0, miss=-99999.0, splitChar=","):
         '''Constructor method
         '''
-        if not self.is_valid_variablename(name):
-            warnings.warn("Variable name {:s} does not comply with ICARTT standard v2".format(name))
+        if not self.is_valid_variablename(shortname):
+            warnings.warn("Variable short name {:s} does not comply with ICARTT standard v2".format(shortname))
 
-        self.name = name
-        self.units = units
-        self.longname = longname
-        self.vartype = vartype
-        self.scale = scale
-        self.miss = miss
+        self.shortname      = shortname
+        self.units          = units
+        self.standardname   = standardname
+        self.longname       = longname
+        self.vartype        = vartype
+        self.scale          = scale
+        self.miss           = miss
 
-        self.splitChar = splitChar
+        self.splitChar      = splitChar
 
         self.data = []
 
@@ -214,11 +232,11 @@ class Dataset:
         '''
         total = -1
         if self.format == Formats.FFI_1001:
-            total = 14 + len(self.DVARS) + len(self.SCOM) + len(self.NCOM)
+            total = 14 + len(self.DVARS) + len(self.SCOM) + self.NCOM.nlines
         if self.format == Formats.FFI_2110:
             # 2: IVAR + IBVAR
-            total = 16 + 2 + len(self.AUXVARS) + \
-                len(self.DVARS) + len(self.SCOM) + len(self.NCOM)
+            total = 16 + 2 + len(self.AUXVARS) + len(self.DVARS) +\
+                len(self.SCOM) + self.NCOM.nlines
         return total
 
     @property
@@ -322,7 +340,7 @@ class Dataset:
         # Special comments (Notes of problems or special circumstances unique to this file. An example would be comments/problems associated with a particular flight.).
         nul = [prnt(x) for x in self.SCOM]
         # Number of Normal comments (i.e., number of additional lines of SUPPORTING information: Integer value indicating the number of lines of additional information, NOT including this line.).
-        prnt("{:d}".format(len(self.NCOM)))
+        prnt("{:d}".format(self.NCOM.nlines))
         # Normal comments (SUPPORTING information: This is the place for investigators to more completely describe the data and measurement parameters. The supporting information structure is described below as a list of key word: value pairs. Specifically include here information on the platform used, the geo-location of data, measurement technique, and data revision comments. Note the non-optional information regarding uncertainty, the upper limit of detection (ULOD) and the lower limit of detection (LLOD) for each measured variable. The ULOD and LLOD are the values, in the same units as the measurements that correspond to the flags -7777s and -8888s within the data, respectively. The last line of this section should contain all the short variable names on one line. The key words in this section are written in BOLD below and must appear in this section of the header along with the relevant data listed after the colon. For key words where information is not needed or applicable, simply enter N/A.).
         nul = [prnt(x) for x in self.NCOM]
 
@@ -491,14 +509,24 @@ class Dataset:
         # from the start of the day on which measurements began. It should be noted
         # here that the independent variable should monotonically increase even when
         # crossing over to a second day.
+
+        def extract_vardesc(dmp):
+            # Unpack an already-split variable description: fields 0 and 1 (shortname, units) are mandatory,
+            # fields 2 and 3 (standardname, longname) are optional and come back as None when absent.
+            nfields = len(dmp)
+            optional = [ dmp[i] if nfields > i else None for i in (2, 3) ]
+            return dmp[0], dmp[1], optional[0], optional[1]
+
         if self.format == Formats.FFI_2110:
             dmp = f.readline()
-            self.IBVAR = Variable(dmp[0], dmp[1], dmp[2 if len(
-                dmp) > 2 else 1], splitChar=self.splitChar)
+            shortname, units, standardname, longname = extract_vardesc(dmp)
+            self.IBVAR = Variable(shortname, units, standardname, longname, 
+                                splitChar=self.splitChar)
 
         dmp = f.readline()
-        self.IVAR = Variable(dmp[0], dmp[1], dmp[2 if len(
-            dmp) > 2 else 1], splitChar=self.splitChar)
+        shortname, units, standardname, longname = extract_vardesc(dmp)
+        self.IVAR = Variable(shortname, units, standardname, longname, 
+                            splitChar=self.splitChar)
 
         def read_vars(f):
             # line 10 - Number of variables (Integer value showing the number of
@@ -523,18 +551,22 @@ class Dataset:
             # entered on one line. The short variable name must correspond exactly to
             # the name used for that variable as a column header, i.e., the last header
             # line prior to start of data.).
-            dmp = f.readline()
-            vname = [dmp[0]]
-            vunits = [dmp[1]]
-            vlongname = [dmp[2 if len(dmp) > 2 else 1]]
+            dmp             = f.readline()
+            shortname, units, standardname, longname = extract_vardesc(dmp)
+            vshortname      = [ shortname ]
+            vunits          = [ units ]
+            vstandardname   = [ standardname ]
+            vlongname       = [ longname ]
 
             for i in range(1, nvar):
                 dmp = f.readline()
-                vname += [dmp[0]]
-                vunits += [dmp[1]]
-                vlongname += [dmp[2 if len(dmp) > 2 else 1]]
+                shortname, units, standardname, longname = extract_vardesc(dmp)
+                vshortname      += [ shortname ]
+                vunits          += [ units ]
+                vstandardname   += [ standardname ]
+                vlongname       += [ longname ]
 
-            return {name: Variable(name, unit, longname, scale=scale, miss=miss, splitChar=self.splitChar) for name, unit, longname, scale, miss in zip(vname, vunits, vlongname, vscale, vmiss)}
+            return {shortname: Variable(shortname, unit, standardname, longname, scale=scale, miss=miss, splitChar=self.splitChar) for shortname, unit, standardname, longname, scale, miss in zip(vshortname, vunits, vstandardname, vlongname, vscale, vmiss)}
 
         self.DVARS = read_vars(f)
 
@@ -658,7 +690,8 @@ class Dataset:
         self.dataInterval = [ 0.0 ]
         self.IVAR = Variable('Time_Start',
                              'seconds_from_0_hours_on_valid_date',
-                             'seconds_from_0_hours_on_valid_date',
+                             'Time_Start',
+                             'Time_Start',
                              vartype=VariableType.IVAR,
                              scale=1.0, miss=-9999999, splitChar=splitChar)
         self.IBVAR = None
@@ -667,12 +700,14 @@ class Dataset:
             'Time_Stop':
             Variable('Time_Stop',
                      'seconds_from_0_hours_on_valid_date',
-                     'seconds_from_0_hours_on_valid_date',
+                     'Time_Stop',
+                     'Time_Stop',
                      scale=1.0, miss=-9999999, splitChar=splitChar),
             'Some_Variable':
             Variable('Some_Variable',
                      'ppbv',
-                     'ppbv',
+                     'Some_Variable',
+                     'Some_Variable',
                      scale=1.0, miss=-9999999, splitChar=splitChar)
         }
 
diff --git a/tests/examples/PAVE-AR_DC8_20050203_R0.ict b/tests/examples/PAVE-AR_DC8_20050203_R0.ict
index 61f9fb3..7e7ab06 100644
--- a/tests/examples/PAVE-AR_DC8_20050203_R0.ict
+++ b/tests/examples/PAVE-AR_DC8_20050203_R0.ict
@@ -23,8 +23,8 @@ Log10_O3NumDensity_Err[], part/cc, Log10_O3NumDensity_Error, Log10_NumDensity_er
 -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999 
 NumAlts, #, Number_of_altitudes, Number_of_altitudes_reported
 Year, yyyy, Year_UTC, Year_UTC
-Month, mm, UTC, Month_UTC, Month_UTC
-Day, dd, UTC, Day_UTC, Day_UTC
+Month, mm, Month_UTC, Month_UTC
+Day, dd, Day_UTC, Day_UTC
 AvgTime, minutes, Averaging_time, Averaging_time_of_presented_data xxx.x_minutes 
 Lat, degrees, Latitude
 Lon, degrees, Longitude
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 7b5078c..ff79780 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -1,7 +1,6 @@
 import unittest
 import os
 import io
-import sys
 import icartt
 
 def compare_files(fn, str_in, str_out, skiplines=0, nlines=-1): # pragma: no cover
@@ -82,23 +81,27 @@ class Simple1001TestCase(unittest.TestCase):
     
     def test_ivar(self):
         ict      = icartt.Dataset(self.fn, loadData=False)
-        self.assertEqual( ict.IVAR.name,     "Start_UTC" )
-        self.assertEqual( ict.IVAR.units,    "seconds" )
-        self.assertEqual( ict.IVAR.longname, "number_of_seconds_from_0000_UTC" )
-        self.assertEqual( ict.IVAR.scale,    1.0 )
-        self.assertEqual( ict.IVAR.miss,     -99999.0 )
+        self.assertEqual( ict.IVAR.shortname,       "Start_UTC" )
+        self.assertEqual( ict.IVAR.units,           "seconds" )
+        self.assertEqual( ict.IVAR.standardname,    "number_of_seconds_from_0000_UTC" )
+        self.assertEqual( ict.IVAR.longname,        None )
+        self.assertEqual( ict.IVAR.scale,           1.0 )
+        self.assertEqual( ict.IVAR.miss,            -99999.0 )
 
     def test_dvar(self):
         ict      = icartt.Dataset(self.fn, loadData=False)
         
-        self.assertEqual( [ DVAR.name for DVAR in ict.DVARS.values() ], 
+        self.assertEqual( [ DVAR.shortname for DVAR in ict.DVARS.values() ], 
                           [ "Stop_UTC", "Mid_UTC", "DLat", "DLon", "Elev", "NO_ppbv", "NO_1sig", "NO2_ppbv", "NO2_1sig" ] )
 
         self.assertEqual( [ DVAR.units for DVAR in ict.DVARS.values() ], 
                           [ "seconds", "seconds", "deg_N", "deg_E", "meters", "ppbv", "ppbv", "ppbv", "ppbv" ] )
 
+        self.assertEqual( [ DVAR.standardname for DVAR in ict.DVARS.values() ], 
+                          [ None, None, None, None, None, None, None, None, None])
+
         self.assertEqual( [ DVAR.longname for DVAR in ict.DVARS.values() ], 
-                          [ "seconds", "seconds", "deg_N", "deg_E", "meters", "ppbv", "ppbv", "ppbv", "ppbv" ] )
+                          [ None, None, None, None, None, None, None, None, None])
 
         self.assertEqual( [ DVAR.scale for DVAR in ict.DVARS.values() ], 
                           [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] )
-- 
GitLab