diff --git a/icartt/dataset.py b/icartt/dataset.py index 98cf5bb18289e5dae19ffb70a5ad1793a8d8844e..6cd8cb45983d66e4a0aec1467cd0dcd341d9173a 100644 --- a/icartt/dataset.py +++ b/icartt/dataset.py @@ -33,6 +33,11 @@ class KeywordComment(): class StandardNormalComments(collections.UserList): + @property + def nlines(self): + # "+ 1" -> shortnames line, and keywords might be multiline... + return len(self.freeform) + 1 + sum([ len(k.data) for k in self.keywords.values() ]) + @property def data(self): return self.freeform + [ str(s) for s in self.keywords.values() ] + [ self.shortnames ] @@ -50,7 +55,7 @@ class StandardNormalComments(collections.UserList): current_keyword = None for l in raw: - possible_keyword = l.split(":")[0] + possible_keyword = l.split(":")[0].strip() # import pdb; pdb.set_trace() if possible_keyword in self.keywords or re.match("R[a-zA-Z0-9]{1,2}[ ]*", possible_keyword): current_keyword = possible_keyword @@ -60,7 +65,11 @@ class StandardNormalComments(collections.UserList): if current_keyword is None: self.freeform.append(l) else: - self.keywords[current_keyword].append( l.replace(current_keyword + ":", "").strip() ) + self.keywords[current_keyword].append( l.replace(l.split(":")[0] + ":", "").strip() ) + + for key in self.keywords: + if self.keywords[key].data == []: + warnings.warn("Normal comments: required keyword {:s} is missing.".format(key)) def __init__(self, contents=[]): self.freeform = [] @@ -97,12 +106,15 @@ class StandardNormalComments(collections.UserList): class Variable(collections.UserList): '''An ICARTT variable description with name, units, scale and missing value. - :param name: Name of the variable - :type name: str + :param shortname: Short name of the variable + :type shortname: str :param units: Units of the variable :type units: str + :param standardname: Standard name of the variable + :type standardname: str + :param longname: Long name of the variable :type longname: str @@ -125,7 +137,12 @@ class Variable(collections.UserList): :return: description string :rtype: str ''' - return splitChar.join([str(self.name), str(self.units), str(self.longname)]) + descstr = [ str(self.shortname), str(self.units) ] + if not self.standardname is None: + descstr += [ str(self.standardname) ] + if not self.longname is None: + descstr += [ str(self.longname) ] + return splitChar.join(descstr) def append(self, *argv): '''Append data to a variable. Depending on type (independent, dependent variable), @@ -172,20 +189,21 @@ class Variable(collections.UserList): return (all_are_alpha_or_underscore and first_is_alpha and less_than_31_chars) - def __init__(self, name, units, longname, vartype=VariableType.DVAR, scale=1.0, miss=-99999.0, splitChar=","): + def __init__(self, shortname, units, standardname, longname, vartype=VariableType.DVAR, scale=1.0, miss=-99999.0, splitChar=","): '''Constructor method ''' - if not self.is_valid_variablename(name): - warnings.warn("Variable name {:s} does not comply with ICARTT standard v2".format(name)) + if not self.is_valid_variablename(shortname): + warnings.warn("Variable short name {:s} does not comply with ICARTT standard v2".format(shortname)) - self.name = name - self.units = units - self.longname = longname - self.vartype = vartype - self.scale = scale - self.miss = miss + self.shortname = shortname + self.units = units + self.standardname = standardname + self.longname = longname + self.vartype = vartype + self.scale = scale + self.miss = miss - self.splitChar = splitChar + self.splitChar = splitChar self.data = [] @@ -214,11 +232,11 @@ class Dataset: ''' total = -1 if self.format == Formats.FFI_1001: - total = 14 + len(self.DVARS) + len(self.SCOM) + len(self.NCOM) + total = 14 + len(self.DVARS) + len(self.SCOM) + self.NCOM.nlines if self.format == Formats.FFI_2110: # 2: IVAR + IBVAR - total = 16 + 2 + len(self.AUXVARS) + \ - len(self.DVARS) + len(self.SCOM) + len(self.NCOM) + total = 16 + 2 + len(self.AUXVARS) + len(self.DVARS) +\ + len(self.SCOM) + self.NCOM.nlines return total @property @@ -322,7 +340,7 @@ class Dataset: # Special comments (Notes of problems or special circumstances unique to this file. An example would be comments/problems associated with a particular flight.). nul = [prnt(x) for x in self.SCOM] # Number of Normal comments (i.e., number of additional lines of SUPPORTING information: Integer value indicating the number of lines of additional information, NOT including this line.). - prnt("{:d}".format(len(self.NCOM))) + prnt("{:d}".format(self.NCOM.nlines)) # Normal comments (SUPPORTING information: This is the place for investigators to more completely describe the data and measurement parameters. The supporting information structure is described below as a list of key word: value pairs. Specifically include here information on the platform used, the geo-location of data, measurement technique, and data revision comments. Note the non-optional information regarding uncertainty, the upper limit of detection (ULOD) and the lower limit of detection (LLOD) for each measured variable. The ULOD and LLOD are the values, in the same units as the measurements that correspond to the flags -7777s and -8888s within the data, respectively. The last line of this section should contain all the short variable names on one line. The key words in this section are written in BOLD below and must appear in this section of the header along with the relevant data listed after the colon. For key words where information is not needed or applicable, simply enter N/A.). nul = [prnt(x) for x in self.NCOM] @@ -491,14 +509,24 @@ class Dataset: # from the start of the day on which measurements began. It should be noted # here that the independent variable should monotonically increase even when # crossing over to a second day. + + def extract_vardesc(dmp): + shortname = dmp[0] + units = dmp[1] + standardname = dmp[2] if len(dmp) > 2 else None + longname = dmp[3] if len(dmp) > 3 else None + return shortname, units, standardname, longname + if self.format == Formats.FFI_2110: dmp = f.readline() - self.IBVAR = Variable(dmp[0], dmp[1], dmp[2 if len( - dmp) > 2 else 1], splitChar=self.splitChar) + shortname, units, standardname, longname = extract_vardesc(dmp) + self.IBVAR = Variable(shortname, units, standardname, longname, + splitChar=self.splitChar) dmp = f.readline() - self.IVAR = Variable(dmp[0], dmp[1], dmp[2 if len( - dmp) > 2 else 1], splitChar=self.splitChar) + shortname, units, standardname, longname = extract_vardesc(dmp) + self.IVAR = Variable(shortname, units, standardname, longname, + splitChar=self.splitChar) def read_vars(f): # line 10 - Number of variables (Integer value showing the number of @@ -523,18 +551,22 @@ class Dataset: # entered on one line. The short variable name must correspond exactly to # the name used for that variable as a column header, i.e., the last header # line prior to start of data.). - dmp = f.readline() - vname = [dmp[0]] - vunits = [dmp[1]] - vlongname = [dmp[2 if len(dmp) > 2 else 1]] + dmp = f.readline() + shortname, units, standardname, longname = extract_vardesc(dmp) + vshortname = [ shortname ] + vunits = [ units ] + vstandardname = [ standardname ] + vlongname = [ longname ] for i in range(1, nvar): dmp = f.readline() - vname += [dmp[0]] - vunits += [dmp[1]] - vlongname += [dmp[2 if len(dmp) > 2 else 1]] + shortname, units, standardname, longname = extract_vardesc(dmp) + vshortname += [ shortname ] + vunits += [ units ] + vstandardname += [ standardname ] + vlongname += [ longname ] - return {name: Variable(name, unit, longname, scale=scale, miss=miss, splitChar=self.splitChar) for name, unit, longname, scale, miss in zip(vname, vunits, vlongname, vscale, vmiss)} + return {shortname: Variable(shortname, unit, standardname, longname, scale=scale, miss=miss, splitChar=self.splitChar) for shortname, unit, standardname, longname, scale, miss in zip(vshortname, vunits, vstandardname, vlongname, vscale, vmiss)} self.DVARS = read_vars(f) @@ -658,7 +690,8 @@ class Dataset: self.dataInterval = [ 0.0 ] self.IVAR = Variable('Time_Start', 'seconds_from_0_hours_on_valid_date', - 'seconds_from_0_hours_on_valid_date', + 'Time_Start', + 'Time_Start', vartype=VariableType.IVAR, scale=1.0, miss=-9999999, splitChar=splitChar) self.IBVAR = None @@ -667,12 +700,14 @@ class Dataset: 'Time_Stop': Variable('Time_Stop', 'seconds_from_0_hours_on_valid_date', - 'seconds_from_0_hours_on_valid_date', + 'Time_Stop', + 'Time_Stop', scale=1.0, miss=-9999999, splitChar=splitChar), 'Some_Variable': Variable('Some_Variable', 'ppbv', - 'ppbv', + 'Some_Variable', + 'Some_Variable', scale=1.0, miss=-9999999, splitChar=splitChar) } diff --git a/tests/examples/PAVE-AR_DC8_20050203_R0.ict b/tests/examples/PAVE-AR_DC8_20050203_R0.ict index 61f9fb303dc4c404a088cdb0a6a1a16d996711a9..7e7ab06655b36b75646026f4e679a56e2a4b4fc5 100644 --- a/tests/examples/PAVE-AR_DC8_20050203_R0.ict +++ b/tests/examples/PAVE-AR_DC8_20050203_R0.ict @@ -23,8 +23,8 @@ Log10_O3NumDensity_Err[], part/cc, Log10_O3NumDensity_Error, Log10_NumDensity_er -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999 NumAlts, #, Number_of_altitudes, Number_of_altitudes_reported Year, yyyy, Year_UTC, Year_UTC -Month, mm, UTC, Month_UTC, Month_UTC -Day, dd, UTC, Day_UTC, Day_UTC +Month, mm, Month_UTC, Month_UTC +Day, dd, Day_UTC, Day_UTC AvgTime, minutes, Averaging_time, Averaging_time_of_presented_data xxx.x_minutes Lat, degrees, Latitude Lon, degrees, Longitude diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 7b5078cdf460ff4c60586a69fa4ef2cf7f261e23..ff7978065a9926c6af395bd1d1fd2ec910764761 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -1,7 +1,6 @@ import unittest import os import io -import sys import icartt def compare_files(fn, str_in, str_out, skiplines=0, nlines=-1): # pragma: no cover @@ -82,23 +81,27 @@ class Simple1001TestCase(unittest.TestCase): def test_ivar(self): ict = icartt.Dataset(self.fn, loadData=False) - self.assertEqual( ict.IVAR.name, "Start_UTC" ) - self.assertEqual( ict.IVAR.units, "seconds" ) - self.assertEqual( ict.IVAR.longname, "number_of_seconds_from_0000_UTC" ) - self.assertEqual( ict.IVAR.scale, 1.0 ) - self.assertEqual( ict.IVAR.miss, -99999.0 ) + self.assertEqual( ict.IVAR.shortname, "Start_UTC" ) + self.assertEqual( ict.IVAR.units, "seconds" ) + self.assertEqual( ict.IVAR.standardname, "number_of_seconds_from_0000_UTC" ) + self.assertEqual( ict.IVAR.longname, None ) + self.assertEqual( ict.IVAR.scale, 1.0 ) + self.assertEqual( ict.IVAR.miss, -99999.0 ) def test_dvar(self): ict = icartt.Dataset(self.fn, loadData=False) - self.assertEqual( [ DVAR.name for DVAR in ict.DVARS.values() ], + self.assertEqual( [ DVAR.shortname for DVAR in ict.DVARS.values() ], [ "Stop_UTC", "Mid_UTC", "DLat", "DLon", "Elev", "NO_ppbv", "NO_1sig", "NO2_ppbv", "NO2_1sig" ] ) self.assertEqual( [ DVAR.units for DVAR in ict.DVARS.values() ], [ "seconds", "seconds", "deg_N", "deg_E", "meters", "ppbv", "ppbv", "ppbv", "ppbv" ] ) + self.assertEqual( [ DVAR.standardname for DVAR in ict.DVARS.values() ], + [ None, None, None, None, None, None, None, None, None]) + self.assertEqual( [ DVAR.longname for DVAR in ict.DVARS.values() ], - [ "seconds", "seconds", "deg_N", "deg_E", "meters", "ppbv", "ppbv", "ppbv", "ppbv" ] ) + [ None, None, None, None, None, None, None, None, None]) self.assertEqual( [ DVAR.scale for DVAR in ict.DVARS.values() ], [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] )