diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1b4e7134656234c8fb66b39fcc26dc0d6a91f37c --- /dev/null +++ b/Makefile @@ -0,0 +1,3 @@ +.Phony: coverage +coverage: + coverage run --source=tests -m unittest discover && coverage report && coverage html diff --git a/icartt/__init__.py b/icartt/__init__.py index d540827dbc0de2c16a84b69e9e66d91448c19390..244119bc81c46e06d92839103486b54baf4cffcd 100644 --- a/icartt/__init__.py +++ b/icartt/__init__.py @@ -1 +1 @@ -from .dataset import Dataset, SimpleNormalComments, StandardNormalComments \ No newline at end of file +from .dataset import Dataset, StandardNormalComments \ No newline at end of file diff --git a/icartt/dataset.py b/icartt/dataset.py index 5a9271e0d7d8e5880baa98ffa3170ebf5714df8c..0ac8edecd469d979c80de6a99ee528d4bafb26db 100644 --- a/icartt/dataset.py +++ b/icartt/dataset.py @@ -1,20 +1,12 @@ import datetime import sys import collections -import copy +import decimal import math -from functools import total_ordering IMPLEMENTED_FORMATS = [ 1001, 2110 ] -class SimpleNormalComments(collections.UserList): - @property - def data(self): - return self.contents - def __init__(self, contents=[]): - self.contents = contents - -class StandardNormalComments(SimpleNormalComments): +class StandardNormalComments(collections.UserList): @property def data(self): return [ k + ": " + str(v) for k, v in self.keywords.items() ] + self.freeform + [ self.shortnames ] @@ -59,50 +51,50 @@ class StandardNormalComments(SimpleNormalComments): if not contents is []: self.ingest(contents) -class Data_1001(collections.UserList): +class Container_1001(collections.UserList): def write(self, prnt=lambda x: sys.stdout.write(x)): - for line in zip(zip(self.ivar), *self.dvar): - prnt([ line[0][0] ] + [ x[1] for x in line[1:] ]) + def p(val, var): + return var.miss if math.isnan(val) else val + + for i in range(len(self.IVAR)): + prnt( [ p(self.IVAR[i],self.IVAR) ] + [ p(DVAR[i][1],DVAR) for 
DVAR in self.DVARS.values() ] ) def extract_items(self, raw): for cur in range(len(raw)): - self.ivar.append_value_from_string_ivar(raw[cur][0]) - nul = [ var.append_value_from_string(raw[cur][0], raw[cur][i+1]) for i, var in enumerate(self.dvar) ] - - def __init__(self, raw=[], ivar=None, dvar=None): - self.ivar = ivar - self.dvar = dvar + self.IVAR.append(raw[cur][0]) + nul = [ self.DVARS[key].append(raw[cur][0], raw[cur][i+1]) for i, key in enumerate(self.DVARS) ] + + def __init__(self, raw=[], IVAR=None, IBVAR=None, AUXVARS=None, DVARS=None): + self.IVAR = IVAR + self.IBVAR = IBVAR + self.AUXVARS = AUXVARS + self.DVARS = DVARS # self.extract_items(raw) -class Data_2110(Data_1001): +class Container_2110(Container_1001): def write(self, prnt=lambda x: sys.stdout.write(x)): - for ivar in self.ivar: - prnt( [ ivar ] + [ a[1] for auxvar in self.auxvar for a in auxvar if a[0] == ivar ] ) - for ibvar in [ b[0][1] for b in self.dvar[0] if b[0][0] == ivar ]: - prnt([ ibvar ] + [ d[1] for dvar in self.dvar for d in dvar if (d[0][0] == ivar) and (d[0][1] == ibvar) ]) + def p(val, var): + return var.miss if math.isnan(val) else val + + for ival in self.IVAR: + prnt( [ p(ival, self.IVAR) ] + [ p(auxval[1], AUXVAR) for AUXVAR in self.AUXVARS.values() for auxval in AUXVAR if auxval[0] == ival ] ) + for ibval in [ b[1] for b in self.IBVAR if b[0] == ival ]: + prnt([ p(ibval, self.IBVAR) ] + [ p(dval[1], DVAR) for DVAR in self.DVARS.values() for dval in DVAR if (dval[0][0] == ival) and (dval[0][1] == ibval) ]) def extract_items(self, raw): cur = 0 + num_var_name = list(self.AUXVARS.keys())[0] while cur < len(raw): - self.ivar.append_value_from_string_ivar(raw[cur][0]) - nul = [ var.append_value_from_string_ibvar(raw[cur][0], raw[cur][i+1]) for i, var in enumerate(self.auxvar) ] - nprimary = int(self.auxvar[0][-1][1]) + self.IVAR.append(raw[cur][0]) + nul = [ self.AUXVARS[key].append(raw[cur][0], raw[cur][i+1]) for i, key in enumerate(self.AUXVARS) ] + nprimary = 
int(self.AUXVARS[num_var_name][-1][1]) for i in range(nprimary): -# import pdb; pdb.set_trace() - self.ibvar.append_value_from_string_ibvar(raw[cur][0], raw[cur+i+1][0]) - nul = [ var.append_value_from_string(raw[cur][0], raw[cur+i+1][0], raw[cur+i+1][j+1]) for j, var in enumerate(self.dvar) ] + self.IBVAR.append(raw[cur][0], raw[cur+i+1][0]) + nul = [ self.DVARS[key].append(raw[cur][0], raw[cur+i+1][0], raw[cur+i+1][j+1]) for j, key in enumerate(self.DVARS) ] cur += 1 + nprimary - - def __init__(self, raw=[], ivar=None, ibvar=None, auxvar=None, dvar=None): - self.ivar = ivar - self.ibvar = ibvar - self.auxvar = auxvar - self.dvar = dvar - # - self.extract_items(raw) -class Variable_1001(collections.UserList): +class Variable(collections.UserList): ''' A Variable is a ICARTT variable description with name, units, scale and missing value. ''' @@ -111,49 +103,53 @@ class Variable_1001(collections.UserList): ''' Return variable description string as it appears in an ICARTT file ''' - toplot = [self.name, self.units ] - if self.units != self.longname: - toplot += [ self.longname ] - return self.splitChar.join( toplot ) - - def __add__(self, other): - # for doing 'data += something' - for item in other: - self.data.append(item) - return self + return self.splitChar.join( [ self.name, self.units, self.longname ] ) - def _sanitize(self, x): - return float(x.strip().replace(self.miss, 'NaN')) - - def append_value_from_string_ivar(self, ivar): - self.data.append(self._sanitize(ivar)) - - def append_value_from_string(self, ivar, dvar): - self.data.append( ( self._sanitize(ivar), self._sanitize(dvar) ) ) + def append(self, *argv): + ''' + Append data to a variable. Depending on type (independent, dependent variable), + all identifying (bounded and unbounded) independent variables need to be given. 
+ + Examples: + + - file type 1001, add value of independent variable: + ivar.append(234.4) + - file type 1001, add value of dependent variable: + ivar.append(234.4, 18.2) + + - file type 2110, add value of independent (unbounded) variable: + ivar.append(234.4) + - file type 2110, add value of independent (bounded) variable: + ivar.append(234.4, 9148.2) + - file type 2110, add value of dependent variable: + ivar.append(234.4, 9148.2, 34.2) + ''' + sanitized = lambda z: float(z) if not float(z) == float(self.miss) else float('NaN') + + v = [ sanitized(y) for y in argv ] + + if len(v) > 2: + # ( (a, b, ...), c ) + x = ( tuple( [ y for y in v[:-1] ] ), v[-1] ) + elif len(v) > 1: + # ( a, b ) + x = ( ( v[0] ), v[1] ) + else: + # ( a ) + x = ( v[0] ) + + self.data.append( x ) - def __init__(self, name, units, longname, values=[], scale=1.0, miss=-99999, splitChar=","): - #: Name + def __init__(self, name, units, longname, scale=1.0, miss=-99999.0, splitChar=","): self.name = name - #: Units self.units = units - #: Long name self.longname = longname - #: Values - self.data = [] - #: Scale factor self.scale = scale - #: Missing value (string, just as it appears in the ICARTT file) - self.miss = str(miss) - #: Split character for description string + self.miss = miss + self.splitChar = splitChar - -class Variable_2110(Variable_1001): - def append_value_from_string_ibvar(self, ivar, ibvar): - self.data.append( ( self._sanitize(ivar) , self._sanitize(ibvar) ) ) - - def append_value_from_string(self, ivar, ibvar, dvar): - self.data.append( ( ( self._sanitize(ivar), self._sanitize(ibvar) ) , self._sanitize(dvar) ) ) - + + self.data = [] class Dataset: ''' @@ -167,98 +163,39 @@ class Dataset: ''' total = -1 if self.format == 1001: - total = 14 + self.ndvar + self.nscom + self.nncom + total = 14 + len(self.DVARS) + len(self.SCOM) + len(self.NCOM) if self.format == 2110: - total = 16 + self.nivar + self.nauxvar + self.ndvar + self.nscom + self.nncom + # 2: IVAR + IBVAR + total = 
16 + 2 + len(self.AUXVARS) + len(self.DVARS) + len(self.SCOM) + len(self.NCOM) return total @property - def ndvar(self): - ''' - Dependent variable count - ''' - return len(self.DVAR) - @property - def nivar(self): - ''' - Independent variable count - ''' - return 1 + (0 if self.IBVAR is None else 1) - @property - def nauxvar(self): - ''' - Auxiliary variables count - ''' - return len(self.AUXVAR) - @property - def nvar(self): - ''' - Variable count (independent + dependent + auxiliary) - ''' - return len(self.VAR) - @property - def nscom(self): - ''' - Special comments count - ''' - return len(self.SCOM) - @property - def nncom(self): - ''' - Normal comments count - ''' - return len(self.NCOM) - @property - def VAR(self): + def VARS(self): ''' Variables (independent + dependent + auxiliary) ''' - vars = [ self.IVAR ] + self.DVAR + vars = { self.IVAR.name: self.IVAR, **self.DVARS } if self.format == 2110: - vars = [ self.IBVAR ] + vars + self.AUXVAR + vars = { self.IBVAR.name: self.IBVAR, **vars, **self.AUXVARS } return vars @property def varnames(self): ''' Names of variables (independent and dependent) ''' - return [ x.name for x in self.VAR ] + return [ x.name for x in self.VARS ] @property def times(self): ''' Time steps of the data contained. 
''' - return [ self.dateValid + datetime.timedelta(seconds=x) for x in self[self.IVAR.name] ] - + return [ self.dateValid + datetime.timedelta(seconds=x) for x in self.IVAR ] + def __getitem__(self, name): ''' - Convenience function to access variable data by name:: - - ict = icartt.Dataset() - ict['O3'] - ''' - var = [ x for x in self.VAR if x.name == name ] - if not len(var) == 1: - raise Exception("{:s} not found in data".format(name)) - return var[0] - - def units(self, name): + Shortcut to enable access to variable data by name ''' - Units of variable - ''' - res = [ x.units for x in self.VAR if x.name == name ] - if len(res) == 0: - res = [ '' ] - return res[0] - - def index(self, name): - ''' - Index of variable in data array - ''' - res = [ i for i, x in enumerate(self.VAR) if x.name == name ] - if len(res) == 0: - res = [ -1 ] - return res[0] - + return self.VARS[name] + def write(self, f=sys.stdout): ''' Write to file handle @@ -288,29 +225,29 @@ class Dataset: # Description or name of independent variable (This is the name chosen for the start time. It always refers to the number of seconds UTC from the start of the day on which measurements began. It should be noted here that the independent variable should monotonically increase even when crossing over to a second day.). prnt(self.IVAR.desc) # Number of variables (Integer value showing the number of dependent variables: the total number of columns of data is this value plus one.). - prnt(self.ndvar) + prnt(len(self.DVARS)) # Scale factors (1 for most cases, except where grossly inconvenient) - comma delimited. - prnt(self.splitChar.join( [ "{:.1g}".format(x.scale) for x in self.DVAR ])) + prnt(self.splitChar.join( [ "{:.1g}".format(DVAR.scale) for DVAR in self.DVARS.values() ])) # Missing data indicators (This is -9999 (or -99999, etc.) for any missing data condition, except for the main time (independent) variable which is never missing) - comma delimited. 
- prnt(self.splitChar.join( [ str(x.miss) for x in self.DVAR ])) + prnt(self.splitChar.join( [ str(DVAR.miss) for DVAR in self.DVARS.values() ])) # Variable names and units (Short variable name and units are required, and optional long descriptive name, in that order, and separated by commas. If the variable is unitless, enter the keyword "none" for its units. Each short variable name and units (and optional long name) are entered on one line. The short variable name must correspond exactly to the name used for that variable as a column header, i.e., the last header line prior to start of data.). - nul = [ prnt(x.desc) for x in self.DVAR ] + nul = [ prnt(DVAR.desc) for DVAR in self.DVARS.values() ] if self.format == 2110: # Number of variables (Integer value showing the number of dependent variables: the total number of columns of data is this value plus one.). - prnt(self.nauxvar) + prnt(len(self.AUXVARS)) # Scale factors (1 for most cases, except where grossly inconvenient) - comma delimited. - prnt(self.splitChar.join( [ "{:.1g}".format(x.scale) for x in self.AUXVAR ])) + prnt(self.splitChar.join( [ "{:.1g}".format(AUXVAR.scale) for AUXVAR in self.AUXVARS.values() ])) # Missing data indicators (This is -9999 (or -99999, etc.) for any missing data condition, except for the main time (independent) variable which is never missing) - comma delimited. - prnt(self.splitChar.join( [ str(x.miss) for x in self.AUXVAR ])) + prnt(self.splitChar.join( [ str(AUXVAR.miss) for AUXVAR in self.AUXVARS.values() ])) # Variable names and units (Short variable name and units are required, and optional long descriptive name, in that order, and separated by commas. If the variable is unitless, enter the keyword "none" for its units. Each short variable name and units (and optional long name) are entered on one line. The short variable name must correspond exactly to the name used for that variable as a column header, i.e., the last header line prior to start of data.). 
- nul = [ prnt(x.desc) for x in self.AUXVAR ] + nul = [ prnt(AUXVAR.desc) for AUXVAR in self.AUXVARS.values() ] # Number of SPECIAL comment lines (Integer value indicating the number of lines of special comments, NOT including this line.). - prnt("{:d}".format(self.nscom)) + prnt("{:d}".format(len(self.SCOM))) # Special comments (Notes of problems or special circumstances unique to this file. An example would be comments/problems associated with a particular flight.). nul = [ prnt(x) for x in self.SCOM ] # Number of Normal comments (i.e., number of additional lines of SUPPORTING information: Integer value indicating the number of lines of additional information, NOT including this line.). - prnt("{:d}".format(self.nncom)) + prnt("{:d}".format(len(self.NCOM))) # Normal comments (SUPPORTING information: This is the place for investigators to more completely describe the data and measurement parameters. The supporting information structure is described below as a list of key word: value pairs. Specifically include here information on the platform used, the geo-location of data, measurement technique, and data revision comments. Note the non-optional information regarding uncertainty, the upper limit of detection (ULOD) and the lower limit of detection (LLOD) for each measured variable. The ULOD and LLOD are the values, in the same units as the measurements that correspond to the flags -7777s and -8888s within the data, respectively. The last line of this section should contain all the short variable names on one line. The key words in this section are written in BOLD below and must appear in this section of the header along with the relevant data listed after the colon. For key words where information is not needed or applicable, simply enter N/A.). nul = [ prnt(x) for x in self.NCOM ] # data! 
@@ -358,7 +295,8 @@ class Dataset: if len(dmp) > 2: self.version = dmp[2] - assert self.format in IMPLEMENTED_FORMATS, "ICARTT format {:d} not implemented".format(self.format) + if not self.format in IMPLEMENTED_FORMATS: + raise ValueError("ICARTT format {:d} not implemented".format(self.format)) # line 2 - PI last name, first name/initial. self.PI = f.readline(do_split=False) @@ -369,7 +307,7 @@ class Dataset: # line 4 - Data source description (e.g., instrument name, platform name, # model name, etc.). self.dataSource = f.readline(do_split=False) - + # line 5 - Mission name (usually the mission acronym). self.mission = f.readline(do_split=False) @@ -396,8 +334,6 @@ class Dataset: # 2.5 below.). self.dataInterval = float(f.readline()[0]) - Variable = Variable_1001 if self.format == 1001 else Variable_2110 - # line 9 - Description or name of independent variable (This is the name # chosen for the start time. It always refers to the number of seconds UTC # from the start of the day on which measurements began. It should be noted @@ -410,55 +346,20 @@ class Dataset: dmp = f.readline() self.IVAR = Variable(dmp[0], dmp[1], dmp[2 if len(dmp) > 2 else 1], splitChar=self.splitChar) - # line 10 - Number of variables (Integer value showing the number of - # dependent variables: the total number of columns of data is this value - # plus one.). - ndvar = int(f.readline()[0]) - - # line 11- Scale factors (1 for most cases, except where grossly - # inconvenient) - comma delimited. - dvscale = [ float(x) for x in f.readline() ] - - # line 12 - Missing data indicators (This is -9999 (or -99999, etc.) for - # any missing data condition, except for the main time (independent) - # variable which is never missing) - comma delimited. - dvmiss = [ x for x in f.readline() ] - # no float casting here, as we need to do string comparison lateron when reading data... 
- - # line 13 - Variable names and units (Short variable name and units are - # required, and optional long descriptive name, in that order, and separated - # by commas. If the variable is unitless, enter the keyword "none" for its - # units. Each short variable name and units (and optional long name) are - # entered on one line. The short variable name must correspond exactly to - # the name used for that variable as a column header, i.e., the last header - # line prior to start of data.). - dmp = f.readline() - dvname = [ dmp[0] ] - dvunits = [ dmp[1] ] - dvlongname = [ dmp[2 if len(dmp) > 2 else 1] ] - - for i in range(1, ndvar): - dmp = f.readline() - dvname += [ dmp[0] ] - dvunits += [ dmp[1] ] - dvlongname += [ dmp[2 if len(dmp) > 2 else 1] ] - - self.DVAR = [ Variable(name, unit, longname, scale=scale, miss=miss, splitChar=self.splitChar) for name, unit, longname, scale, miss in zip(dvname, dvunits, dvlongname, dvscale, dvmiss) ] - - if self.format == 2110: + def read_vars(f): # line 10 - Number of variables (Integer value showing the number of # dependent variables: the total number of columns of data is this value # plus one.). - navar = int(f.readline()[0]) + nvar = int(f.readline()[0]) # line 11- Scale factors (1 for most cases, except where grossly # inconvenient) - comma delimited. - avscale = [ float(x) for x in f.readline() ] + vscale = [ float(x) for x in f.readline() ] # line 12 - Missing data indicators (This is -9999 (or -99999, etc.) for # any missing data condition, except for the main time (independent) # variable which is never missing) - comma delimited. - avmiss = [ x for x in f.readline() ] + vmiss = [ float(x) for x in f.readline() ] # no float casting here, as we need to do string comparison lateron when reading data... # line 13 - Variable names and units (Short variable name and units are @@ -469,17 +370,22 @@ class Dataset: # the name used for that variable as a column header, i.e., the last header # line prior to start of data.). 
dmp = f.readline() - avname = [ dmp[0] ] - avunits = [ dmp[1] ] - avlongname = [ dmp[2 if len(dmp) > 2 else 1] ] - - for i in range(1, navar): - dmp = f.readline() - avname += [ dmp[0] ] - avunits += [ dmp[1] ] - avlongname += [ dmp[2 if len(dmp) > 2 else 1] ] + vname = [ dmp[0] ] + vunits = [ dmp[1] ] + vlongname = [ dmp[2 if len(dmp) > 2 else 1] ] - self.AUXVAR = [ Variable(name, unit, longname, scale=scale, miss=miss, splitChar=self.splitChar) for name, unit, longname, scale, miss in zip(avname, avunits, avlongname, avscale, avmiss) ] + for i in range(1, nvar): + dmp = f.readline() + vname += [ dmp[0] ] + vunits += [ dmp[1] ] + vlongname += [ dmp[2 if len(dmp) > 2 else 1] ] + + return { name: Variable(name, unit, longname, scale=scale, miss=miss, splitChar=self.splitChar) for name, unit, longname, scale, miss in zip(vname, vunits, vlongname, vscale, vmiss) } + + self.DVARS = read_vars(f) + + if self.format == 2110: + self.AUXVARS = read_vars(f) # line 14 + nvar - Number of SPECIAL comment lines (Integer value # indicating the number of lines of special comments, NOT including this @@ -512,11 +418,7 @@ class Dataset: # colon. For key words where information is not needed or applicable, simply # enter N/A.). 
raw_ncom = [ f.readline(do_split=False) for i in range(0, nncom) ] - try: - self.NCOM = StandardNormalComments(raw_ncom) - except: - warnings.warn("Normal comments do not adhere to ICARTT v2.0 standard.") - self.NCOM = SimpleNormalComments(raw_ncom) + self.NCOM = StandardNormalComments(raw_ncom) self.nheader_file = f.line @@ -533,33 +435,14 @@ class Dataset: nul = [ self.input_fhandle.readline() for i in range(self.nheader_file) ] if self.format == 1001: - self.data = Data_1001([ line.split(self.splitChar) for line in self.input_fhandle ], ivar=self.IVAR, dvar=self.DVAR) + self.data = Container_1001([ line.split(self.splitChar) for line in self.input_fhandle ], IVAR=self.IVAR, DVARS=self.DVARS) elif self.format == 2110: - self.data = Data_2110([ line.split(self.splitChar) for line in self.input_fhandle ], ivar=self.IVAR, ibvar=self.IBVAR, auxvar=self.AUXVAR, dvar=self.DVAR) + self.data = Container_2110([ line.split(self.splitChar) for line in self.input_fhandle ], IVAR=self.IVAR, IBVAR=self.IBVAR, AUXVARS=self.AUXVARS, DVARS=self.DVARS) else: print("Unknown format") self.input_fhandle.close() - def read_first_and_last(self): - ''' - Read first and last ICARTT data line (from file). Useful for quick estimates e.g. of the time extent - of big ICARTT files, without having to read the whole thing, which would be slow. 
- ''' - if self.input_fhandle.closed: - self.input_fhandle = open(self.input_fhandle.name) - - nul = [ self.input_fhandle.readline() for i in range(self.nheader_file) ] - - first = self.input_fhandle.readline() - self.data = Data_1001([ first.split(self.splitChar) ], ivar=self.IVAR, dvar=self.DVAR) - for line in self.input_fhandle: - pass - last = line - self.data += [ last.split(self.splitChar) ] - - self.input_fhandle.close() - def read(self): ''' Read ICARTT data and header @@ -591,24 +474,26 @@ class Dataset: self.dateValid = datetime.datetime.today() self.dateRevised = datetime.datetime.today() self.dataInterval = 0.0 - self.IVAR = Variable_1001('Time_Start', + self.IVAR = Variable('Time_Start', 'seconds_from_0_hours_on_valid_date', 'seconds_from_0_hours_on_valid_date', scale=1.0, miss=-9999999, splitChar=splitChar) - self.DVAR = [ - Variable_1001('Time_Stop', + self.DVARS = { + 'Time_Stop': + Variable('Time_Stop', 'seconds_from_0_hours_on_valid_date', 'seconds_from_0_hours_on_valid_date', scale=1.0, miss=-9999999, splitChar=splitChar), - Variable_1001('Some_Variable', + 'Some_Variable': + Variable('Some_Variable', 'ppbv', 'ppbv', scale=1.0, miss=-9999999, splitChar=splitChar) - ] + } self.SCOM = [] self.NCOM = [] - self.data = Data_1001([]) + self.data = Container_1001([]) # for 2210 self.IBVAR = None @@ -626,8 +511,7 @@ class Dataset: self.input_fhandle = open(f, 'r') else: self.input_fhandle = f - + self.read_header() if loadData: self.read_data() - diff --git a/setup.py b/setup.py index 08a071ba40a2aa8bafd04794eebedb4f2bc3dd38..188ba424a21e2f5559204b0725feb70c401036a7 100644 --- a/setup.py +++ b/setup.py @@ -8,6 +8,7 @@ def read(filename): setup(name='icartt', description='ICARTT format reader and writer', long_description=read('README.rst') + '\n\n' + read('INSTALL.rst') + '\n\n' + read('CHANGES.rst'), + long_description_content_type='text/markdown', version='1.0.1', url='https://boxmodeling.meteo.physik.uni-muenchen.de', author='Christoph Knote', @@ 
-30,4 +31,6 @@ setup(name='icartt', ], keywords='', packages=['icartt'], + test_suite = 'tests', + tests_require = [], zip_safe=False) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/examples/AROTAL-RAY_DC8_20040715_R1.ict b/tests/examples/AROTAL-RAY_DC8_20040715_R1.ict new file mode 100644 index 0000000000000000000000000000000000000000..7ed31115c176bc908857389a5bd22994806d4df2 --- /dev/null +++ b/tests/examples/AROTAL-RAY_DC8_20040715_R1.ict @@ -0,0 +1,90 @@ +68, 2110, V02_2016 +MAHONEY, MJ +M/S 169-237; Jet Propulsion Laboratory; Pasadena, CA 91109-8099 +MTP - Microwave Temperature Profiler +SEAC4RS +1, 1 +2013, 08, 01, 2013, 08, 18 +0, 0 +Palt[], meters, Pressure_Altitude, Pressure_Altitude_array +Start_UTC, seconds, Time_Start, elapsed time from 0000 UTC +4 +1.0, 1.0, 1.0, 1E+21 +-99999, -99999, -99999, -99999 +Temperature[], K, Temperature, Temperature_array +Temperture_SE[], K, Temperature_Standard_Error, Temperature_Standard_Error_array +Geometric_altitude[], meters, Geometric_Altitude, Geometric_Altitude_array +Molecular_air_density[], number_per_cubic_meter, Molecular_air_density, Molecular_air_density_array +17 +1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 +-9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999 +NZ, none, number_of_altitudes_in_data_record +Stop_UTC, seconds, Time_Stop, end of scan +Mid_UTC, seconds, Time_Mid, horizon location of scan +Zp, km, Pressure_altitude +Pitch, deg, Aircraft_Pitch +Roll, deg, Aircraft_Roll +OAT, K, Outside_air_temperature +Zt1, km, Tropopause_1_pressure_altitude +Zt2, km, Tropopause_2_pressure_altitude +PTt1, K, Potential_Temperature_tropopause_1 +PTt2, K, Potential_Temperature_tropopause_2 +Latitude, deg, Aircraft_Latitude +Longitude, deg, Aircraft_Longitude +LRac, K/km, 
Lapse_Rate_at_Flight_Level +MRI, none, Retrieval_quality_metric +Tcp, K, Cold_point_temperature +Zcp, km, Cold_point_pressure_altitude +1 +*** Preliminary Data Preliminary Data Preliminary Data Preliminary Data Preliminary Data *** +19 +Here's a brief free-form tutorial on how to decipher the MTP data: +Data groups consist of the following group of lines per 15-second observing cycle. +First line is: UTstart, number of retrieval levels, UTend, UTmid, Pressure Altitude, Pitch, Roll, Outside air temp (K), tropopause altitude #1 (km), tropopause altitude #2 (km) [if present], potential temperatures of tropopause #1 and #2, latitude, longitude, & lapse rate near flight level. +Remaining set of lines for each cycle consist of 5 columns: col#1 is pressure altitude (meters), col#2 is temperature from MTP (Kelvin), col#3 is temperature error estimate (K), col#4 is geometric altitude (meters), based on GPS altitude (meters), and col#5 is molecular air density [1E+21/m3]. +PI_CONTACT_INFO: M/S 246-102; Jet Propulsion Laboratory; Pasadena, CA 91109- 8099; Michael.J.Mahoney@jpl.nasa.gov +PLATFORM: NASA Global Hawk 872 +LOCATION: see http://espoarchive.nasa.gov/archive/info ASSOCIATED_DATA: See http://espoarchive.nasa.gov/archive/browse/attrex INSTRUMENT_INFO: See http://mtp.jpl.nasa.gov/ +DATA_INFO: See text above +UNCERTAINTY: Contact PI +ULOD_FLAG: -7777 +ULOD_VALUE: N/A +LLOD_FLAG: -8888 +LLOD_VALUE: N/A +DM_CONTACT_INFO: M/S 169-237; Jet Propulsion Laboratory; Pasadena, CA 91109-8099; Michael.J.Mahoney@jpl.nasa.gov +PROJECT_INFO: SEAC4RS mission 01 August - 30 September 2013; DAOF and Ellington Field, Houston, Texas +STIPULATIONS_ON_USE: Use of these data should be done in consultation with the PI OTHER_COMMENTS: +REVISION: R0 +R0: Preliminary Data +Start_UTC, NZ, Stop_UTC, Mid_UTC, Zp, Pitch, Roll, OAT, Zt1, Zt2, PTt1, PTt2, Latitude, Longitude, LRac, MRI, Tcp, Zcp, Palt[], Temperature[], Temperture_SE[], Geometric_altitude[], Molecular_air_density[] +77381, 0, 77394, 
77386, 6.358, 15.8, -0.2, 261.9, -9999, -9999, -9999, -9999, 34.779, -118.165, -9999, 0.25, 200.66, 15.36 +77394, 0, 77407, 77399, 6.618, 11.9, -0.6, 259.7, -9999, -9999, -9999, -9999, 34.792, -118.171, -9999, 0.17, 202.05, 15.62 +77407, 13, 77420, 77412, 6.790, 6.1, 0.2, 258.2, 15.2, 99.9, 373.2, 999.9, 34.805, -118.177, -6.7, 0.21, 201.61, 15.79 +13690, 208.0, 1.4, 14295, 5150 +10790, 229.1, 1.0, 11364, 7387 +9290, 241.3, 0.8, 9775, 8827 +8390, 248.4, 0.7, 8816, 9799 +7790, 252.3, 0.5, 8176, 10526 +7290, 255.6, 0.4, 7643, 11160 +6940, 257.9, 0.2, 7270, 11623 +6640, 259.7, 0.3, 6950, 12035 +6290, 262.3, 0.5, 6577, 12512 +5790, 265.6, 0.8, 6045, 13239 +5211, 269.0, 1.0, 5429, 14142 +4343, 274.6, 1.0, 4506, 15559 +1834, 288.6, 9.1, 1839, 20460 +77621, 14, 77634, 77626, 10.279, 6.1, -0.1, 230.2, 15.0, 99.9, 386.7, 999.9, 35.058, -118.258, -5.1, 0.32, 210.34, 15.48 +17179, 211.0, 1.3, 17650, 2929 +14279, 213.0, 1.4, 14814, 4582 +12779, 218.4, 1.1, 13314, 5662 +11879, 221.8, 0.8, 12397, 6425 +11279, 225.0, 0.5, 11776, 6963 +10779, 228.1, 0.3, 11252, 7433 +10429, 229.9, 0.2, 10885, 7789 +10129, 231.3, 0.2, 10571, 8109 +9779, 233.2, 0.3, 10206, 8485 +9279, 236.2, 0.4, 9686, 9034 +8679, 240.0, 0.6, 9062, 9720 +7779, 246.8, 0.8, 8125, 10777 +6279, 259.0, 0.9, 6554, 12691 +3779, 279.4, 1.2, 3910, 16472 diff --git a/tests/examples/AR_DC8_20050203_R0.ict b/tests/examples/AR_DC8_20050203_R0.ict new file mode 100644 index 0000000000000000000000000000000000000000..c77093949879e377273cab003076b97fde785a98 --- /dev/null +++ b/tests/examples/AR_DC8_20050203_R0.ict @@ -0,0 +1,73 @@ +54, 2110 +PI LastName, First Name +Code 916, Goddard Space Flight Center, Greenbelt, MD 20771 +AROTAL +PAVE Mission +1, 1 +2005, 02, 03, 2006, 01, 18 +1 +Altitude[], meters, Altitude_array +UTC, XX.XXXX_hours_from_0_hours_on_flight_date +7 +0.1, 0.0001, 0.1, 0.01, 0.0001, 0.1, 0.0001 +-9999, -999999, -999999, -999999, -999999, -99999, -999999 +TempK[], K, Temperature_array +Log10_NumDensity[], part/cc, 
Log10_NumDensity_array +TempK_Err[], K, Temperature_error_array +AerKlet[], Klet, Aerosol_array +Log10_O3NumDensity[], part/cc, Log10_Ozone_NumDensity_array +O3_MR[], ppb, Ozone_mixing_ratio_array +Log10_O3NumDensity_Err[], part/cc, Log10_NumDensity_error_array +11 +1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 +-9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999 +NumAlts, none, Number_of_altitudes_reported +Year, UT +Month, UT +Day, UT +AvgTime, xxx.x_minutes, Averaging_time_of_presented_data +Latitude, degrees +Longitude, degrees +PAlt, meters, pressure_altitude +GPSAlt, meters, GPS_altitude +SAT, K, Static_air_temperature +SZA, degrees +0 +18 +PI_CONTACT_INFO: Enter PI Address here +PLATFORM: NASA DC8 +LOCATION: Lat, Lon, and Alt included in the data records +ASSOCIATED_DATA: N/A +INSTRUMENT_INFO:N/A +DATA_INFO:N/A +UNCERTAINTY: Contact PI +ULOD_FLAG: -7777 +ULOD_VALUE: N/A +LLOD_FLAG: -8888 +LLOD_VALUE: N/A +DM_CONTACT_INFO: Enter Data Manager Info here +PROJECT_INFO: PAVE MISSION: Jan-Feb 2005 +STIPULATIONS_ON_USE: Use of these data should be done in consultation with the PI +OTHER_COMMENTS: N/A +REVISION: R0; +R0: Version 2005-0: AROTAL T & O3 Rayleigh Retrievals. Further revisions may be needed to fine-tune aerosol characterization. 
+UTC, NumAlts, Year, Month, Day, AvgTime, Latitude, Longitude, PAlt, GpsAlt, SAT, SZA, Altitude[], TempK[], Log10_NumDensity[], TempK_Err[], AerKlet[], Log10_O3NumDensity[], O3_MR[], Log10_O3NumDensity_Err[] +54000, 9, 2005, 2, 3, 0, 42.308, -70.582, 6910, 6979, 242.5, 65.5 + 9154, -9999, -999999, -9999, -9999, 113178, 212, -999999 + 9304, -9999, -999999, -9999, -9999, 123353, 2250, -999999 + 9454, -9999, -999999, -9999, -9999, 123008, 2116, -999999 + 9604, -9999, -999999, -9999, -9999, 120933, 1337, -999999 + 9754, -9999, -999999, -9999, -9999, 119675, 1019, -999999 + 9904, -9999, -999999, -9999, -9999, 122655, 2061, -999999 + 10054, -9999, -999999, -9999, -9999, 124384, 3126, -999999 + 10204, -9999, -999999, -9999, -9999, 124632, 3371, -999999 + 10354, -9999, -999999, -9999, -9999, 121341, 1609, -999999 +54001, 8, 2005, 02, 03, 0, 42.278, -70.613, 6978, 7043, 241.7, 65.5 + 10118, 9999, -999999, -9999, -9999, 124458, 3205, -999999 + 10268, -9999, -999999, -9999, -9999, 123160, 2421, -999999 + 10418, -9999, -999999, -9999, -9999, 121221, 1582, -999999 + 10568, -9999, -999999, -9999, -9999, 120950, 1523, -999999 + 10718, -9999, -999999, -9999, -9999, 117339, 680, -999999 + 10868, -9999, -999999, -9999, -9999, 122751, 2423, -999999 + 11018, -9999, -999999, -9999, -9999, 124230, 3491, -999999 + 11168, -9999, -999999, -9999, -9999, 124039, 3424, -999999 diff --git a/tests/examples/HOX_DC8_20040712_R0.ict b/tests/examples/HOX_DC8_20040712_R0.ict new file mode 100644 index 0000000000000000000000000000000000000000..a4ac3a3279894fcdfd0efa9c08fcbad0d1af56d7 --- /dev/null +++ b/tests/examples/HOX_DC8_20040712_R0.ict @@ -0,0 +1,43 @@ +36, 1001 +Brune, William +Penn State University +ATHOS - OH and HO2 concentrations using cryo water mix ratio data for quenching corrections +ICARTT_INTEX +1, 1 +2004, 07, 12, 2005, 01, 12 +0 +Start_UTC, seconds +4 +1, 1, 1, 1 +-9999, -9999, -9999, -9999 +Stop_UTC, seconds +Mid_UTC, seconds +OH_pptv, pptv +HO2_pptv, pptv +0 +18 +PI_CONTACT_INFO: 
Address: 503 Walker Building, University Park, PA 16802; email: brune@essc.psu.edu; +PLATFORM: NASA DFRC DC8 - sampling underneath aircraft forward cargo bay location +LOCATION: Aircraft location data in nav_dc8_20040712_R0.ict file +ASSOCIATED_DATA: see ftp://ftp-air.larc.nasa.gov/pub-air/INTEXNA/ +INSTRUMENT_INFO: OH/HO2 LIF +DATA_INFO: Units are pptv. +UNCERTAINTY: The absolute accuracy is conservatively estimated to be +/- 32% at two sigma confidence +ULOD_FLAG: -7777 +ULOD_VALUE: N/A +LLOD_FLAG: -8888 +LLOD_VALUE: N/A +DM_CONTACT_INFO: Bob Lesher; Penn State University; blesher@psu.edu +PROJECT_INFO: INTEX Mission 26 June-14 August 2004; California, Illinois, and New Hampshire +STIPULATIONS_ON_USE: Use of these data requires prior approval from William Brune +OTHER_COMMENTS: N/A +REVISION: R0 +R0: Final Data +Start_UTC, Stop_UTC, Mid_UTC, OH_pptv, HO2_pptv +55526, 55545, 55535, 0.171, 9.791 +55546, 55565, 55555, 0.180, 9.218 +55566, 55585, 55575, 0.186, 9.767 +55586, 55605, 55595, 0.176, 9.996 +55606, 55625, 55615, 0.192, 9.513 +55626, 55645, 55635, 0.185, 9.798 +55646, 55665, 55655, 0.160, 9.834 diff --git a/tests/examples/LIDARO3_WP3_20040830_R0.ict b/tests/examples/LIDARO3_WP3_20040830_R0.ict new file mode 100644 index 0000000000000000000000000000000000000000..f5b1e5c5f70154e168a8ebc058bfa5ee2235b825 --- /dev/null +++ b/tests/examples/LIDARO3_WP3_20040830_R0.ict @@ -0,0 +1,50 @@ +46, 2310 +Williams, Eric +NOAA/Earth System Research Laboratory +Ozone number density profile from WP3 aircraft LIDAR +ICARTT_ITCT +1, 1 +2004, 08, 30, 2009, 09, 04 +1 +Geo_Alt, meters, Geometric_altitude_of_observation +UT_TIME, seconds, Elapsed_time_from_0_hours_on_day_given_by_date +1 ;{Number of PRIMARY variables} +1.0e9 +-9999 +O3_NumDensity[], molecules/cc, Ozone_NumDensity_Array +9 ;{Number of AUXILIARY variable} +1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 +-9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999 +Num_Altitudes, none, 
number_of_altitudes_at_current_time_mark +Geo_Alt_Begin, meters, geometric_altitude_at_which_data_begin +Alt_Increment, meters, altitude_increment_between_observations +Geo_Alt_Aircraft, meters, geometric_altitude_of_aircraft +UT_hour, hours +UT_min, minutes +UT_sec, seconds +Lon_aircraft, degrees_E +Lat_aircraft, degrees_N +0 +18 +PI_CONTACT_INFO: 325 Broadway, Boulder, CO 80305; 303-497-3226; eric.j.williams@noaa.gov +PLATFORM: NOAA WP3 +LOCATION: Lat, Lon, and Alt data included in the data records +ASSOCIATED_DATA: N/A +INSTRUMENT_INFO: Differential absorption LIDAR. See Williams et al., BigScience, 42, p. 50-51, 2001 +DATA_INFO: The units are number density (#/cc). The vertical averaging interval is 975 m at 1-7 km above the aircraft and 2025 m > 7 km above the aircraft. Horizontal averaging interval: 60 km. +UNCERTAINTY: Contact PI +ULOD_FLAG: -7777 +ULOD_VALUE: N/A +LLOD_FLAG: -8888 +LLOD_VALUE: N/A +DM_CONTACT_INFO: Contact PI +PROJECT_INFO: ICARTT study; 1 July-15 August 2004 +STIPULATIONS_ON_USE: Use of these data requires PRIOR OK from the PI +OTHER_COMMENTS: N/A +REVISION: R0 +R0: No comments for this revision. 
+UT_TIME, Num_Altitudes, Geo_Alt_Begin, Alt_Increment, Geo_Alt_Aircraft, UT_hour, UT_min, UT_sec, Lon_aircraft, Lat_aircraft, O3_NumDensity[] +30335, 26, 12819, 75, 10389, 8, 25, 35, -133.24, -9.45 + 1340, 1519, 1660, 1779, 1868, 1939, 1973, 1992, 1989, 1955, 1934, 1897, 1817, 1721, 1619, 1514, 1434, 1343, 1258, 1203, 1140, 1088, 1037, 956, 892, 878 +30336, 22, 12819, 75, 10383, 8, 26, 0, -133.22, -9.93 + 1351, 1523, 1658, 1774, 1860,1930, 1962, 1974, 1966, 1932, 1909, 1877, 1803, 1706, 1600, 1493, 1407, 1310, -9999, -9999, 1094, 1045 diff --git a/tests/examples/NOx_RHBrown_20040830_R0.ict b/tests/examples/NOx_RHBrown_20040830_R0.ict new file mode 100644 index 0000000000000000000000000000000000000000..e7c85db88294f830e027764a8d302435490b3912 --- /dev/null +++ b/tests/examples/NOx_RHBrown_20040830_R0.ict @@ -0,0 +1,43 @@ +41, 1001 +Williams, Eric +Earth System Research Laboratory/NOAA +Nitric oxide and nitrogen dioxide mixing ratios from R/V Ronald H. Brown +ICARTT_NEAQS +1, 1 +2004, 08, 30, 2004, 12, 25 +0 +Start_UTC, seconds, number_of_seconds_from_0000_UTC +9 +1, 1, 1, 1, 1, 1, 1, 1, 1 +-9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999, -9999 +Stop_UTC, seconds +Mid_UTC, seconds +DLat, deg_N +DLon, deg_E +Elev, meters +NO_ppbv, ppbv +NO_1sig, ppbv +NO2_ppbv, ppbv +NO2_1sig, ppbv +0 +18 +PI_CONTACT_INFO: 325 Broadway, Boulder, CO 80305; 303-497-3226; email:eric.j.williams@noaa.gov +PLATFORM: NOAA research vessel Ronald H. Brown +LOCATION: Latitude, longitude and elevation data are included in the data records +ASSOCIATED_DATA: N/A +INSTRUMENT_INFO: NO: chemiluminescence; NO2: narrow-band photolysis/chemiluminescence +DATA_INFO: All data with the exception of the location data are in ppbv. All oneminute averages contain at least 35 seconds of data, otherwise missing. 
def compare_files(str_in, str_out, diff=0):
    """Report line-by-line differences between two text streams.

    Both streams are rewound, read completely and then closed. Each pair of
    lines is compared after stripping surrounding whitespace and removing
    every space character, so differences in column padding are ignored.

    Args:
        str_in: Seekable text stream holding the reference content.
        str_out: Seekable text stream holding the content to check.
        diff: Unused; retained so existing call sites keep working.

    Returns:
        Always ``True``. Mismatching lines are printed, not signalled
        through the return value.

    Raises:
        Exception: If the two streams do not have the same number of lines.
    """
    # Close both streams even when rewinding or reading fails, so a broken
    # stream does not leave the other handle open.
    try:
        str_out.seek(0)
        str_in.seek(0)
        expected_lines = str_in.readlines()
        actual_lines = str_out.readlines()
    finally:
        str_in.close()
        str_out.close()

    if len(expected_lines) != len(actual_lines):
        raise Exception("Input ({:d}) and output ({:d}) do not have the same number of lines!".format(len(expected_lines), len(actual_lines)))

    for i, (raw_in, raw_out) in enumerate(zip(expected_lines, actual_lines)):
        inline = raw_in.strip().replace(" ", "")
        outline = raw_out.strip().replace(" ", "")
        if inline != outline:
            print("Line {:d} differs:".format(i))
            print(" input: {:s}".format(inline))
            print(" output: {:s}".format(outline))

    # NOTE(review): content mismatches never make this return False, so
    # ``assert compare_files(...)`` can only fail on a line-count mismatch.
    # Kept as-is because callers rely on it -- confirm whether a strict mode
    # (perhaps via ``diff``) is wanted.
    return True
class DatasetTestCase(unittest.TestCase):
    """Round-trip tests: read an example ICARTT file and write it back out."""

    def _assert_roundtrip(self, fn):
        """Read *fn* with ``icartt.Dataset``, write it out, and compare.

        The dataset is constructed with ``loadData=False`` and the data is
        then read explicitly via ``read_data()`` before ``write()`` is
        called on an in-memory buffer.
        # NOTE(review): presumably ``loadData=False`` defers parsing of the
        # data section -- confirm against ``icartt.Dataset``.

        Args:
            fn: Path of the example file, relative to the working directory.
        """
        print(fn)

        str_out = io.StringIO()
        # ``with`` releases the fixture handle even if parsing or writing
        # raises; compare_files() closing it first is harmless because
        # closing an already-closed file is a no-op.
        with open(fn) as str_in:
            ict = icartt.Dataset(fn, loadData=False)

            ict.read_data()
            ict.write(str_out)

            # assertTrue survives ``python -O``, unlike a bare ``assert``.
            self.assertTrue(compare_files(str_in, str_out))

    def test_simple_1001_NOx(self):
        self._assert_roundtrip('tests/examples/NOx_RHBrown_20040830_R0.ict')

    def test_simple_1001_HOx(self):
        self._assert_roundtrip('tests/examples/HOX_DC8_20040712_R0.ict')

    def test_simple_2110_AR(self):
        self._assert_roundtrip('tests/examples/AR_DC8_20050203_R0.ict')

    def test_simple_2110_AROTAL(self):
        self._assert_roundtrip('tests/examples/AROTAL-RAY_DC8_20040715_R1.ict')


def suite():
    """Return a ``unittest.TestSuite`` holding the four round-trip tests."""
    # Local is not called ``suite`` so it does not shadow this function.
    tests = unittest.TestSuite()
    for name in (
        'test_simple_1001_NOx',
        'test_simple_1001_HOx',
        'test_simple_2110_AR',
        'test_simple_2110_AROTAL',
    ):
        tests.addTest(DatasetTestCase(name))
    return tests


if __name__ == '__main__':
    runner = unittest.TextTestRunner()
    runner.run(suite())