diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index 7167b0cb7861876c31c1319de0943d648b10a76a..4bef8669254ba42bd7a610d653db89c0336af94a 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -1,3 +1,4 @@ +from ctypes import ArgumentError import datetime import sys import pathlib @@ -9,13 +10,13 @@ from enum import IntEnum import numpy as np - DEFAULT_NUM_FORMAT = "%f" DEFAULT_FIELD_DELIM = ", " class Formats(IntEnum): """File Format Indices (FFI)""" + FFI1001 = 1001 FFI2110 = 2110 @@ -26,20 +27,17 @@ class VariableType(IntEnum): AuxiliaryVariable = 3 DependentVariable = 4 - -def vmiss_to_npnan(val, miss): - """converts value to np.nan if is (almost) equal to miss""" - val, miss = map(float, (val, miss)) - return np.NaN if np.isclose(val, miss) else val - - class DataStore1001: def __init__(self, ivar, dvars): self.ivarname = ivar.shortname self.varnames = [ivar.shortname] + [x for x in dvars] - self.missvals = {x: dvars[x].miss for x in dvars} - self.missvals.update({self.ivarname: ivar.miss}) + self.missingValues = {x: dvars[x].miss for x in dvars} + self.missingValues.update({self.ivarname: ivar.miss}) + + self.default_dtype = np.float64 + + self.dtypes = [(name, self.default_dtype) for name in self.varnames] self.data = None @@ -49,44 +47,49 @@ class DataStore1001: return self.data[s] # returns None implicitly if self.data is None - def addBulk(self, raw): - nlines, nvars = raw.shape - if not nvars == len(self.varnames): - raise Exception("Number of data columns does not match variable count!") - self._addBulk(raw, nlines) - - def addBulkFromTxt(self, raw): - if not len(raw[0]) == len(self.varnames): - raise Exception("Number of data columns does not match variable count!") - self._addBulk(raw, len(raw)) - - def _addBulk(self, raw, n): - for cur in range(n): - newdata = {x: raw[cur][i] for i, x in enumerate(self.varnames)} - self.add(**newdata) - - def add(self, **kwargs): - if not self.ivarname in kwargs.keys(): - raise Exception("Need independent variable data.") - - ivarvalue = vmiss_to_npnan(kwargs[self.ivarname], self.missvals[self.ivarname]) - - newline = np.array(np.NaN, dtype=[(v, "f8") for v in self.varnames]) - for key in kwargs: - if key in self.varnames: - newline[key] = vmiss_to_npnan(kwargs[key], self.missvals[key]) + def addFromTxt(self, f, splitChar, max_rows=None): + # genfromtxt would warn if file is empty. We do not want that. + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + newData = np.genfromtxt( + f, + names=self.varnames, + dtype=self.dtypes, + missing_values=self.missingValues, + usemask=True, + delimiter=splitChar, + max_rows=max_rows, + deletechars="", + ).filled(fill_value=np.nan) + self.add(newData) + + def add(self, newData): + """bulk add data, providing a (structured) numpy array. + + Array has to have shape [ (ivar, dvar, dvar, ...), ... ], + missing values have to be set to np.nan. + + :param newData: data to be added + :type newData: numpy.ndarray + """ + if not type(newData) is np.ndarray: + raise ArgumentError("Input data needs to be numpy ndarray.") + if newData.dtype.names is None: + try: + newData.dtype = [(name, newData.dtype) for name in self.varnames] + except: + ArgumentError( + "Could not assign names to data structure, are you providing an array containing all variables?" + ) if self.data is None: - self.data = newline - self.data = self.data.reshape(1) # don't even ask + self.data = newData else: - if ivarvalue in self.data[self.ivarname]: - raise Exception("Cannot replace data (yet).") - self.data = np.append(self.data, newline) + self.data = np.append(self.data, newData) def denanify(self, d): dd = d.copy() - for k, miss in self.missvals.items(): + for k, miss in self.missingValues.items(): dd[k][np.isnan(dd[k])] = miss return dd @@ -94,6 +97,9 @@ class DataStore1001: self, f=sys.stdout, fmt=DEFAULT_NUM_FORMAT, delimiter=DEFAULT_FIELD_DELIM ): d = self.denanify(self.data) + # single line data is 0D, savetxt cannot work with 0D. Make 1D. + if d.ndim == 0: + d = np.array( [ d ] ) np.savetxt(f, d, fmt=fmt, delimiter=delimiter) @@ -105,10 +111,10 @@ class DataStore2110(collections.UserDict): self.auxvarnames = [x for x in auxvars] self.dvarnames = [x for x in dvars] - self.missvals = {x: dvars[x].miss for x in dvars} - self.missvals.update({x: auxvars[x].miss for x in auxvars}) - self.missvals.update({self.ibvarname: ibvar.miss}) - self.missvals.update({self.ivarname: ivar.miss}) + self.missingValues = {x: dvars[x].miss for x in dvars} + self.missingValues.update({x: auxvars[x].miss for x in auxvars}) + self.missingValues.update({self.ibvarname: ibvar.miss}) + self.missingValues.update({self.ivarname: ivar.miss}) self.nauxvarname = self.auxvarnames[0] # convention! @@ -125,69 +131,37 @@ class DataStore2110(collections.UserDict): return self.data[s] # returns None implicitly if self.data is None - def _addAuxline(self, auxline): - newdata = { - x: auxline[i] for i, x in enumerate([self.ivarname] + self.auxvarnames) - } - self.add(**newdata) - - def addBulkDep(self, ivar, raw): - nlines, _ = raw.shape # _ : nvars not used - self._addDeplines(ivar, raw, nlines) - - def _addDeplines(self, ivar, raw, n): - for cur in range(n): - newdata = { - x: raw[cur][i] for i, x in enumerate([self.ibvarname] + self.dvarnames) - } - newdata.update({self.ivarname: ivar}) - self.add(**newdata) - - def addBulkFromTxt(self, raw): - self._addBulk(raw, len(raw)) - - def _addBulk(self, raw, n): - cur = 0 - while cur < n: - ivarvalue = vmiss_to_npnan(raw[cur][0], self.missvals[self.ivarname]) - - self._addAuxline(raw[cur]) - cur += 1 - - # stupid, but at first auxline added, nprimaryData ist a 0-dim array... - ndepData = self.data[ivarvalue]["AUX"][self.nauxvarname] - ndepData = int(ndepData) if ndepData.shape == () else int(ndepData[-1]) - - self._addDeplines(ivarvalue, raw[cur : (cur + ndepData)], ndepData) + def addFromTxt(self, f, splitChar): + while f: + auxds = DataStore1001(self.ivar, self.auxvars) + depds = DataStore1001(self.ibvar, self.dvars) + try: + auxds.addFromTxt(f, splitChar, max_rows=1) + except: + # we are at the end of the file if this happens + break - cur += ndepData + ndeprows = int( auxds[self.nauxvarname] ) - def add(self, **kwargs): - # whatever we do, an independent variable is needed - if not self.ivarname in kwargs.keys(): - raise Exception("Need independent variable data.") + try: + depds.addFromTxt(f, splitChar, max_rows=ndeprows) + except: + break - ivarvalue = vmiss_to_npnan(kwargs[self.ivarname], self.missvals[self.ivarname]) + ivarValue = float(auxds[self.ivar.shortname]) - # this is an AUX line - if any(x in self.auxvarnames for x in kwargs): - # and we create the whole dataset if needed - if not ivarvalue in self.data.keys(): - self.data[ivarvalue] = { - "AUX": DataStore1001(self.ivar, self.auxvars), - "DEP": DataStore1001(self.ibvar, self.dvars), - } - self.data[ivarvalue]["AUX"].add(**kwargs) + self.data[ivarValue] = { "AUX": auxds, "DEP": depds } + + def add(self, newAuxData, newDepData): + auxds = DataStore1001(self.ivar, self.auxvars) + depds = DataStore1001(self.ibvar, self.dvars) - # this is a DEP line - if any(x in self.dvarnames for x in kwargs): - if not self.ibvarname in kwargs.keys(): - raise Exception("Need independent (bounded) variable data.") + auxds.add(newAuxData) + depds.add(newDepData) - if not ivarvalue in self.data.keys(): - raise Exception("Aux data line needs to be added first.") + ivarValue = float(auxds[self.ivar.shortname]) - self.data[ivarvalue]["DEP"].add(**kwargs) + self.data[ivarValue] = { "AUX": auxds, "DEP": depds } def write( self, f=sys.stdout, fmt=DEFAULT_NUM_FORMAT, delimiter=DEFAULT_FIELD_DELIM @@ -215,13 +189,13 @@ class StandardNormalComments(collections.UserList): @property def nlines(self): """calculates the number of lines in the normal comments section""" - n = 1 # shortnames line - n += len(self.freeform) # freeform comment + n = 1 # shortnames line + n += len(self.freeform) # freeform comment for k in self.keywords.values(): try: - n += len(k.data[0].split("\n")) # and keywords might be multiline... - except IndexError: # ok we have no list, - n += 1 # just add 1 + n += len(k.data[0].split("\n")) # and keywords might be multiline... + except IndexError: # ok we have no list, + n += 1 # just add 1 return n @property @@ -333,12 +307,12 @@ class Variable: descstr += [str(self.longname)] return splitChar.join(descstr) - def isValidVariablename(self, name): # TODO: this could be a 'utils' function + def isValidVariablename(self, name): # TODO: this could be a 'utils' function # ICARTT Standard v2 2.1.1 2) # Variable short names and variable standard names: # Uppercase and lowercase ASCII alphanumeric characters # and underscores. - def isAsciiAlphaOrUnderscore(x): # TODO: this could be a 'utils' function + def isAsciiAlphaOrUnderscore(x): # TODO: this could be a 'utils' function return re.match("[a-zA-Z0-9_]", x) allAreAlphaOrUnderscore = all(isAsciiAlphaOrUnderscore(x) for x in name) @@ -432,12 +406,14 @@ class Dataset: :rtype: list """ if self.data.data is None or self.independentVariable is None: - return np.datetime64('NaT') + return np.datetime64("NaT") - ref_dt = np.datetime64(datetime.datetime(*self.dateOfCollection), 'ns') + ref_dt = np.datetime64(datetime.datetime(*self.dateOfCollection), "ns") # ivar unit is seconds as per standard; need to convert to ns to use timedelta64[ns] type. - return ref_dt + (self.data[self.independentVariable.shortname]*10**9).astype('timedelta64[ns]') + return ref_dt + ( + self.data[self.independentVariable.shortname] * 10**9 + ).astype("timedelta64[ns]") @property def variables(self): @@ -461,7 +437,7 @@ class Dataset: def readHeader(self, splitChar=","): """Read the ICARTT header (from file)""" - class FilehandleWithLinecounter: # TODO: this could be a 'utils' class + class FilehandleWithLinecounter: # TODO: this could be a 'utils' class def __init__(self, f, splitChar): self.f = f self.line = 0 @@ -476,7 +452,7 @@ class Dataset: if self.inputFhandle: if self.inputFhandle.closed: - self.inputFhandle = open(self.inputFhandle.name, encoding='utf-8') + self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8") f = FilehandleWithLinecounter(self.inputFhandle, splitChar) self._readHeader(f) @@ -541,8 +517,10 @@ class Dataset: # here that the independent variable should monotonically increase even when # crossing over to a second day. - def extractVardesc(dmp): # TODO: could be a 'utils' function or one line, - shortname = dmp[0] # shortname, units, standardname, longname, *_ = dmp + [None] * 3 + def extractVardesc(dmp): # TODO: could be a 'utils' function or one line, + shortname = dmp[ + 0 + ] # shortname, units, standardname, longname, *_ = dmp + [None] * 3 units = dmp[1] standardname = dmp[2] if len(dmp) > 2 else None longname = dmp[3] if len(dmp) > 3 else None @@ -609,8 +587,8 @@ class Dataset: d = {} for shortname, unit, standardname, longname, scale, miss in zip( - vshortname, vunits, vstandardname, vlongname, vscale, vmiss - ): + vshortname, vunits, vstandardname, vlongname, vscale, vmiss + ): d[shortname] = Variable( shortname, unit, @@ -623,8 +601,6 @@ class Dataset: return d - - self.dependentVariables = readVars(f, VariableType.DependentVariable) if self.format == Formats.FFI2110: @@ -674,13 +650,12 @@ class Dataset: """Read ICARTT data (from file)""" if self.inputFhandle: if self.inputFhandle.closed: - self.inputFhandle = open(self.inputFhandle.name, encoding='utf-8') + self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8") for _ in range(self.nHeaderFile): self.inputFhandle.readline() - raw = [line.split(splitChar) for line in self.inputFhandle] - self.data.addBulkFromTxt(raw) + self.data.addFromTxt(self.inputFhandle, splitChar) self.inputFhandle.close() def read(self, splitChar=","): @@ -703,7 +678,9 @@ class Dataset: + "_" + self.locationID + "_" - + datetime.datetime.strftime(datetime.datetime(*self.dateOfCollection), dateFormat) + + datetime.datetime.strftime( + datetime.datetime(*self.dateOfCollection), dateFormat + ) ) fn += "_R" + str(self.revision) if not self.revision is None else "" fn += "_L" + str(self.launch) if not self.launch is None else "" @@ -715,19 +692,19 @@ class Dataset: return fn + ".ict" - def isValidFileName(self, name): # TODO: this could be a 'utils' function + def isValidFileName(self, name): # TODO: this could be a 'utils' function # ICARTT standard v2 2.1.1 3) # Filename: Uppercase and lowercase ASCII alphanumeric # characters (i.e. A-Z, a-z, 0-9), underscore, period, # and hyphen. File names can be a maximum 127 # characters in length. - def isAsciiAlpha(x): # TODO: this could be a 'utils' function + def isAsciiAlpha(x): # TODO: this could be a 'utils' function return re.match("[a-zA-Z0-9-_.]", x) allAsciiAlpha = all(isAsciiAlpha(x) for x in name) lessThan128Characters = len(name) < 128 - return allAsciiAlpha and lessThan128Characters and name.endswith('.ict') + return allAsciiAlpha and lessThan128Characters and name.endswith(".ict") def writeHeader(self, f=sys.stdout, delimiter=DEFAULT_FIELD_DELIM): """Write header @@ -762,7 +739,9 @@ class Dataset: ) # UTC date when data begin, UTC date of data reduction or revision - comma delimited (yyyy, mm, dd, yyyy, mm, dd). write_to_file( - delimiter.join(f"{x:02d}" for x in (*self.dateOfCollection, *self.dateOfRevision)) + delimiter.join( + f"{x:02d}" for x in (*self.dateOfCollection, *self.dateOfRevision) + ) ) # Data Interval (This value describes the time spacing (in seconds) between consecutive data records. It is the (constant) interval between values of the independent variable. For 1 Hz data the data interval value is 1 and for 10 Hz data the value is 0.1. All intervals longer than 1 second must be reported as Start and Stop times, and the Data Interval value is set to 0. The Mid-point time is required when it is not at the average of Start and Stop times. For additional information see Section 2.5 below.). write_to_file(delimiter.join([str(x) for x in self.dataIntervalCode])) @@ -921,7 +900,7 @@ class Dataset: # read data if f is not None if f is not None: if isinstance(f, (str, pathlib.Path)): - self.inputFhandle = open(f, "r", encoding='utf-8') + self.inputFhandle = open(f, "r", encoding="utf-8") else: self.inputFhandle = f diff --git a/tests/test_1001.py b/tests/test_1001.py index 61819d7c6387e1b6627f1bbd266d75005ef5c4f0..bcc2436f4d7286b3f8fcc953d8475d3ea68623da 100644 --- a/tests/test_1001.py +++ b/tests/test_1001.py @@ -8,6 +8,7 @@ import icartt # working directory, example files wd = pathlib.Path(__file__).parent + def compareFiles(fn, strIn, strOut, skiplines=0, nlines=-1): # pragma: no cover strOut.seek(0) strIn.seek(0) @@ -335,17 +336,36 @@ class Create1001TestCase(unittest.TestCase): # and times must be NaT self.assertTrue(np.isnat(ict.times)) - ict.data.add(Time_Start=12.3, Time_Stop=12.5, Payload=23789423.2e5) - - mydict = {"Time_Start": 12.6, "Time_Stop": 13.1, "Payload": 324235644.1e5} - ict.data.add(**mydict) + # single line data adding + # it is the users job to ensure data consistency! + # Time_Start, Time_Stop, Payload + oneLineData = np.array([(12.3, 12.5, 23789423.2e5)]) + ict.data.add(oneLineData) - data = np.array([(13.4, 14.0, 2348925e5), (14.1, 14.9, 23425634e5)]) - ict.data.addBulk(data) + # this also works for bulk data dumps + multiLineData = np.array( + [ + (12.6, 13.0, 2348925e5), + (13.4, 14.0, 23425634e5), + (14.1, 14.7, 23422344e5), + ] + ) + ict.data.add(multiLineData) + + # you can also be explicit, and define names and datatypes + structuredData = np.array( + [(14.5, 14.6, 24824525e5), (14.6, 14.7, 41225634e5)], + dtype=[ + ("Time_Start", ict.data.default_dtype), + ("Time_Stop", ict.data.default_dtype), + ("Payload", ict.data.default_dtype), + ], + ) + ict.data.add(structuredData) # elements of the time array must be equal to our input t0 = np.datetime64(datetime.datetime(*now.timetuple()[:3]), "ns") - for have, want in zip(ict.times, (12.3, 12.6, 13.4, 14.1)): + for have, want in zip(ict.times, (12.3, 12.6, 13.4, 14.1, 14.5, 14.6)): self.assertEqual(int(have - t0), int(want * 10**9)) strOut = io.StringIO() diff --git a/tests/usage_examples/create_ffi1001.py b/tests/usage_examples/create_ffi1001.py index 2261599cc13110282735f2fe120c809a74646be7..15602ee9b4b4f06880b9577b21d01394d38e5dbe 100644 --- a/tests/usage_examples/create_ffi1001.py +++ b/tests/usage_examples/create_ffi1001.py @@ -3,76 +3,69 @@ import datetime import icartt -ict = icartt.Dataset(format=icartt.Formats.FFI1001) - -ict.PIName = 'Knote, Christoph' -ict.PIAffiliation = 'Faculty of Medicine, University Augsburg, Germany' -ict.dataSourceDescription = 'Example data' -ict.missionName = 'MBEES' -ict.dateOfCollection = datetime.datetime.utcnow().timetuple()[:3] -ict.dateOfRevision = datetime.datetime.utcnow().timetuple()[:3] - -ict.dataIntervalCode = [ 0 ] - -ict.independentVariable = icartt.Variable( 'Time_Start', - 'seconds_from_0_hours_on_valid_date', - 'Time_Start', - 'Time_Start', - vartype=icartt.VariableType.IndependentVariable, - scale=1.0, miss=-9999999) - -ict.dependentVariables['Time_Stop'] = icartt.Variable( 'Time_Stop', - 'seconds_from_0_hours_on_valid_date', - 'Time_Stop', - 'Time_Stop', - scale=1.0, miss=-9999999) - -ict.dependentVariables['Payload'] = icartt.Variable( 'Payload', - 'some_units', - 'Payload', - 'Payload', - scale=1.0, miss=-9999999) +ict = icartt.Dataset(format=icartt.Formats.FFI1001) + +ict.PIName = "Knote, Christoph" +ict.PIAffiliation = "Faculty of Medicine, University Augsburg, Germany" +ict.dataSourceDescription = "Example data" +ict.missionName = "MBEES" +ict.dateOfCollection = datetime.datetime.utcnow().timetuple()[:3] +ict.dateOfRevision = datetime.datetime.utcnow().timetuple()[:3] + +ict.dataIntervalCode = [0] + +ict.independentVariable = icartt.Variable( + "Time_Start", + "seconds_from_0_hours_on_valid_date", + "Time_Start", + "Time_Start", + vartype=icartt.VariableType.IndependentVariable, + scale=1.0, + miss=-9999999, +) + +ict.dependentVariables["Time_Stop"] = icartt.Variable( + "Time_Stop", + "seconds_from_0_hours_on_valid_date", + "Time_Stop", + "Time_Stop", + scale=1.0, + miss=-9999999, +) + +ict.dependentVariables["Payload"] = icartt.Variable( + "Payload", "some_units", "Payload", "Payload", scale=1.0, miss=-9999999 +) ict.specialComments.append("Some comments on this dataset:") ict.specialComments.append("They are just examples!") ict.specialComments.append("Adapt as needed.") -ict.normalComments.freeform.append('free comment line 1') -ict.normalComments.freeform.append('free comment line 2') +ict.normalComments.freeform.append("free comment line 1") +ict.normalComments.freeform.append("free comment line 2") # ict.normalComments are all set to N/A if not specified ict.endDefineMode() -# Three ways to add data: +# Add data -# 1) simple (single data line) -ict.data.add( Time_Start = 12.3, Time_Stop = 12.5, Payload = 23789423.2e5 ) - -# Let's check: -ict.write() - -# 2) as dictionary (single data line) -mydict = { 'Time_Start': 12.6, 'Time_Stop': 13.1, 'Payload': 324235644.1e5 } -ict.data.add( **mydict ) -# (note, exploding the dictionary is necessary) - -# 3) as NumPy array (bulk) import numpy as np -data = np.array( [ (13.4, 14.0, 2348925e5), (14.1, 14.9, 23425634e5) ] ) -ict.data.addBulk( data ) + +data = np.array([(13.4, 14.0, 2348925e5), (14.1, 14.9, 23425634e5)]) +ict.data.add(data) # Note 1: you are responsible to ensure that the order of elements in a data line # corresponds to variable listing below: -print( [ x for x in ict.variables ] ) +print([x for x in ict.variables]) # Note 2: for single lines, you still need to make it an array! -data = np.array( [ (15.4, 15.0, 52452495290e5) ] ) -ict.data.addBulk( data ) +data = np.array([(15.4, 15.0, 52452495290e5)]) +ict.data.add(data) # Now, look at it in ICARTT form: ict.write() # And you could simply write to file: -#with open('output.ict', 'w') as f: +# with open('output.ict', 'w') as f: # ict.write(f=f) diff --git a/tests/usage_examples/create_ffi2110.py b/tests/usage_examples/create_ffi2110.py index 78ca1973e03a751ffc3d0f72b4075cafcb96698c..563557b481dad16185c61b8fd50d310d4852bb4e 100644 --- a/tests/usage_examples/create_ffi2110.py +++ b/tests/usage_examples/create_ffi2110.py @@ -5,67 +5,76 @@ import numpy as np import icartt -ict = icartt.Dataset(format=icartt.Formats.FFI2110) - -ict.PIName = 'Knote, Christoph' -ict.PIAffiliation = 'Faculty of Medicine, University Augsburg, Germany' -ict.dataSourceDescription = 'Example data' -ict.missionName = 'MBEES' -ict.dateOfCollection = datetime.datetime.utcnow().timetuple()[:3] -ict.dateOfRevision = datetime.datetime.utcnow().timetuple()[:3] - -ict.dataIntervalCode = [ 0 ] - -ict.independentVariable = icartt.Variable( 'Time_Start', - 'seconds_from_0_hours_on_valid_date', - 'Time_Start', - 'Time_Start', - vartype=icartt.VariableType.IndependentVariable, - scale=1.0, miss=-9999999) - -ict.independentBoundedVariable = icartt.Variable( 'Altitude', - 'altitude_above_ground_in_meters', - 'Altitude', - 'Altitude', - vartype=icartt.VariableType.IndependentBoundedVariable, - scale=1.0, miss=-9999999) +ict = icartt.Dataset(format=icartt.Formats.FFI2110) + +ict.PIName = "Knote, Christoph" +ict.PIAffiliation = "Faculty of Medicine, University Augsburg, Germany" +ict.dataSourceDescription = "Example data" +ict.missionName = "MBEES" +ict.dateOfCollection = datetime.datetime.utcnow().timetuple()[:3] +ict.dateOfRevision = datetime.datetime.utcnow().timetuple()[:3] + +ict.dataIntervalCode = [0] + +ict.independentVariable = icartt.Variable( + "Time_Start", + "seconds_from_0_hours_on_valid_date", + "Time_Start", + "Time_Start", + vartype=icartt.VariableType.IndependentVariable, + scale=1.0, + miss=-9999999, +) + +ict.independentBoundedVariable = icartt.Variable( + "Altitude", + "altitude_above_ground_in_meters", + "Altitude", + "Altitude", + vartype=icartt.VariableType.IndependentBoundedVariable, + scale=1.0, + miss=-9999999, +) # ICARTT convention: first aux variable contains number of dependent elements -ict.auxiliaryVariables['nAltitudes'] = icartt.Variable( 'nAltitudes', - 'number_of_dependent_variable_items', - 'variable', - 'nAltitudes', - scale=1.0, miss=-9999999) - -ict.auxiliaryVariables['Time_Stop'] = icartt.Variable( 'Time_Stop', - 'seconds_from_0_hours_on_valid_date', - 'Time_Stop', - 'Time_Stop', - scale=1.0, miss=-9999999) - -ict.auxiliaryVariables['Longitude'] = icartt.Variable( 'Longitude', - 'longitude_in_degrees', - 'Longitude', - 'Longitude', - scale=1.0, miss=-9999999) - -ict.auxiliaryVariables['Latitude'] = icartt.Variable( 'Latitude', - 'latitude_in_degrees', - 'Latitude', - 'Latitude', - scale=1.0, miss=-9999999) - -ict.dependentVariables['Payload1'] = icartt.Variable( 'Payload1', - 'some_units', - 'Payload1', - 'Payload1', - scale=1.0, miss=-9999999) - -ict.dependentVariables['Payload2'] = icartt.Variable( 'Payload2', - 'some_units', - 'Payload2', - 'Payload2', - scale=1.0, miss=-9999999) +ict.auxiliaryVariables["nAltitudes"] = icartt.Variable( + "nAltitudes", + "number_of_dependent_variable_items", + "variable", + "nAltitudes", + scale=1.0, + miss=-9999999, +) + +ict.auxiliaryVariables["Time_Stop"] = icartt.Variable( + "Time_Stop", + "seconds_from_0_hours_on_valid_date", + "Time_Stop", + "Time_Stop", + scale=1.0, + miss=-9999999, +) + +ict.auxiliaryVariables["Longitude"] = icartt.Variable( + "Longitude", + "longitude_in_degrees", + "Longitude", + "Longitude", + scale=1.0, + miss=-9999999, +) + +ict.auxiliaryVariables["Latitude"] = icartt.Variable( + "Latitude", "latitude_in_degrees", "Latitude", "Latitude", scale=1.0, miss=-9999999 +) + +ict.dependentVariables["Payload1"] = icartt.Variable( + "Payload1", "some_units", "Payload1", "Payload1", scale=1.0, miss=-9999999 +) + +ict.dependentVariables["Payload2"] = icartt.Variable( + "Payload2", "some_units", "Payload2", "Payload2", scale=1.0, miss=-9999999 +) ict.specialComments.append("Some comments on this dataset:") ict.specialComments.append("They are just examples!") @@ -75,33 +84,29 @@ ict.endDefineMode() # Add data -# the three ways to add data (see FFI 1001) are still possible for FFI 2110 +# for 2110, data can be added as chunks for a given ivar! -# a new independent variable item is created by adding data for the new item -# with its auxiliary data information: - -ict.data.add( Time_Start = 12.3, nAuxiliary=4, Time_Stop = 12.5, Latitude = 48.21, Longitude = 10.3 ) -ict.data.add( Time_Start = 13.3, nAuxiliary=2, Time_Stop = 13.5, Latitude = 48.31, Longitude = 10.4 ) - -# then, dependent data can be added: +import numpy as np +# note, the second variable ('4') is the number of dependent lines to follow +# ivar, ndepvar, auxvar1, auxvar2, auxvar3 +auxData = np.array([(12.3, 4, 12.5, 48.21, 10.3)]) # ibvar, dvar1, dvar2 -data = np.array( [ ( 0, 123, 8.4e4), - (100, 122, 9.1e4), - (250, 115, 9.3e4), - (500, 106, 9.8e4) ] ) +depData = np.array( + [(0, 123, 8.4e4), (100, 122, 9.1e4), (250, 115, 9.3e4), (500, 106, 9.8e4)] +) -ict.data.addBulkDep(12.3, data) +# ... and so forth +auxData = np.array([(12.4, 2, 12.8, 48.41, 12.1)]) # ibvar, dvar1, dvar2 -data = np.array( [ ( 0, 153, 7.3e4), - (270, 172, 8.9e4) ] ) +depData = np.array([(0, 153, 7.3e4), (270, 172, 8.9e4)]) -ict.data.addBulkDep(13.3, data) +ict.data.add(auxData, depData) # Now, look at it in ICARTT form: ict.write() # And you could simply write to file: -#with open('output.ict', 'w') as f: +# with open('output.ict', 'w') as f: # ict.write(f=f) diff --git a/tests/usage_examples/read_ffi1001.py b/tests/usage_examples/read_ffi1001.py index eeab1ba568c4cbf335ff500c12b5f588f3fd7ff2..5a4f703bec021ca0e9682766c62a62b4f1fadd38 100644 --- a/tests/usage_examples/read_ffi1001.py +++ b/tests/usage_examples/read_ffi1001.py @@ -3,7 +3,7 @@ import pathlib # load a new dataset from an existing file wd = pathlib.Path(__file__).parent -ict = icartt.Dataset( wd / ".." / "example_data" / 'DC8-20160517.ict') +ict = icartt.Dataset( wd / ".." / "example_data" / 'expect_ok' / 'DC8-20160517.ict') # read some metadata ict.PIName diff --git a/tests/usage_examples/read_ffi2110.py b/tests/usage_examples/read_ffi2110.py index 4b19101fd596e94f216a5c43957055866eeb2d91..742bca744081b1e4f3e5071b263257a255f84357 100644 --- a/tests/usage_examples/read_ffi2110.py +++ b/tests/usage_examples/read_ffi2110.py @@ -3,32 +3,35 @@ import pathlib # load a new dataset from an existing file wd = pathlib.Path(__file__).parent -ict = icartt.Dataset( wd / ".." / "example_data" / 'AR_DC8_20050203_R0.ict') +ict = icartt.Dataset( wd / ".." / "example_data" / 'expect_warn' / 'AR_DC8_20050203_R0.ict') # list variable names [ x for x in ict.variables ] # independent, independent bounded, dependent, auxiliary variables? -print(ict.independentVariable.shortname) -print(ict.independentBoundedVariable.shortname) -print([ x for x in ict.auxiliaryVariables]) -print([ x for x in ict.dependentVariables]) +print(f"Independent variable: {ict.independentVariable.shortname}") +print(f"Independent bounded variable: {ict.independentBoundedVariable.shortname}") +print(f"Auxiliary variables: {', '.join([ x for x in ict.auxiliaryVariables])}") +print(f"Dependent variables: {', '.join([ x for x in ict.dependentVariables])}") # some info on a variable -ict.variables['Latitude'].units -ict.variables['Latitude'].miss +print(f"Units of variable Latitude are {ict.variables['Latitude'].units}") +print(f"... and its missing value is {ict.variables['Latitude'].miss}") # get steps for which data is available: tsteps = [ x for x in ict.data ] # let's look at the first time step data -ict.data[ tsteps[0] ] +print("First time step data:") +print(ict.data[ tsteps[0] ]) # auxiliary data at this time step: -ict.data[ tsteps[0] ]['AUX'][:] +print("First time step auxiliary data:") +print(ict.data[ tsteps[0] ]['AUX'][:]) # dependent data at this time step: tstepdata = ict.data[ tsteps[0] ]['DEP'][:] # get the ozone mixing ratio for those data where Altitude < 10000.0: +print(f"Ozone mixing ratio for altitudes < 10000 at time step {tsteps[0]}") print(tstepdata[ tstepdata['Altitude[]'] < 10000.0 ]['O3_MR[]'])