From f625d88e0f5e92ec1b010f67d160d5707ecccfe9 Mon Sep 17 00:00:00 2001 From: Christoph Knote Date: Thu, 15 Apr 2021 15:00:35 +0200 Subject: [PATCH] Big cleanup --- icartt/dataset.py | 101 +++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 54 deletions(-) diff --git a/icartt/dataset.py b/icartt/dataset.py index 6a7cee2..7d8c596 100644 --- a/icartt/dataset.py +++ b/icartt/dataset.py @@ -55,49 +55,6 @@ class StandardNormalComments(collections.UserList): if not contents is []: self.ingest(contents) -class Container_1001(collections.UserList): - def write(self, prnt=lambda x: sys.stdout.write(x)): - def p(val, var): - return var.miss if math.isnan(val) else val - - for i in range(len(self.IVAR)): - prnt( [ p(self.IVAR[i],self.IVAR) ] + [ p(DVAR[i][1],DVAR) for DVAR in self.DVARS.values() ] ) - - def extract_items(self, raw): - for cur in range(len(raw)): - self.IVAR.append(raw[cur][0]) - nul = [ self.DVARS[key].append(raw[cur][0], raw[cur][i+1]) for i, key in enumerate(self.DVARS) ] - - def __init__(self, raw=[], IVAR=None, IBVAR=None, AUXVARS=None, DVARS=None): - self.IVAR = IVAR - self.IBVAR = IBVAR - self.AUXVARS = AUXVARS - self.DVARS = DVARS - # - self.extract_items(raw) - -class Container_2110(Container_1001): - def write(self, prnt=lambda x: sys.stdout.write(x)): - def p(val, var): - return var.miss if math.isnan(val) else val - - for ival in self.IVAR: - prnt( [ p(ival, self.IVAR) ] + [ p(auxval[1], AUXVAR) for AUXVAR in self.AUXVARS.values() for auxval in AUXVAR if auxval[0] == ival ] ) - for ibval in [ b[1] for b in self.IBVAR if b[0] == ival ]: - prnt([ p(ibval, self.IBVAR) ] + [ p(dval[1], DVAR) for DVAR in self.DVARS.values() for dval in DVAR if (dval[0][0] == ival) and (dval[0][1] == ibval) ]) - - def extract_items(self, raw): - cur = 0 - num_var_name = list(self.AUXVARS.keys())[0] - while cur < len(raw): - self.IVAR.append(raw[cur][0]) - nul = [ self.AUXVARS[key].append(raw[cur][0], raw[cur][i+1]) for i, key in 
enumerate(self.AUXVARS) ] - nprimary = int(self.AUXVARS[num_var_name][-1][1]) - for i in range(nprimary): - self.IBVAR.append(raw[cur][0], raw[cur+i+1][0]) - nul = [ self.DVARS[key].append(raw[cur][0], raw[cur+i+1][0], raw[cur+i+1][j+1]) for j, key in enumerate(self.DVARS) ] - cur += 1 + nprimary - class Variable(collections.UserList): ''' A Variable is a ICARTT variable description with name, units, scale and missing value. @@ -258,6 +215,22 @@ class Dataset: # Normal comments (SUPPORTING information: This is the place for investigators to more completely describe the data and measurement parameters. The supporting information structure is described below as a list of key word: value pairs. Specifically include here information on the platform used, the geo-location of data, measurement technique, and data revision comments. Note the non-optional information regarding uncertainty, the upper limit of detection (ULOD) and the lower limit of detection (LLOD) for each measured variable. The ULOD and LLOD are the values, in the same units as the measurements that correspond to the flags -7777s and -8888s within the data, respectively. The last line of this section should contain all the short variable names on one line. The key words in this section are written in BOLD below and must appear in this section of the header along with the relevant data listed after the colon. For key words where information is not needed or applicable, simply enter N/A.). 
nul = [ prnt(x) for x in self.NCOM ] + def _write_data_1001(self, prnt=lambda x: sys.stdout.write(x)): + def p(val, var): + return var.miss if math.isnan(val) else val + + for i in range(len(self.IVAR)): + prnt( [ p(self.IVAR[i],self.IVAR) ] + [ p(DVAR[i][1],DVAR) for DVAR in self.DVARS.values() ] ) + + def _write_data_2110(self, prnt=lambda x: sys.stdout.write(x)): + def p(val, var): + return var.miss if math.isnan(val) else val + + for ival in self.IVAR: + prnt( [ p(ival, self.IVAR) ] + [ p(auxval[1], AUXVAR) for AUXVAR in self.AUXVARS.values() for auxval in AUXVAR if auxval[0] == ival ] ) + for ibval in [ b[1] for b in self.IBVAR if b[0] == ival ]: + prnt([ p(ibval, self.IBVAR) ] + [ p(dval[1], DVAR) for DVAR in self.DVARS.values() for dval in DVAR if (dval[0][0] == ival) and (dval[0][1] == ibval) ]) + def write_data(self, f=sys.stdout): ''' Write data to file handle @@ -265,7 +238,12 @@ class Dataset: def prnt_data(vars): f.write( str(self.splitChar.join([ str(x) for x in vars ])) + "\n") - nul = self.data.write(prnt=prnt_data) + if self.format == 1001: + nul = self._write_data_1001(prnt=prnt_data) + elif self.format == 2110: + nul = self._write_data_2110(prnt=prnt_data) + else: + print("huh?") def write(self, f=sys.stdout): ''' @@ -448,6 +426,23 @@ class Dataset: if self.nheader != nheader_suggested: warnings.warn("Number of header lines suggested in line 1 ({:d}) do not match actual header lines read ({:d})".format(nheader_suggested, self.nheader)) + def extract_items_1001(self, raw): + for cur in range(len(raw)): + self.IVAR.append(raw[cur][0]) + nul = [ self.DVARS[key].append(raw[cur][0], raw[cur][i+1]) for i, key in enumerate(self.DVARS) ] + + def extract_items_2110(self, raw): + cur = 0 + num_var_name = list(self.AUXVARS.keys())[0] + while cur < len(raw): + self.IVAR.append(raw[cur][0]) + nul = [ self.AUXVARS[key].append(raw[cur][0], raw[cur][i+1]) for i, key in enumerate(self.AUXVARS) ] + nprimary = int(self.AUXVARS[num_var_name][-1][1]) + for i in 
range(nprimary): + self.IBVAR.append(raw[cur][0], raw[cur+i+1][0]) + nul = [ self.DVARS[key].append(raw[cur][0], raw[cur+i+1][0], raw[cur+i+1][j+1]) for j, key in enumerate(self.DVARS) ] + cur += 1 + nprimary + def read_data(self): ''' Read ICARTT data (from file) @@ -457,12 +452,13 @@ class Dataset: nul = [ self.input_fhandle.readline() for i in range(self.nheader_file) ] - if self.format == 1001: - self.data = Container_1001([ line.split(self.splitChar) for line in self.input_fhandle ], IVAR=self.IVAR, DVARS=self.DVARS) + raw = [ line.split(self.splitChar) for line in self.input_fhandle ] + if self.format == 1001: + nul = self.extract_items_1001(raw) elif self.format == 2110: - self.data = Container_2110([ line.split(self.splitChar) for line in self.input_fhandle ], IVAR=self.IVAR, IBVAR=self.IBVAR, AUXVARS=self.AUXVARS, DVARS=self.DVARS) + nul = self.extract_items_2110(raw) else: - print("Unknown format") + print("huh?") self.input_fhandle.close() @@ -508,6 +504,8 @@ class Dataset: 'seconds_from_0_hours_on_valid_date', 'seconds_from_0_hours_on_valid_date', scale=1.0, miss=-9999999, splitChar=splitChar) + self.IBVAR = None + self.AUXVARS = {} self.DVARS = { 'Time_Stop': Variable('Time_Stop', @@ -520,15 +518,10 @@ class Dataset: 'ppbv', scale=1.0, miss=-9999999, splitChar=splitChar) } + self.SCOM = [] self.NCOM = [] - # for 2210 - self.IBVAR = None - self.AUXVARS = {} - - self.data = Container_1001([], IVAR=self.IVAR, IBVAR=self.IBVAR, AUXVARS=self.AUXVARS, DVARS=self.DVARS) - self.splitChar = splitChar # Standard v2.0 for normal comments requires all keywords present, -- GitLab