Skip to content
dataset.py 26.2 KiB
Newer Older
Christoph.Knote's avatar
sdf
Christoph.Knote committed
import datetime
import sys
Christoph Knote's avatar
Christoph Knote committed
import collections
Christoph Knote's avatar
Christoph Knote committed
import decimal
Christoph Knote's avatar
Christoph Knote committed
import math
Christoph Knote's avatar
Christoph Knote committed
import string
import warnings
Christoph.Knote's avatar
sdf
Christoph.Knote committed

Christoph Knote's avatar
Christoph Knote committed
# File format indices accepted by Dataset.read_header; any other index found
# in the first header line makes read_header raise a ValueError.
IMPLEMENTED_FORMATS = [ 1001, 2110 ]

Christoph Knote's avatar
Christoph Knote committed
class StandardNormalComments(collections.UserList):
    '''
    Normal comment lines of an ICARTT header, split into free-form lines,
    "KEYWORD: value" lines and the trailing line of variable short names.

    The list content (``data``) is rebuilt on every access from these three
    parts, so the object behaves like a read-only list of header lines.
    '''

    @property
    def data(self):
        '''
        Comment lines in output order: free-form lines, then one
        "KEYWORD: value" line per stored keyword, then the short-name line.
        '''
        return self.freeform + [ k + ": " + str(v) for k, v in self.keywords.items() ] + [ self.shortnames ]

    def ingest(self, raw):
        '''
        Sort raw comment lines into short names, keywords and free-form text.

        :param list raw: comment lines as read from the header; not modified.
                         An empty list is ignored.
        '''
        # work on a copy so the caller's list keeps its last element
        lines = list(raw)
        if not lines:
            return

        # last line is always shortname
        self.shortnames = lines.pop()

        # rest is either keyword, or free form
        for line in lines:
            for k in self.possible_keywords:
                prefix = k + ":"
                if line.startswith(prefix):
                    # strip only the leading "KEYWORD:"; the value itself may
                    # legitimately contain the same text again
                    self.keywords[k] = line[len(prefix):].strip()
                    break
            else:
                self.freeform.append(line)

    def __init__(self, contents=None):
        '''
        :param list contents: raw normal comment lines (optional); the last
                              entry is taken as the short-name line.
        '''
        self.freeform = []
        # NOTE(review): range(9) yields R0..R8 only, so "R9" is not recognised
        # as a keyword - confirm whether that is intended.
        self.possible_keywords = [
            "PI_CONTACT_INFO",
            "PLATFORM",
            "LOCATION",
            "ASSOCIATED_DATA",
            "INSTRUMENT_INFO",
            "DATA_INFO",
            "UNCERTAINTY",
            "ULOD_FLAG",
            "ULOD_VALUE",
            "LLOD_FLAG",
            "LLOD_VALUE",
            "DM_CONTACT_INFO",
            "PROJECT_INFO",
            "STIPULATIONS_ON_USE",
            "OTHER_COMMENTS",
            "REVISION"
            ] + \
            [ "R{:d}".format(x) for x in range(9) ] + \
            [ "R{:s}".format(y) for y in string.ascii_uppercase ]
        self.keywords   = {}
        self.shortnames = []

        if contents:
            self.ingest(contents)

Christoph Knote's avatar
Christoph Knote committed
class Container_1001(collections.UserList):
    '''
    Data section of a format 1001 file: one independent variable and any
    number of dependent variables, one record per line.
    '''

    def write(self, prnt=lambda x: sys.stdout.write(x)):
        '''
        Hand every record to <prnt> as a list of values
        [ ivar, dvar_1, dvar_2, ... ], NaN replaced by the variable's
        missing value.

        :param callable prnt: called once per record with the list of values.
            NOTE(review): the default passes that list straight to
            sys.stdout.write, which only accepts strings - callers are
            expected to supply their own <prnt>.
        '''
        def p(val, var):
            return var.miss if math.isnan(val) else val

        for i, ival in enumerate(self.IVAR):
            record = [ p(ival, self.IVAR) ]
            # dependent values are stored as ( ivar, value ) pairs
            record += [ p(DVAR[i][1], DVAR) for DVAR in self.DVARS.values() ]
            prnt(record)

    def extract_items(self, raw):
        '''
        Distribute the fields of every raw record onto the variables: field 0
        goes to IVAR, field i+1 to the i-th entry of DVARS.

        :param list raw: records, each a sequence of field values
        '''
        for record in raw:
            self.IVAR.append(record[0])
            for i, key in enumerate(self.DVARS):
                self.DVARS[key].append(record[0], record[i+1])

    def __init__(self, raw=None, IVAR=None, IBVAR=None, AUXVARS=None, DVARS=None):
        '''
        :param list raw: records to ingest (optional)
        :param IVAR: independent variable receiving the data
        :param IBVAR: independent bounded variable (only stored here)
        :param AUXVARS: auxiliary variables by name (only stored here)
        :param DVARS: dependent variables by name receiving the data
        '''
        self.IVAR     = IVAR
        self.IBVAR    = IBVAR
        self.AUXVARS  = AUXVARS
        self.DVARS    = DVARS
        #
        self.extract_items([] if raw is None else raw)

Christoph Knote's avatar
Christoph Knote committed
class Container_2110(Container_1001):
    '''
    Data section of a format 2110 file: every value of the independent
    variable has one line of auxiliary values, followed by a number of lines
    with the bounded independent variable and the dependent values.
    '''

    def write(self, prnt=lambda x: sys.stdout.write(x)):
        '''
        Hand every output line to <prnt> as a list of values, NaN replaced by
        the variable's missing value. For each independent value one
        auxiliary line [ ivar, aux_1, ... ] is emitted, followed by one line
        [ ibvar, dvar_1, ... ] per bounded value recorded for it.

        :param callable prnt: called once per line with the list of values.
            NOTE(review): the default passes that list straight to
            sys.stdout.write, which only accepts strings - callers are
            expected to supply their own <prnt>.
        '''
        def p(val, var):
            return var.miss if math.isnan(val) else val

        for ival in self.IVAR:
            # auxiliary values are stored as ( ivar, value ) pairs
            auxline = [ p(auxval[1], AUXVAR) for AUXVAR in self.AUXVARS.values() for auxval in AUXVAR if auxval[0] == ival ]
            prnt([ p(ival, self.IVAR) ] + auxline)

            # bounded values are stored as ( ivar, ibvar ) pairs
            ibvals = [ b[1] for b in self.IBVAR if b[0] == ival ]
            for ibval in ibvals:
                # dependent values are stored as ( ( ivar, ibvar ), value )
                dline = [ p(dval[1], DVAR) for DVAR in self.DVARS.values() for dval in DVAR if (dval[0][0] == ival) and (dval[0][1] == ibval) ]
                prnt([ p(ibval, self.IBVAR) ] + dline)

    def extract_items(self, raw):
        '''
        Distribute raw records onto the variables. An auxiliary record
        (independent value, then auxiliary values) is followed by as many
        dependent records (bounded value, then dependent values) as the first
        auxiliary variable states.

        :param list raw: records, each a sequence of field values
        '''
        if not raw:
            # nothing to ingest; also keeps construction without variables working
            return

        # the first auxiliary variable holds the number of dependent records
        num_var_name = list(self.AUXVARS.keys())[0]

        cur = 0
        while cur < len(raw):
            head = raw[cur]
            self.IVAR.append(head[0])
            for i, key in enumerate(self.AUXVARS):
                self.AUXVARS[key].append(head[0], head[i+1])

            nprimary = int(self.AUXVARS[num_var_name][-1][1])
            for offset in range(nprimary):
                sub = raw[cur+offset+1]
                self.IBVAR.append(head[0], sub[0])
                for j, key in enumerate(self.DVARS):
                    self.DVARS[key].append(head[0], sub[0], sub[j+1])

            cur += 1 + nprimary

Christoph Knote's avatar
Christoph Knote committed
class Variable(collections.UserList):
    '''
    A Variable is a ICARTT variable description with name, units, scale and missing value.
    The list content holds the data appended via append().
    '''

    @property
    def desc(self):
        '''
        Return variable description string as it appears in an ICARTT file
        '''
        return self.splitChar.join( [ self.name, self.units, self.longname ] )

    def append(self, *argv):
        '''
        Append data to a variable. Depending on type (independent, dependent variable),
        all identifying (bounded and unbounded) independent variables need to be given.
        Every argument is converted to float; values equal to the missing
        value of this variable are stored as NaN.

        Stored element per number of arguments:

        - one argument a:         a
        - two arguments a, b:     ( a, b )
        - more arguments a..y, z: ( ( a, ..., y ), z )

        Examples:

        - file type 1001, add value of independent variable:
        ivar.append(234.4)
        - file type 1001, add value of dependent variable:
        dvar.append(234.4, 18.2)

        - file type 2110, add value of independent (unbounded) variable:
        ivar.append(234.4)
        - file type 2110, add value of independent (bounded) variable:
        ibvar.append(234.4, 9148.2)
        - file type 2110, add value of dependent variable:
        dvar.append(234.4, 9148.2, 34.2)
        '''
        miss = float(self.miss)

        def sanitized(z):
            z = float(z)
            return float('NaN') if z == miss else z

        v = [ sanitized(y) for y in argv ]

        if len(v) > 2:
            x = ( tuple(v[:-1]), v[-1] )
        elif len(v) > 1:
            x = ( v[0], v[1] )
        else:
            x = v[0]

        self.data.append( x )

    def __init__(self, name, units, longname, scale=1.0, miss=-99999.0, splitChar=","):
        '''
        :param string name: short name of the variable
        :param string units: units of the variable
        :param string longname: long (descriptive) name of the variable
        :param float scale: scale factor
        :param float miss: missing value indicator
        :param string splitChar: the character used to separate fields in desc
        '''
        self.name           = name
        self.units          = units
        self.longname       = longname
        self.scale          = scale
        self.miss           = miss

        self.splitChar      = splitChar

        self.data           = []
Christoph.Knote's avatar
sdf
Christoph.Knote committed

class Dataset:
Christoph.Knote's avatar
Christoph.Knote committed
    '''
    An ICARTT dataset that can be created from scratch or read from a file,
    manipulated, and then written to a file.
    '''
Christoph.Knote's avatar
sdf
Christoph.Knote committed
    @property
    def nheader(self):
Christoph.Knote's avatar
Christoph.Knote committed
        '''
        Header line count
        '''
Christoph Knote's avatar
Christoph Knote committed
        total = -1
        if self.format == 1001:
Christoph Knote's avatar
Christoph Knote committed
            total = 14 + len(self.DVARS) + len(self.SCOM) + len(self.NCOM)
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        if self.format == 2110:
Christoph Knote's avatar
Christoph Knote committed
            # 2: IVAR + IBVAR
            total = 16 + 2 + len(self.AUXVARS) + len(self.DVARS) + len(self.SCOM) + len(self.NCOM)
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        return total
    @property
Christoph Knote's avatar
Christoph Knote committed
    def VARS(self):
Christoph.Knote's avatar
Christoph.Knote committed
        '''
Christoph Knote's avatar
Christoph Knote committed
        Variables (independent + dependent + auxiliary)
Christoph.Knote's avatar
Christoph.Knote committed
        '''
Christoph Knote's avatar
Christoph Knote committed
        vars = { self.IVAR.name: self.IVAR, **self.DVARS }
Christoph Knote's avatar
Christoph Knote committed
        if self.format == 2110:
Christoph Knote's avatar
Christoph Knote committed
            vars = { self.IBVAR.name: self.IBVAR, **vars, **self.AUXVARS }
Christoph Knote's avatar
Christoph Knote committed
        return vars
Christoph.Knote's avatar
Christoph.Knote committed
    @property
    def varnames(self):
Christoph.Knote's avatar
Christoph.Knote committed
        '''
        Names of variables (independent and dependent)
        '''
Christoph Knote's avatar
Christoph Knote committed
        return [ x.name for x in self.VARS ]
Christoph.Knote's avatar
Christoph.Knote committed
    @property
    def times(self):
        '''
        Time steps of the data contained.
        '''
Christoph Knote's avatar
Christoph Knote committed
        return [ self.dateValid + datetime.timedelta(seconds=x) for x in self.IVAR ]
    
Christoph.Knote's avatar
Christoph.Knote committed
    def __getitem__(self, name):
        '''
Christoph Knote's avatar
Christoph Knote committed
        Shortcut to enable access to variable data by name
Christoph.Knote's avatar
Christoph.Knote committed
        '''
Christoph Knote's avatar
Christoph Knote committed
        return self.VARS[name]
    
Christoph Knote's avatar
Christoph Knote committed
    def write_header(self, f=sys.stdout):
Christoph.Knote's avatar
Christoph.Knote committed
        '''
Christoph Knote's avatar
Christoph Knote committed
        Write header to file handle <f>
Christoph.Knote's avatar
Christoph.Knote committed
        '''
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        def prnt(txt):
            f.write(str(txt) + "\n")

        # Number of lines in header, file format index (most files use 1001) - comma delimited.
Christoph Knote's avatar
Christoph Knote committed
        txt = "{:d}, {:d}".format(self.nheader, self.format)
        if self.version is not None:
            txt = "{:d}, {:d}, {:s}".format(self.nheader, self.format, self.version) 
        prnt(txt)
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        # PI last name, first name/initial.
        prnt(self.PI)
        # Organization/affiliation of PI.
        prnt(self.organization)
        # Data source description (e.g., instrument name, platform name, model name, etc.).
        prnt(self.dataSource)
        # Mission name (usually the mission acronym).
        prnt(self.mission)
        # File volume number, number of file volumes (these integer values are used when the data require more than one file per day; for data that require only one file these values are set to 1, 1) - comma delimited.
Christoph Knote's avatar
Christoph Knote committed
        prnt(self.splitChar.join([ str(self.volume), str(self.nvolumes) ]))
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        # UTC date when data begin, UTC date of data reduction or revision - comma delimited (yyyy, mm, dd, yyyy, mm, dd).
Christoph Knote's avatar
Christoph Knote committed
        prnt(self.splitChar.join([ datetime.datetime.strftime(x, self.splitChar.join(["%Y","%m","%d"])) for x in [ self.dateValid, self.dateRevised ] ]))
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        # Data Interval (This value describes the time spacing (in seconds) between consecutive data records. It is the (constant) interval between values of the independent variable. For 1 Hz data the data interval value is 1 and for 10 Hz data the value is 0.1. All intervals longer than 1 second must be reported as Start and Stop times, and the Data Interval value is set to 0. The Mid-point time is required when it is not at the average of Start and Stop times. For additional information see Section 2.5 below.).
Christoph Knote's avatar
Christoph Knote committed
        prnt(self.dataInterval)
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        if self.format == 2110:
            # Description or name of independent (bound) variable (This is the name chosen for the start time. It always refers to the number of seconds UTC from the start of the day on which measurements began. It should be noted here that the independent variable should monotonically increase even when crossing over to a second day.).
            prnt(self.IBVAR.desc)
Christoph Knote's avatar
Christoph Knote committed
        # Description or name of independent variable (This is the name chosen for the start time. It always refers to the number of seconds UTC from the start of the day on which measurements began. It should be noted here that the independent variable should monotonically increase even when crossing over to a second day.).
        prnt(self.IVAR.desc)
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        # Number of variables (Integer value showing the number of dependent variables: the total number of columns of data is this value plus one.).
Christoph Knote's avatar
Christoph Knote committed
        prnt(len(self.DVARS))
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        # Scale factors (1 for most cases, except where grossly inconvenient) - comma delimited.
Christoph Knote's avatar
Christoph Knote committed
        prnt(self.splitChar.join( [ "{:.1g}".format(DVAR.scale) for DVAR in self.DVARS.values() ]))
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        # Missing data indicators (This is -9999 (or -99999, etc.) for any missing data condition, except for the main time (independent) variable which is never missing) - comma delimited.
Christoph Knote's avatar
Christoph Knote committed
        prnt(self.splitChar.join( [ str(DVAR.miss) for DVAR in self.DVARS.values() ]))
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        # Variable names and units (Short variable name and units are required, and optional long descriptive name, in that order, and separated by commas. If the variable is unitless, enter the keyword "none" for its units. Each short variable name and units (and optional long name) are entered on one line. The short variable name must correspond exactly to the name used for that variable as a column header, i.e., the last header line prior to start of data.).
Christoph Knote's avatar
Christoph Knote committed
        nul = [ prnt(DVAR.desc) for DVAR in self.DVARS.values() ]
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        if self.format == 2110:
            # Number of variables (Integer value showing the number of dependent variables: the total number of columns of data is this value plus one.).
Christoph Knote's avatar
Christoph Knote committed
            prnt(len(self.AUXVARS))
Christoph.Knote's avatar
sdf
Christoph.Knote committed
            # Scale factors (1 for most cases, except where grossly inconvenient) - comma delimited.
Christoph Knote's avatar
Christoph Knote committed
            prnt(self.splitChar.join( [ "{:.1g}".format(AUXVAR.scale) for AUXVAR in self.AUXVARS.values() ]))
Christoph.Knote's avatar
sdf
Christoph.Knote committed
            # Missing data indicators (This is -9999 (or -99999, etc.) for any missing data condition, except for the main time (independent) variable which is never missing) - comma delimited.
Christoph Knote's avatar
Christoph Knote committed
            prnt(self.splitChar.join( [ str(AUXVAR.miss) for AUXVAR in self.AUXVARS.values() ]))
Christoph.Knote's avatar
sdf
Christoph.Knote committed
            # Variable names and units (Short variable name and units are required, and optional long descriptive name, in that order, and separated by commas. If the variable is unitless, enter the keyword "none" for its units. Each short variable name and units (and optional long name) are entered on one line. The short variable name must correspond exactly to the name used for that variable as a column header, i.e., the last header line prior to start of data.).
Christoph Knote's avatar
Christoph Knote committed
            nul = [ prnt(AUXVAR.desc) for AUXVAR in self.AUXVARS.values() ]
Christoph.Knote's avatar
sdf
Christoph.Knote committed

        # Number of SPECIAL comment lines (Integer value indicating the number of lines of special comments, NOT including this line.).
Christoph Knote's avatar
Christoph Knote committed
        prnt("{:d}".format(len(self.SCOM)))
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        # Special comments (Notes of problems or special circumstances unique to this file. An example would be comments/problems associated with a particular flight.).
        nul = [ prnt(x) for x in self.SCOM ]
        # Number of Normal comments (i.e., number of additional lines of SUPPORTING information: Integer value indicating the number of lines of additional information, NOT including this line.).
Christoph Knote's avatar
Christoph Knote committed
        prnt("{:d}".format(len(self.NCOM)))
Christoph.Knote's avatar
sdf
Christoph.Knote committed
        # Normal comments (SUPPORTING information: This is the place for investigators to more completely describe the data and measurement parameters. The supporting information structure is described below as a list of key word: value pairs. Specifically include here information on the platform used, the geo-location of data, measurement technique, and data revision comments. Note the non-optional information regarding uncertainty, the upper limit of detection (ULOD) and the lower limit of detection (LLOD) for each measured variable. The ULOD and LLOD are the values, in the same units as the measurements that correspond to the flags -7777s and -8888s within the data, respectively. The last line of this section should contain all the short variable names on one line. The key words in this section are written in BOLD below and must appear in this section of the header along with the relevant data listed after the colon. For key words where information is not needed or applicable, simply enter N/A.).
        nul = [ prnt(x) for x in self.NCOM ]
Christoph Knote's avatar
Christoph Knote committed
    
    def write_data(self, f=sys.stdout):
        '''
        Write data to file handle <f>
        '''
Christoph Knote's avatar
Christoph Knote committed
        def prnt_data(vars):
Christoph Knote's avatar
Christoph Knote committed
            f.write( str(self.splitChar.join([ str(x) for x in vars ])) + "\n")
Christoph Knote's avatar
Christoph Knote committed
        
        nul = self.data.write(prnt=prnt_data)
Christoph Knote's avatar
Christoph Knote committed
    
    def write(self, f=sys.stdout):
        '''
        Write to file handle <f>
        '''
        self.write_header(f=f)
        self.write_data(f=f)
    
Christoph Knote's avatar
Christoph Knote committed
    def make_filename(self, date_format='%Y%m%d'):
        '''
        Create ICARTT-compliant file name based on the information contained in the dataset
        '''
Christoph Knote's avatar
Christoph Knote committed
        fn  = self.dataID + "_" +self.locationID + "_" +datetime.datetime.strftime(self.dateValid, date_format)
        fn += "_R" + str(self.revision) if not self.revision is None else ""
        fn += "_L" + str(self.launch)   if not self.launch is None   else ""
        fn += "_V" + str(self.volume)   if self.nvolumes > 1         else ""
        
        return fn + ".ict"
Christoph.Knote's avatar
Christoph.Knote committed

    def read_header(self):
        '''
        Read the ICARTT header (from file)

        Reopens the input file by name if its handle is closed, parses the
        header into the attributes of this dataset and closes the handle
        again (also if parsing fails).

        :raises ValueError: if the file format index is not implemented
        '''
        class Filehandle_with_linecounter:
            # thin wrapper counting the lines read, optionally splitting them into fields
            def __init__(self, f, splitChar):
                self.f         = f
                self.line      = 0
                self.splitChar = splitChar
            def readline(self, do_split=True):
                self.line += 1
                dmp = self.f.readline().replace('\n', '').replace('\r','')
                if do_split:
                    dmp = [ word.strip(' ') for word in dmp.split(self.splitChar) ]
                return dmp

        def read_desc(f):
            # short name, units and optional long name; without a long name
            # the units field is used in its place
            dmp = f.readline()
            return dmp[0], dmp[1], dmp[2 if len(dmp) > 2 else 1]

        def read_date(fields):
            # integer conversion also accepts month / day without leading zero
            return datetime.datetime(*[ int(x) for x in fields ])

        def read_vars(f):
            # Number of variables (Integer value showing the number of
            # dependent variables: the total number of columns of data is this
            # value plus one.).
            nvar = int(f.readline()[0])

            # Scale factors (1 for most cases, except where grossly
            # inconvenient) - comma delimited.
            vscale = [ float(x) for x in f.readline() ]

            # Missing data indicators (This is -9999 (or -99999, etc.) for
            # any missing data condition, except for the main time (independent)
            # variable which is never missing) - comma delimited.
            vmiss = [ float(x) for x in f.readline() ]

            # Variable names and units (Short variable name and units are
            # required, and optional long descriptive name, in that order, and
            # separated by commas. Each variable is entered on one line.).
            # Exactly nvar lines are read, i.e. none if nvar is 0.
            descs = [ read_desc(f) for _ in range(nvar) ]

            return { name: Variable(name, unit, longname, scale=scale, miss=miss, splitChar=self.splitChar) for (name, unit, longname), scale, miss in zip(descs, vscale, vmiss) }

        if self.input_fhandle.closed:
            self.input_fhandle = open(self.input_fhandle.name)

        f = Filehandle_with_linecounter(self.input_fhandle, self.splitChar)

        try:
            # line 1 - Number of lines in header, file format index (most files use
            # 1001) - comma delimited.
            dmp = f.readline()

            nheader_suggested = int(dmp[0])
            self.format = int(dmp[1])
            if len(dmp) > 2:
                self.version = dmp[2]

            if self.format not in IMPLEMENTED_FORMATS:
                raise ValueError("ICARTT format {:d} not implemented".format(self.format))

            # line 2 - PI last name, first name/initial.
            self.PI = f.readline(do_split=False)

            # line 3 - Organization/affiliation of PI.
            self.organization = f.readline(do_split=False)

            # line 4 - Data source description (e.g., instrument name, platform name,
            # model name, etc.).
            self.dataSource = f.readline(do_split=False)

            # line 5 - Mission name (usually the mission acronym).
            self.mission = f.readline(do_split=False)

            # line 6 - File volume number, number of file volumes (these integer values
            # are used when the data require more than one file per day; for data that
            # require only one file these values are set to 1, 1) - comma delimited.
            dmp = f.readline()
            self.volume   = int(dmp[0])
            self.nvolumes = int(dmp[1])

            # line 7 - UTC date when data begin, UTC date of data reduction or revision
            # - comma delimited (yyyy, mm, dd, yyyy, mm, dd).
            dmp = f.readline()
            self.dateValid   = read_date(dmp[0:3])
            self.dateRevised = read_date(dmp[3:6])

            # line 8 - Data Interval (This value describes the time spacing (in seconds)
            # between consecutive data records. It is the (constant) interval between
            # values of the independent variable. For 1 Hz data the data interval value
            # is 1 and for 10 Hz data the value is 0.1. All intervals longer than 1
            # second must be reported as Start and Stop times, and the Data Interval
            # value is set to 0.).
            self.dataInterval = float(f.readline()[0])

            # line 9 - Description or name of independent variable (This is the name
            # chosen for the start time. It always refers to the number of seconds UTC
            # from the start of the day on which measurements began.). For format
            # 2110 the bounded independent variable is described first.
            if self.format == 2110:
                name, units, longname = read_desc(f)
                self.IBVAR = Variable(name, units, longname, splitChar=self.splitChar)

            name, units, longname = read_desc(f)
            self.IVAR = Variable(name, units, longname, splitChar=self.splitChar)

            # lines 10 to 13 - dependent variables, for format 2110 followed by
            # the same structure for the auxiliary variables
            self.DVARS = read_vars(f)

            if self.format == 2110:
                self.AUXVARS = read_vars(f)

            # line 14 + nvar - Number of SPECIAL comment lines (Integer value
            # indicating the number of lines of special comments, NOT including this
            # line.).
            nscom = int(f.readline()[0])

            # line 15 + nvar - Special comments (Notes of problems or special
            # circumstances unique to this file.).
            self.SCOM          = [ f.readline(do_split=False) for i in range(0, nscom) ]

            # line 16 + nvar + nscom - Number of Normal comments (i.e., number of
            # additional lines of SUPPORTING information: Integer value indicating the
            # number of lines of additional information, NOT including this line.).
            nncom = int(f.readline()[0])

            # line 17 + nvar + nscom - Normal comments (SUPPORTING information as a
            # list of key word: value pairs. The last line of this section should
            # contain all the "short" variable names on one line.).
            raw_ncom = [ f.readline(do_split=False) for i in range(0, nncom) ]
            self.NCOM         = StandardNormalComments(raw_ncom)

            self.nheader_file = f.line
        finally:
            self.input_fhandle.close()

        if self.nheader != nheader_suggested:
            warnings.warn("Number of header lines suggested in line 1 ({:d}) do not match actual header lines read ({:d})".format(nheader_suggested, self.nheader))
Christoph.Knote's avatar
Christoph.Knote committed

    def read_data(self):
        '''
        Read ICARTT data (from file)

        Reopens the input file by name if its handle is closed, skips the
        header lines counted by read_header and hands the remaining lines,
        split into fields, to the container matching the file format. The
        handle is closed afterwards (also if reading fails).
        '''
        if self.input_fhandle.closed:
            self.input_fhandle = open(self.input_fhandle.name)

        try:
            # skip the header
            for _ in range(self.nheader_file):
                self.input_fhandle.readline()

            # lines without content (e.g. a blank line at the end of the file) hold no record
            records = [ line.split(self.splitChar) for line in self.input_fhandle if line.strip() ]

            if   self.format == 1001:
                self.data = Container_1001(records, IVAR=self.IVAR, DVARS=self.DVARS)
            elif self.format == 2110:
                self.data = Container_2110(records, IVAR=self.IVAR, IBVAR=self.IBVAR, AUXVARS=self.AUXVARS, DVARS=self.DVARS)
            else:
                print("Unknown format")
        finally:
            self.input_fhandle.close()

    def read(self):
        '''
        Read the complete ICARTT file: the header first, then the data section.
        '''
        self.read_header()
        self.read_data()
Christoph Knote's avatar
Christoph Knote committed
        
    def __del__(self):
        try:
            if not self.input_fhandle.closed:
                self.input_fhandle.close()
        except:
            pass
        
    def __init__(self, f=None, loadData=True, splitChar=","):
        '''
        Create a dataset pre-filled with placeholder metadata and, if a file is
        given, read its header (and optionally its data).

        :param string/path/file f: file path (string or path-like object) or file object to use
        :param bool loadData: load data as well (or only header if False)?
        :param string splitChar: the splitting character used to separate fields in a line
        '''
        import os  # local import: only needed to recognise path-like objects

        self.format       = 1001
        self.version      = None

        self.dataID       = 'dataID'
        self.locationID   = 'locationID'

        self.revision     = 0
        self.launch       = None
        self.volume       = 1
        self.nvolumes     = 1

        # placeholder metadata, replaced when a header is read
        self.PI           = 'Mustermann, Martin'
        self.organization = 'Musterinstitut'
        self.dataSource   = 'Musterdatenprodukt'
        self.mission      = 'MUSTEREX'
        self.dateValid    = datetime.datetime.today()
        self.dateRevised  = datetime.datetime.today()
        self.IVAR         = Variable('Time_Start',
                                     'seconds_from_0_hours_on_valid_date',
                                     'seconds_from_0_hours_on_valid_date',
                                     scale=1.0, miss=-9999999, splitChar=splitChar)
        self.DVARS        = {
                            'Time_Stop':
                            Variable('Time_Stop',
                                     'seconds_from_0_hours_on_valid_date',
                                     'seconds_from_0_hours_on_valid_date',
                                     scale=1.0, miss=-9999999, splitChar=splitChar),
                            'Some_Variable':
                            Variable('Some_Variable',
                                     'ppbv',
                                     'ppbv',
                                     scale=1.0, miss=-9999999, splitChar=splitChar)
                            }
        self.SCOM         = []
        self.NCOM         = []

        self.data         = Container_1001([])

        # for 2110
        self.IBVAR        = None
        # read_data() passes self.AUXVARS to the 2110 container, so make sure
        # that attribute exists; AUXVAR is kept for backward compatibility.
        # NOTE(review): an empty list mirrors the original AUXVAR default -
        # confirm the container type expected for auxiliary variables.
        self.AUXVAR       = []
        self.AUXVARS      = []

        self.splitChar    = splitChar

        # Standard v2.0 for normal comments requires all keywords present,
        # might not be the case - then reading data will fail.
        # -1 marks "no header read yet".
        self.nheader_file = -1

        # read data if f is not None
        if f is not None:
            # strings and path-like objects are opened here, anything else is
            # taken to be an already opened file object
            if isinstance(f, (str, os.PathLike)):
                self.input_fhandle = open(f, 'r')
            else:
                self.input_fhandle = f

            self.read_header()
            if loadData:
                self.read_data()