# based on http://www-air.larc.nasa.gov/missions/etc/ESDS-RFC-019-v1.1_0.pdf

# sanitize function
def sane_read_line(f, split_char=',', do_split=True):
    """Read one line from *f* with every newline/carriage return removed.

    Parameters:
        f: open text file object (anything offering ``readline()``).
        split_char: delimiter used when splitting the line into fields.
        do_split: when true, return the list of fields with surrounding
            spaces stripped from each; when false, return the cleaned line
            as a single string.

    Returns:
        A list of strings if *do_split* is true, otherwise one string.
    """
    text = f.readline()
    for eol in ('\n', '\r'):
        text = text.replace(eol, '')

    if not do_split:
        return text

    # only blanks are trimmed from each field; other whitespace is kept
    return [field.strip(' ') for field in text.split(split_char)]

def read_header(ict_file):
    """Parse the header of an ICARTT (.ict) file into a dictionary.

    The header is read line by line in the order laid out by the ICARTT
    format description (see the per-line comments below).

    Parameters:
        ict_file: path of the file to read.

    Returns:
        dict with the keys
        'filename', 'nheader', 'fformat', 'PI', 'organization',
        'instrument', 'mission', 'vol', 'nvol', 'date', 'rev_date', 'dt',
        'ivar', 'ivar_units', 'ndvar', 'dvscale', 'dvmiss', 'dvar',
        'dvar_units', 'nscom', 'ncom',
        plus the add-on keys 'nvar', 'var', 'var_units', 'vscale' and
        'vmiss', which prepend the independent variable to the dependent
        variable lists.  'scom' and 'com' (lists of comment lines) are only
        present when 'nscom' / 'ncom' are greater than zero.
        'dt' is an int when the header holds an integer and a float
        otherwise (e.g. 0.1 for 10 Hz data).  Missing-data indicators
        ('dvmiss', 'vmiss') are kept as strings.

    Raises:
        ValueError: a numeric header field cannot be converted.
        IndexError: a header line holds fewer fields than expected.
    """
    out = { 'filename' : ict_file }

    # the context manager closes the file even if parsing fails half-way
    with open( ict_file, 'r' ) as f:

        # line 1 - Number of lines in header, file format index (most files
        # use 1001) - comma delimited.
        dmp = sane_read_line(f)
        out['nheader'] = int(dmp[0])
        out['fformat'] = int(dmp[1])

        # line 2 - PI last name, first name/initial.
        out['PI'] = sane_read_line(f, do_split=False)

        # line 3 - Organization/affiliation of PI.
        out['organization'] = sane_read_line(f, do_split=False)

        # line 4 - Data source description (e.g., instrument name, platform
        # name, model name, etc.).
        out['instrument'] = sane_read_line(f, do_split=False)

        # line 5 - Mission name (usually the mission acronym).
        out['mission'] = sane_read_line(f, do_split=False)

        # line 6 - File volume number, number of file volumes (these integer
        # values are used when the data require more than one file per day;
        # for data that require only one file these values are set to 1, 1)
        # - comma delimited.
        dmp = sane_read_line(f)
        out['vol']  = int(dmp[0])
        out['nvol'] = int(dmp[1])

        # line 7 - UTC date when data begin, UTC date of data reduction or
        # revision - comma delimited (yyyy, mm, dd, yyyy, mm, dd).
        dmp = sane_read_line(f)
        out['date']         = [ int(x) for x in dmp[0:3] ]
        out['rev_date']     = [ int(x) for x in dmp[3:6] ]

        # line 8 - Data Interval (This value describes the time spacing (in
        # seconds) between consecutive data records. It is the (constant)
        # interval between values of the independent variable. For 1 Hz data
        # the data interval value is 1 and for 10 Hz data the value is 0.1.
        # All intervals longer than 1 second must be reported as Start and
        # Stop times, and the Data Interval value is set to 0. The Mid-point
        # time is required when it is not at the average of Start and Stop
        # times. For additional information see Section 2.5 below.).
        dmp = sane_read_line(f)
        try:
            # keep returning an int for integer intervals ...
            out['dt'] = int(dmp[0])
        except ValueError:
            # ... but accept sub-second intervals such as 0.1 as well
            out['dt'] = float(dmp[0])

        # line 9 - Description or name of independent variable (This is the
        # name chosen for the start time. It always refers to the number of
        # seconds UTC from the start of the day on which measurements began.
        # It should be noted here that the independent variable should
        # monotonically increase even when crossing over to a second day.).
        dmp = sane_read_line(f)
        out['ivar'] = dmp[0]
        out['ivar_units'] = dmp[1]

        # line 10 - Number of variables (Integer value showing the number of
        # dependent variables: the total number of columns of data is this
        # value plus one.).
        dmp = sane_read_line(f)
        out['ndvar'] = int(dmp[0])

        # line 11 - Scale factors (1 for most cases, except where grossly
        # inconvenient) - comma delimited.
        dmp = sane_read_line(f)
        out['dvscale'] = [ float(x) for x in dmp ]

        # line 12 - Missing data indicators (This is -9999 (or -99999, etc.)
        # for any missing data condition, except for the main time
        # (independent) variable which is never missing) - comma delimited.
        out['dvmiss'] = sane_read_line(f)

        # line 13 - Variable names and units (Short variable name and units
        # are required, and optional long descriptive name, in that order,
        # and separated by commas. If the variable is unitless, enter the
        # keyword "none" for its units. Each short variable name and units
        # (and optional long name) are entered on one line. The short
        # variable name must correspond exactly to the name used for that
        # variable as a column header, i.e., the last header line prior to
        # start of data.).
        out['dvar']          = []
        out['dvar_units']    = []
        # read exactly ndvar lines so that a header without dependent
        # variables does not swallow the special-comment count line
        for _ in range(out['ndvar']):
            dmp = sane_read_line(f)
            out['dvar'].append(dmp[0])
            out['dvar_units'].append(dmp[1])

        # add-on: create list including the independent variable
        out['nvar']         = 1                     + out['ndvar']
        out['var']          = [ out['ivar'] ]       + out['dvar']
        out['var_units']    = [ out['ivar_units'] ] + out['dvar_units']
        out['vscale']       = [ 1.0 ]               + out['dvscale']
        # placeholder for the independent variable, which is never missing
        out['vmiss']        = [ 'DONOTREPLACE' ]    + out['dvmiss']

        # line 14 + nvar - Number of SPECIAL comment lines (Integer value
        # indicating the number of lines of special comments, NOT including
        # this line.).
        dmp = sane_read_line(f)
        out['nscom'] = int(dmp[0])

        # line 15 + nvar - Special comments (Notes of problems or special
        # circumstances unique to this file. An example would be
        # comments/problems associated with a particular flight.).
        if out['nscom'] > 0:
            out['scom'] = [ sane_read_line(f, do_split=False)
                            for _ in range(out['nscom']) ]

        # line 16 + nvar + nscom - Number of Normal comments (i.e., number of
        # additional lines of SUPPORTING information: Integer value
        # indicating the number of lines of additional information, NOT
        # including this line.).
        dmp = sane_read_line(f)
        out['ncom'] = int(dmp[0])

        # line 17 + nvar + nscom - Normal comments (SUPPORTING information:
        # This is the place for investigators to more completely describe the
        # data and measurement parameters. The supporting information
        # structure is described below as a list of key word: value pairs.
        # Specifically include here information on the platform used, the
        # geo-location of data, measurement technique, and data revision
        # comments. Note the non-optional information regarding uncertainty,
        # the upper limit of detection (ULOD) and the lower limit of
        # detection (LLOD) for each measured variable. The ULOD and LLOD are
        # the values, in the same units as the measurements that correspond
        # to the flags -7777's and -8888's within the data, respectively. The
        # last line of this section should contain all the "short" variable
        # names on one line. The key words in this section are written in
        # BOLD below and must appear in this section of the header along with
        # the relevant data listed after the colon. For key words where
        # information is not needed or applicable, simply enter N/A.).
        if out['ncom'] > 0:
            out['com'] = [ sane_read_line(f, do_split=False)
                           for _ in range(out['ncom']) ]

    return out

def read(ict_file):
    """Read an ICARTT file: header plus all data records.

    The header is parsed with ``read_header``; the first ``nheader`` lines
    are then skipped and every remaining non-blank line is split on commas
    and converted to floats.  A value numerically equal to the
    missing-data indicator of its column (``out['vmiss']``) is returned as
    NaN.  Scale factors are not applied.

    Parameters:
        ict_file: path of the file to read.

    Returns:
        The header dictionary with an extra key 'data': a list with one
        list of floats per data record.

    Raises:
        ValueError: a data field is not a valid number.
        IndexError: a record has more columns than the header declares.
    """
    out = read_header(ict_file)

    nan = float('nan')

    def as_number(text):
        # indicators that are not numbers (e.g. the 'DONOTREPLACE'
        # placeholder of the independent variable) can never match
        try:
            return float(text)
        except ValueError:
            return None

    # convert the indicators once instead of once per record
    miss_values = [ as_number(m) for m in out['vmiss'] ]

    def nan_miss_float(raw):
        row = []
        for i, field in enumerate(raw):
            value = float(field)
            # compare numerically: a textual substring replace would turn
            # '-9999.00' or '-99990' into an unparsable 'NaN.00' / 'NaN0'
            if value == miss_values[i]:
                value = nan
            row.append(value)
        return row

    with open( ict_file, 'r' ) as f:
        # skip the header (range works on Python 2 and 3)
        for _ in range(out['nheader']):
            f.readline()

        # blank lines (e.g. a trailing newline at end of file) hold no record
        out['data'] = [ nan_miss_float(line.split(','))
                        for line in f if line.strip() ]

    return out

def read_first_and_last(ict_file):
    """Read an ICARTT file header plus only its first and last data record.

    The header is parsed with ``read_header``; the first ``nheader`` lines
    are then skipped and the remaining lines are scanned for the first and
    the last non-blank record.  Fields are converted to floats, and a value
    numerically equal to the missing-data indicator of its column
    (``out['vmiss']``) is returned as NaN.  Scale factors are not applied.

    Parameters:
        ict_file: path of the file to read.

    Returns:
        The header dictionary with an extra key 'data':
        ``[first_record, last_record]``, each a list of floats.  If the file
        holds a single record it is returned as both first and last; if it
        holds no record at all, 'data' is an empty list.

    Raises:
        ValueError: a data field is not a valid number.
        IndexError: a record has more columns than the header declares.
    """
    out = read_header(ict_file)

    nan = float('nan')

    def as_number(text):
        # indicators that are not numbers (e.g. the 'DONOTREPLACE'
        # placeholder of the independent variable) can never match
        try:
            return float(text)
        except ValueError:
            return None

    miss_values = [ as_number(m) for m in out['vmiss'] ]

    def nan_miss_float(raw):
        row = []
        for i, field in enumerate(raw):
            value = float(field)
            # compare numerically: a textual substring replace would turn
            # '-9999.00' or '-99990' into an unparsable 'NaN.00' / 'NaN0'
            if value == miss_values[i]:
                value = nan
            row.append(value)
        return row

    first = None
    last = None
    with open( ict_file, 'r' ) as f:
        # skip the header (range works on Python 2 and 3)
        for _ in range(out['nheader']):
            f.readline()

        for line in f:
            if not line.strip():
                continue        # ignore blank lines, e.g. at end of file
            if first is None:
                first = line
            last = line

    if first is None:
        # no data records at all
        out['data'] = []
    else:
        # with a single record, first and last are the same line
        out['data'] = [ nan_miss_float(first.split(',')),
                        nan_miss_float(last.split(',')) ]

    return out






