Skip to content
Snippets Groups Projects
Commit e8831b80 authored by Christoph Knote's avatar Christoph Knote
Browse files

Final touches on data model overhaul

parent 0ba20a71
No related branches found
No related tags found
No related merge requests found
......@@ -6,7 +6,7 @@ icartt
Documentation
=============
maintained at https://boxmodeling.meteo.physik.uni-muenchen.de/documentation
.. include:: docs/source/usage.rst
Contributing
============
......
......@@ -6,7 +6,7 @@
icartt
============================
icartt is an ICARTT file format reader and writer
icartt is an ICARTT file format reader and writer for Python
The ICARTT data format is described here: https://www-air.larc.nasa.gov/missions/etc/IcarttDataFormat.htm
......@@ -22,129 +22,7 @@ Installation
Example
------------
Using an existing dataset
^^^^^^^^^^^^^^^^^^^^^^^^^^
::
import icartt
# load a new dataset from an existing file
ict = icartt.Dataset('path/to/example.ict')
# list variable names
ict.varnames
# e.g. ['Fractional_Day', 'UTC', 'JDAY', 'INDEX', 'FLIGHT', 'LOCAL_SUN_TIME', ...
# get data for variable 'UTC':
ict['UTC']
# some metadata
ict.organization
ict.dataSource
ict.mission
# add data, depending on the format and type of variable:
- file type 1001, add value of independent variable:
ivar.append(234.4)
- file type 1001, add value of dependent variable:
ivar.append(234.4, 18.2)
- file type 2110, add value of independent (unbounded) variable:
ivar.append(234.4)
- file type 2110, add value of independent (bounded) variable:
ivar.append(234.4, 9148.2)
- file type 2110, add value of dependent variable:
ivar.append(234.4, 9148.2, 34.2)
# write to (other) file:
with open('path/to/output.ict', 'wb') as f:
ict.write(f)
Creating a new dataset
^^^^^^^^^^^^^^^^^^^^^^^
::
import icartt
import datetime
ict = icartt.Dataset(format=icartt.Formats.FFI1001)
ict.PIName = 'Knote, Christoph'
ict.PIAffiliation = 'Faculty of Medicine, University Augsburg, Germany'
ict.dataSourceDescription = 'Example data'
ict.missionName = 'MBEES'
ict.dateOfCollection = datetime.datetime.today()
ict.dateOfRevision = datetime.datetime.today()
ict.dataIntervalCode = [ 0 ]
ict.independentVariable = icartt.Variable( 'Time_Start',
'seconds_from_0_hours_on_valid_date',
'Time_Start',
'Time_Start',
vartype=icartt.VariableType.IndependentVariable,
scale=1.0, miss=-9999999)
#ict.independentBoundedVariable = None
#ict.auxiliaryVariables = ...
ict.dependentVariables['Time_Stop'] = icartt.Variable( 'Time_Stop',
'seconds_from_0_hours_on_valid_date',
'Time_Stop',
'Time_Stop',
scale=1.0, miss=-9999999)
ict.dependentVariables['Payload'] = icartt.Variable( 'Payload',
'some_units',
'Payload',
'Payload',
scale=1.0, miss=-9999999)
ict.specialComments.append("Some comments on this dataset:")
ict.specialComments.append("They are just examples!")
ict.specialComments.append("Adapt as needed.")
ict.endDefineMode()
# Three ways to add data:
# 1) simple (single data line)
ict.data.add( Time_Start = 12.3, Time_Stop = 12.5, Payload = 23789423.2e5 )
# Let's check:
ict.write()
# Seems to have worked!
# 2) as dictionary (single data line)
ict.data.add( **{ 'Time_Start': 12.6, 'Time_Stop': 13.1, 'Payload': 324235644.1e5 } )
# (note, we are merely exploding the dictionary to resemble method 1)
# 3) as NumPy array (bulk)
import numpy as np
data = np.array( [ (13.4, 14.0, 2348925e5), (14.1, 14.9, 23425634e5) ] )
ict.data.addBulk( data )
# Note: you are responsible for ensuring that the order of elements in a data line
# corresponds to the variable listing below:
print( [ x for x in ict.variables ] )
# Note: for single lines, you still need to make it an array!
data = np.array( [ (15.4, 15.0, 52452495290e5) ] )
.. include:: usage.rst
API
----
......
Reading an existing dataset
^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
import icartt
# load a new dataset from an existing file
ict = icartt.Dataset('tests/examples/DC8-20160517.ict')
# list variable names
[ x for x in ict.variables ]
# get data for variable 'UTC' (shortcut):
ict.data['UTC']
# get all data as NumPy array:
ict.data.data
# read some metadata
ict.PIName
ict.PIAffiliation
ict.missionName
ict.dataSourceDescription
# some info on a variable
ict.variables['Alt_ft'].units
ict.variables['Alt_ft'].miss
Creating a new dataset
^^^^^^^^^^^^^^^^^^^^^^^
::
import icartt
import datetime
ict = icartt.Dataset(format=icartt.Formats.FFI1001)
ict.PIName = 'Knote, Christoph'
ict.PIAffiliation = 'Faculty of Medicine, University Augsburg, Germany'
ict.dataSourceDescription = 'Example data'
ict.missionName = 'MBEES'
ict.dateOfCollection = datetime.datetime.today()
ict.dateOfRevision = datetime.datetime.today()
ict.dataIntervalCode = [ 0 ]
ict.independentVariable = icartt.Variable( 'Time_Start',
'seconds_from_0_hours_on_valid_date',
'Time_Start',
'Time_Start',
vartype=icartt.VariableType.IndependentVariable,
scale=1.0, miss=-9999999)
ict.dependentVariables['Time_Stop'] = icartt.Variable( 'Time_Stop',
'seconds_from_0_hours_on_valid_date',
'Time_Stop',
'Time_Stop',
scale=1.0, miss=-9999999)
ict.dependentVariables['Payload'] = icartt.Variable( 'Payload',
'some_units',
'Payload',
'Payload',
scale=1.0, miss=-9999999)
ict.specialComments.append("Some comments on this dataset:")
ict.specialComments.append("They are just examples!")
ict.specialComments.append("Adapt as needed.")
ict.endDefineMode()
# Three ways to add data:
# 1) simple (single data line)
ict.data.add( Time_Start = 12.3, Time_Stop = 12.5, Payload = 23789423.2e5 )
# Let's check:
ict.write()
# 2) as dictionary (single data line)
mydict = { 'Time_Start': 12.6, 'Time_Stop': 13.1, 'Payload': 324235644.1e5 }
ict.data.add( **mydict )
# (note: unpacking the dictionary with ** is necessary, since add() takes keyword arguments)
# 3) as NumPy array (bulk)
import numpy as np
data = np.array( [ (13.4, 14.0, 2348925e5), (14.1, 14.9, 23425634e5) ] )
ict.data.addBulk( data )
# Note 1: you are responsible for ensuring that the order of elements in a data line
# corresponds to the variable listing below:
print( [ x for x in ict.variables ] )
# Note 2: for single lines, you still need to make it an array!
data = np.array( [ (15.4, 15.0, 52452495290e5) ] )
ict.data.addBulk( data )
# Now write to file:
with open('path/to/output.ict', 'w') as f:
ict.write(f=f)
This diff is collapsed.
import unittest
import os
import io
import datetime
import numpy as np
import icartt
def compare_files(fn, str_in, str_out, skiplines=0, nlines=-1): # pragma: no cover
str_out.seek(0)
str_in.seek(0)
input = str_in.readlines()
output = str_out.readlines()
str_in.close()
str_out.close()
def compareFiles(fn, strIn, strOut, skiplines=0, nlines=-1): # pragma: no cover
strOut.seek(0)
strIn.seek(0)
input = strIn.readlines()
output = strOut.readlines()
strIn.close()
strOut.close()
if nlines > 0:
input = input[skiplines:(skiplines+nlines)]
......@@ -59,149 +61,200 @@ def compare_files(fn, str_in, str_out, skiplines=0, nlines=-1): # pragma: no cov
class Simple1001TestCase(unittest.TestCase):
def setUp(self):
self.fn = 'tests/examples/NOx_RHBrown_20040830_R0.ict'
self.nheader = 41
self.nHeader = 41
def tearDown(self):
a = 1
def test_open(self):
def testOpen(self):
ict = icartt.Dataset(self.fn, loadData=False)
self.assertEqual( type(ict), icartt.Dataset )
def test_format(self):
def testFormat(self):
ict = icartt.Dataset(self.fn, loadData=False)
self.assertEqual( ict.format, 1001 )
self.assertEqual( ict.format, icartt.Formats.FFI1001 )
def test_n(self):
def testN(self):
ict = icartt.Dataset(self.fn, loadData=False)
self.assertEqual( ict.nheader, self.nheader )
self.assertEqual( len(ict.DVARS), 9 )
self.assertEqual( len(ict.NCOM), 18 )
self.assertEqual( len(ict.SCOM), 0 )
self.assertEqual( ict.nHeader, self.nHeader )
self.assertEqual( len(ict.dependentVariables), 9 )
self.assertEqual( len(ict.normalComments), 18 )
self.assertEqual( len(ict.specialComments), 0 )
def test_ivar(self):
def testIvar(self):
ict = icartt.Dataset(self.fn, loadData=False)
self.assertEqual( ict.IVAR.shortname, "Start_UTC" )
self.assertEqual( ict.IVAR.units, "seconds" )
self.assertEqual( ict.IVAR.standardname, "number_of_seconds_from_0000_UTC" )
self.assertEqual( ict.IVAR.longname, None )
self.assertEqual( ict.IVAR.scale, 1.0 )
self.assertEqual( ict.IVAR.miss, -99999.0 )
def test_dvar(self):
self.assertEqual( ict.independentVariable.shortname, "Start_UTC" )
self.assertEqual( ict.independentVariable.units, "seconds" )
self.assertEqual( ict.independentVariable.standardname, "number_of_seconds_from_0000_UTC" )
self.assertEqual( ict.independentVariable.longname, None )
self.assertEqual( ict.independentVariable.scale, 1.0 )
self.assertEqual( ict.independentVariable.miss, -99999.0 )
def testDvar(self):
ict = icartt.Dataset(self.fn, loadData=False)
self.assertEqual( [ DVAR.shortname for DVAR in ict.DVARS.values() ],
self.assertEqual( [ DVAR.shortname for DVAR in ict.dependentVariables.values() ],
[ "Stop_UTC", "Mid_UTC", "DLat", "DLon", "Elev", "NO_ppbv", "NO_1sig", "NO2_ppbv", "NO2_1sig" ] )
self.assertEqual( [ DVAR.units for DVAR in ict.DVARS.values() ],
self.assertEqual( [ DVAR.units for DVAR in ict.dependentVariables.values() ],
[ "seconds", "seconds", "deg_N", "deg_E", "meters", "ppbv", "ppbv", "ppbv", "ppbv" ] )
self.assertEqual( [ DVAR.standardname for DVAR in ict.DVARS.values() ],
self.assertEqual( [ DVAR.standardname for DVAR in ict.dependentVariables.values() ],
[ None, None, None, None, None, None, None, None, None])
self.assertEqual( [ DVAR.longname for DVAR in ict.DVARS.values() ],
self.assertEqual( [ DVAR.longname for DVAR in ict.dependentVariables.values() ],
[ None, None, None, None, None, None, None, None, None])
self.assertEqual( [ DVAR.scale for DVAR in ict.DVARS.values() ],
[ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] )
self.assertEqual( [ DVAR.scale for DVAR in ict.dependentVariables.values() ],
[ '1', '1', '1', '1', '1', '1', '1', '1', '1' ] )
self.assertEqual( [ DVAR.miss for DVAR in ict.DVARS.values() ],
[ -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0, -9999.0 ] )
self.assertEqual( [ DVAR.miss for DVAR in ict.dependentVariables.values() ],
[ '-9999', '-9999', '-9999', '-9999', '-9999', '-9999', '-9999', '-9999', '-9999' ] )
def test_NCOM(self):
def testNCOM(self):
ict = icartt.Dataset(self.fn, loadData=False)
self.assertEqual( ict.NCOM.keywords['PI_CONTACT_INFO'].data, [ "325 Broadway, Boulder, CO 80305; 303-497-3226; email:eric.j.williams@noaa.gov" ])
self.assertEqual( ict.NCOM.keywords['PLATFORM'].data, [ "NOAA research vessel Ronald H. Brown" ])
self.assertEqual( ict.NCOM.keywords['LOCATION'].data, [ "Latitude, longitude and elevation data are included in the data records" ])
self.assertEqual( ict.NCOM.keywords['ASSOCIATED_DATA'].data, [ "N/A" ])
self.assertEqual( ict.NCOM.keywords['INSTRUMENT_INFO'].data, [ "NO: chemiluminescence; NO2: narrow-band photolysis/chemiluminescence" ])
self.assertEqual( ict.NCOM.keywords['DATA_INFO'].data, [ "All data with the exception of the location data are in ppbv. All oneminute averages contain at least 35 seconds of data, otherwise missing." ])
self.assertEqual( ict.NCOM.keywords['UNCERTAINTY'].data, [ "included in the data records as variables with a _1sig suffix" ])
self.assertEqual( ict.NCOM.keywords['ULOD_FLAG'].data, [ "-7777" ])
self.assertEqual( ict.NCOM.keywords['ULOD_VALUE'].data, [ "N/A" ])
self.assertEqual( ict.NCOM.keywords['LLOD_FLAG'].data, [ "-8888" ])
self.assertEqual( ict.NCOM.keywords['LLOD_VALUE'].data, [ "N/A, N/A, N/A, N/A, N/A, 0.005, N/A, 0.025, N/A" ])
self.assertEqual( ict.NCOM.keywords['DM_CONTACT_INFO'].data, [ "N/A" ])
self.assertEqual( ict.NCOM.keywords['PROJECT_INFO'].data, [ "ICARTT study; 1 July-15 August 2004; Gulf of Maine and North Atlantic Ocean" ])
self.assertEqual( ict.NCOM.keywords['STIPULATIONS_ON_USE'].data, [ "Use of these data requires PRIOR OK from the PI" ])
self.assertEqual( ict.NCOM.keywords['OTHER_COMMENTS'].data, [ "N/A" ])
def test_read_data(self):
self.assertEqual( ict.normalComments.keywords['PI_CONTACT_INFO'].data, [ "325 Broadway, Boulder, CO 80305; 303-497-3226; email:eric.j.williams@noaa.gov" ])
self.assertEqual( ict.normalComments.keywords['PLATFORM'].data, [ "NOAA research vessel Ronald H. Brown" ])
self.assertEqual( ict.normalComments.keywords['LOCATION'].data, [ "Latitude, longitude and elevation data are included in the data records" ])
self.assertEqual( ict.normalComments.keywords['ASSOCIATED_DATA'].data, [ "N/A" ])
self.assertEqual( ict.normalComments.keywords['INSTRUMENT_INFO'].data, [ "NO: chemiluminescence; NO2: narrow-band photolysis/chemiluminescence" ])
self.assertEqual( ict.normalComments.keywords['DATA_INFO'].data, [ "All data with the exception of the location data are in ppbv. All oneminute averages contain at least 35 seconds of data, otherwise missing." ])
self.assertEqual( ict.normalComments.keywords['UNCERTAINTY'].data, [ "included in the data records as variables with a _1sig suffix" ])
self.assertEqual( ict.normalComments.keywords['ULOD_FLAG'].data, [ "-7777" ])
self.assertEqual( ict.normalComments.keywords['ULOD_VALUE'].data, [ "N/A" ])
self.assertEqual( ict.normalComments.keywords['LLOD_FLAG'].data, [ "-8888" ])
self.assertEqual( ict.normalComments.keywords['LLOD_VALUE'].data, [ "N/A, N/A, N/A, N/A, N/A, 0.005, N/A, 0.025, N/A" ])
self.assertEqual( ict.normalComments.keywords['DM_CONTACT_INFO'].data, [ "N/A" ])
self.assertEqual( ict.normalComments.keywords['PROJECT_INFO'].data, [ "ICARTT study; 1 July-15 August 2004; Gulf of Maine and North Atlantic Ocean" ])
self.assertEqual( ict.normalComments.keywords['STIPULATIONS_ON_USE'].data, [ "Use of these data requires PRIOR OK from the PI" ])
self.assertEqual( ict.normalComments.keywords['OTHER_COMMENTS'].data, [ "N/A" ])
def testReadData(self):
ict = icartt.Dataset(self.fn, loadData=True)
self.assertEqual( type(ict), icartt.Dataset )
def test_write_header(self):
def testWriteHeader(self):
ict = icartt.Dataset(self.fn, loadData=False)
str_in = open(self.fn)
str_out = io.StringIO()
strIn = open(self.fn)
strOut = io.StringIO()
ict.write_header(str_out)
ict.writeHeader(f=strOut)
self.assertTrue( compare_files(self.fn, str_in, str_out, nlines=self.nheader) )
self.assertTrue( compareFiles(self.fn, strIn, strOut, nlines=self.nHeader) )
def test_write_data(self):
def testWriteData(self):
ict = icartt.Dataset(self.fn, loadData=True)
str_in = open(self.fn)
str_out = io.StringIO()
strIn = open(self.fn)
strOut = io.StringIO()
ict.write(str_out)
ict.write(f=strOut)
self.assertTrue( compare_files(self.fn, str_in, str_out, skiplines=self.nheader) )
self.assertTrue( compareFiles(self.fn, strIn, strOut, skiplines=self.nHeader) )
def test_write(self):
def testWrite(self):
ict = icartt.Dataset(self.fn, loadData=True)
str_in = open(self.fn)
str_out = io.StringIO()
strIn = open(self.fn)
strOut = io.StringIO()
ict.write(str_out)
ict.write(f=strOut)
self.assertTrue( compareFiles(self.fn, strIn, strOut) )
class Create1001TestCase(unittest.TestCase):
def testCreateDs(self):
ict = icartt.Dataset(format=icartt.Formats.FFI1001)
ict.PIName = 'Knote, Christoph'
ict.PIAffiliation = 'Faculty of Medicine, University Augsburg, Germany'
ict.dataSourceDescription = 'Example data'
ict.missionName = 'MBEES'
ict.dateOfCollection = datetime.datetime.today()
ict.dateOfRevision = datetime.datetime.today()
ict.dataIntervalCode = [ 0 ]
ict.independentVariable = icartt.Variable( 'Time_Start',
'seconds_from_0_hours_on_valid_date',
'Time_Start',
'Time_Start',
vartype=icartt.VariableType.IndependentVariable,
scale=1.0, miss=-9999999)
ict.dependentVariables['Time_Stop'] = icartt.Variable( 'Time_Stop',
'seconds_from_0_hours_on_valid_date',
'Time_Stop',
'Time_Stop',
scale=1.0, miss=-9999999)
ict.dependentVariables['Payload'] = icartt.Variable( 'Payload',
'some_units',
'Payload',
'Payload',
scale=1.0, miss=-9999999)
ict.specialComments.append("Some comments on this dataset:")
ict.specialComments.append("They are just examples!")
ict.specialComments.append("Adapt as needed.")
ict.endDefineMode()
ict.data.add( Time_Start = 12.3, Time_Stop = 12.5, Payload = 23789423.2e5 )
mydict = { 'Time_Start': 12.6, 'Time_Stop': 13.1, 'Payload': 324235644.1e5 }
ict.data.add( **mydict )
data = np.array( [ (13.4, 14.0, 2348925e5), (14.1, 14.9, 23425634e5) ] )
ict.data.addBulk( data )
strOut = io.StringIO()
self.assertTrue( compare_files(self.fn, str_in, str_out) )
ict.write(f=strOut)
return True
fns = [ os.path.join("tests", "examples", fn) for fn in os.listdir(os.path.join("tests", "examples")) if fn.endswith(".ict")]
#fns = [ "tests/examples/AROTAL-RAY_DC8_20040715_R1.ict" ]
class BulkIOTestCase(unittest.TestCase):
def test_open(self):
def testOpen(self):
for fn in fns:
with self.subTest(msg="Opening test file {:s}".format(fn)):
ict = icartt.Dataset(fn, loadData=False)
self.assertEqual( type(ict), icartt.Dataset )
def test_read_data(self):
def testReadData(self):
for fn in fns:
with self.subTest(msg="Reading data from test file {:s}".format(fn)):
ict = icartt.Dataset(fn, loadData=True)
self.assertEqual( type(ict), icartt.Dataset )
def test_write_header(self):
def testWriteHeader(self):
for fn in fns:
with self.subTest(msg="Writing header for test file {:s}".format(fn)):
ict = icartt.Dataset(fn, loadData=False)
str_in = open(fn)
str_out = io.StringIO()
strIn = open(fn)
strOut = io.StringIO()
ict.write_header(str_out)
ict.writeHeader(f=strOut)
self.assertTrue( compare_files(fn, str_in, str_out, nlines=ict.nheader) )
self.assertTrue( compareFiles(fn, strIn, strOut, nlines=ict.nHeader) )
def test_write(self):
def testWrite(self):
for fn in fns:
with self.subTest(msg="Writing data for test file {:s}".format(fn)):
ict = icartt.Dataset(fn, loadData=True)
str_in = open(fn)
str_out = io.StringIO()
strIn = open(fn)
strOut = io.StringIO()
ict.write(str_out)
ict.write(f=strOut)
self.assertTrue( compare_files(fn, str_in, str_out) )
self.assertTrue( compareFiles(fn, strIn, strOut) )
if __name__ == '__main__': # pragma: no cover
unittest.main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment