Skip to content
Commits on Source (14)
...@@ -16,7 +16,7 @@ make your changes and then [submit a merge request](https://mbees.med.uni-augsbu ...@@ -16,7 +16,7 @@ make your changes and then [submit a merge request](https://mbees.med.uni-augsbu
## Installation of the development version ## Installation of the development version
Clone this repository / or your fork and install as "editable": Clone this repository / or your fork, then install e.g. as "editable":
``` ```
git clone https://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage.git or <URL of your fork> git clone https://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage.git or <URL of your fork>
...@@ -24,9 +24,11 @@ cd icartt_pypackage ...@@ -24,9 +24,11 @@ cd icartt_pypackage
pip install -e . pip install -e .
``` ```
Note: the package is managed with [poetry](https://python-poetry.org/).
# Changelog # Changelog
## 2.0.0 (2022-02-x) ## 2.0.0 (2022-04-x)
- Compatible with ICARTT v2 standard - Compatible with ICARTT v2 standard
- Formats 1001 and 2110 - Formats 1001 and 2110
......
[tool.poetry] [tool.poetry]
name = "icartt" name = "icartt"
version = "1.9.1" version = "2.0.0-rc1"
description = "ICARTT format reader and writer" description = "ICARTT format reader and writer"
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"
authors = ["Christoph Knote <christoph.knote@med.uni-augsburg.de>"] authors = ["Christoph Knote <christoph.knote@med.uni-augsburg.de>"]
readme = "README.md" readme = "README.md"
homepage = "https://mbees.med.uni-augsburg.de/" homepage = "https://mbees.med.uni-augsburg.de/"
repository = "https://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage" repository = "https://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage"
keywords = [ "atmosphere", "file format", "ames", "nasa" ] keywords = [ "atmosphere", "file format", "icartt", "ames", "nasa" ]
classifiers = [ classifiers = [
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
"Development Status :: 5 - Production/Stable", "Development Status :: 5 - Production/Stable",
......
...@@ -8,6 +8,8 @@ from enum import IntEnum ...@@ -8,6 +8,8 @@ from enum import IntEnum
import numpy as np import numpy as np
from . import ictutils as utl
DEFAULT_NUM_FORMAT = "%g" DEFAULT_NUM_FORMAT = "%g"
"""Default number format for output. Provides the `fmt` parameter of :func:`numpy.savetxt` internally.""" """Default number format for output. Provides the `fmt` parameter of :func:`numpy.savetxt` internally."""
...@@ -317,6 +319,9 @@ class StandardNormalComments(collections.UserList): ...@@ -317,6 +319,9 @@ class StandardNormalComments(collections.UserList):
self.keywords["UNCERTAINTY"].naAllowed = False self.keywords["UNCERTAINTY"].naAllowed = False
self.keywords["REVISION"].naAllowed = False self.keywords["REVISION"].naAllowed = False
def __str__(self):
return "\n".join(f"{str(v)}" for v in self.keywords.values())
class Variable: class Variable:
"""An ICARTT variable description with name, units, scale and missing value.""" """An ICARTT variable description with name, units, scale and missing value."""
...@@ -337,21 +342,19 @@ class Variable: ...@@ -337,21 +342,19 @@ class Variable:
descstr += [str(self.longname)] descstr += [str(self.longname)]
return delimiter.join(descstr) return delimiter.join(descstr)
def isValidVariablename(self, name): # TODO: this could be a 'utils' function def isValidVariablename(self, name):
# ICARTT Standard v2 2.1.1 2) # ICARTT Standard v2 2.1.1 2)
# Variable short names and variable standard names: # Variable short names and variable standard names:
# Uppercase and lowercase ASCII alphanumeric characters # Uppercase and lowercase ASCII alphanumeric characters
# and underscores. # and underscores.
def isAsciiAlphaOrUnderscore(x): # TODO: this could be a 'utils' function
return re.match("[a-zA-Z0-9_]", x)
allAreAlphaOrUnderscore = all(isAsciiAlphaOrUnderscore(x) for x in name) allAreAlphaOrUnderscore = all(re.match("[a-zA-Z0-9_]", c) for c in name)
# The first character must be a letter, # The first character must be a letter,
firstIsAlpha = bool(re.match("[a-zA-Z]", name[0])) firstIsAlpha = bool(re.match("[a-zA-Z]", name[0]))
# and the name can be at most 31 characters in length. # and the name can be at most 31 characters in length.
lessThan31Chars = len(name) <= 31 le31Chars = len(name) <= 31
return allAreAlphaOrUnderscore and firstIsAlpha and lessThan31Chars return allAreAlphaOrUnderscore and firstIsAlpha and le31Chars
def __init__( def __init__(
self, self,
...@@ -399,8 +402,7 @@ class Variable: ...@@ -399,8 +402,7 @@ class Variable:
self.miss = miss self.miss = miss
def __repr__(self): def __repr__(self):
# TODO: this sould be something else than __str__ ? return f"[{self.units}], {self.vartype.name}"
return self.desc()
def __str__(self): def __str__(self):
return self.desc() return self.desc()
...@@ -449,7 +451,7 @@ class Dataset: ...@@ -449,7 +451,7 @@ class Dataset:
if self.defineMode: if self.defineMode:
return np.datetime64("NaT") return np.datetime64("NaT")
# for 1001, its an array, for 2110 a dict # for 1001 it's an array, for 2110 a dict
if not isinstance(self.data.data, (np.ndarray, dict)): if not isinstance(self.data.data, (np.ndarray, dict)):
return np.datetime64("NaT") return np.datetime64("NaT")
...@@ -499,25 +501,10 @@ class Dataset: ...@@ -499,25 +501,10 @@ class Dataset:
:param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
:type delimiter: str, optional :type delimiter: str, optional
""" """
class FilehandleWithLinecounter: # TODO: this could be a 'utils' class
def __init__(self, f, delimiter):
self.f = f
self.line = 0
self.delimiter = delimiter
def readline(self, doSplit=True):
self.line += 1
dmp = self.f.readline().replace("\n", "").replace("\r", "")
if doSplit:
dmp = [word.strip(" ") for word in dmp.split(self.delimiter)]
return dmp
if self.inputFhandle: if self.inputFhandle:
if self.inputFhandle.closed: if self.inputFhandle.closed:
self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8") self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8")
f = utl.FilehandleWithLinecounter(self.inputFhandle, delimiter)
f = FilehandleWithLinecounter(self.inputFhandle, delimiter)
self._readHeader(f) self._readHeader(f)
self.inputFhandle.close() self.inputFhandle.close()
...@@ -580,18 +567,9 @@ class Dataset: ...@@ -580,18 +567,9 @@ class Dataset:
# here that the independent variable should monotonically increase even when # here that the independent variable should monotonically increase even when
# crossing over to a second day. # crossing over to a second day.
def extractVardesc(dmp): # TODO: could be a 'utils' function or one line,
shortname = dmp[
0
] # shortname, units, standardname, longname, *_ = dmp + [None] * 3
units = dmp[1]
standardname = dmp[2] if len(dmp) > 2 else None
longname = dmp[3] if len(dmp) > 3 else None
return shortname, units, standardname, longname
if self.format == Formats.FFI2110: if self.format == Formats.FFI2110:
dmp = f.readline() dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp) shortname, units, standardname, longname = utl.extractVardesc(dmp)
self.independentBoundedVariable = Variable( self.independentBoundedVariable = Variable(
shortname, shortname,
units, units,
...@@ -601,7 +579,7 @@ class Dataset: ...@@ -601,7 +579,7 @@ class Dataset:
) )
dmp = f.readline() dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp) shortname, units, standardname, longname = utl.extractVardesc(dmp)
self.independentVariable = Variable( self.independentVariable = Variable(
shortname, shortname,
units, units,
...@@ -634,7 +612,7 @@ class Dataset: ...@@ -634,7 +612,7 @@ class Dataset:
# the name used for that variable as a column header, i.e., the last header # the name used for that variable as a column header, i.e., the last header
# line prior to start of data.). # line prior to start of data.).
dmp = f.readline() dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp) shortname, units, standardname, longname = utl.extractVardesc(dmp)
vshortname = [shortname] vshortname = [shortname]
vunits = [units] vunits = [units]
vstandardname = [standardname] vstandardname = [standardname]
...@@ -642,7 +620,7 @@ class Dataset: ...@@ -642,7 +620,7 @@ class Dataset:
for _ in range(1, nvar): for _ in range(1, nvar):
dmp = f.readline() dmp = f.readline()
shortname, units, standardname, longname = extractVardesc(dmp) shortname, units, standardname, longname = utl.extractVardesc(dmp)
vshortname += [shortname] vshortname += [shortname]
vunits += [units] vunits += [units]
vstandardname += [standardname] vstandardname += [standardname]
...@@ -702,8 +680,14 @@ class Dataset: ...@@ -702,8 +680,14 @@ class Dataset:
rawNcom = [f.readline(doSplit=False) for _ in range(nncom)] rawNcom = [f.readline(doSplit=False) for _ in range(nncom)]
self.normalComments.ingest(rawNcom) self.normalComments.ingest(rawNcom)
r = self.normalComments.keywords["REVISION"].data
r = "0" if not r else r[0].strip("R")
self.revision = r
self.nHeaderFile = f.line self.nHeaderFile = f.line
# TODO this warning might be misleading since it assumes all normalComment keywords
# had been defined - which is not guaranteed.
if self.nHeader != nHeaderSuggested: if self.nHeader != nHeaderSuggested:
warnings.warn( warnings.warn(
f"Number of header lines suggested in line 1 ({int(nHeaderSuggested)}) do not match actual header lines read ({int(self.nHeader)})" f"Number of header lines suggested in line 1 ({int(nHeaderSuggested)}) do not match actual header lines read ({int(self.nHeader)})"
...@@ -763,7 +747,7 @@ class Dataset: ...@@ -763,7 +747,7 @@ class Dataset:
return fn + ".ict" return fn + ".ict"
def isValidFileName(self, name): # TODO: this could be a 'utils' function def isValidFileName(self, name):
"""test whether file name complies with ICARTT standard: """test whether file name complies with ICARTT standard:
ICARTT standard v2 2.1.1 3) ICARTT standard v2 2.1.1 3)
...@@ -776,11 +760,7 @@ class Dataset: ...@@ -776,11 +760,7 @@ class Dataset:
:return: is file name valid according to ICARTT standard? :return: is file name valid according to ICARTT standard?
:rtype: bool :rtype: bool
""" """
allAsciiAlpha = all(re.match("[a-zA-Z0-9-_.]", c) for c in name)
def isAsciiAlpha(x): # TODO: this could be a 'utils' function
return re.match("[a-zA-Z0-9-_.]", x)
allAsciiAlpha = all(isAsciiAlpha(x) for x in name)
lessThan128Characters = len(name) < 128 lessThan128Characters = len(name) < 128
return allAsciiAlpha and lessThan128Characters and name.endswith(".ict") return allAsciiAlpha and lessThan128Characters and name.endswith(".ict")
...@@ -952,12 +932,18 @@ class Dataset: ...@@ -952,12 +932,18 @@ class Dataset:
if not self.inputFhandle.closed: if not self.inputFhandle.closed:
self.inputFhandle.close() self.inputFhandle.close()
def __repr__(self):
# TODO: this could be more meaningful
return "icartt.Dataset()"
def __str__(self): def __str__(self):
return f"ICARTT Dataset {self.makeFileName()}" s = [
f"ICARTT Dataset {self.makeFileName()}, format index {self.format.value}",
f"data ID: {self.dataID}",
f"location ID: {self.locationID}",
f"PI: {self.PIName}",
f"Affiliation: {self.PIAffiliation}",
f"Mission: {self.missionName}",
f"Collection date, Revision date: {self.dateOfCollection}, {self.dateOfRevision}",
f"Variables ({len(self.variables)}):\n{', '.join(x for x in self.variables)}",
]
return "\n".join(s)
def __init__( def __init__(
self, self,
...@@ -981,7 +967,7 @@ class Dataset: ...@@ -981,7 +967,7 @@ class Dataset:
""" """
self.format = format self.format = format
self.version = None self.version = None # TODO: should this be 2.0 by default?
self.dataID = "dataID" self.dataID = "dataID"
self.locationID = "locationID" self.locationID = "locationID"
...@@ -1028,6 +1014,13 @@ class Dataset: ...@@ -1028,6 +1014,13 @@ class Dataset:
if not self.isValidFileName(pathlib.Path(f).name): if not self.isValidFileName(pathlib.Path(f).name):
warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename") warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename")
else:
# try to obtain dataID and locationID from file name
parts = pathlib.Path(f).name.split("_")
# there should be at least 3 parts; data ID, location ID and revision date + file name extension
if len(parts) > 2:
self.dataID = parts[0]
self.locationID = parts[1]
self.readHeader(delimiter) self.readHeader(delimiter)
if loadData: if loadData:
......
# -*- coding: utf-8 -*-
class FilehandleWithLinecounter:
    """Wrap a file handle and count the number of lines read through it.

    :param f: object providing a ``readline()`` method that returns ``str``
    :param delimiter: separator used to split a line into fields
    :type delimiter: str
    """

    def __init__(self, f, delimiter):
        self.f = f
        self.delimiter = delimiter
        # number of readline() calls made on this wrapper so far
        self.line = 0

    def readline(self, doSplit=True):
        """Read the next line, drop its line-break characters and optionally split it.

        The line counter is incremented on every call, whatever the underlying
        handle returns.

        :param doSplit: split the line at the delimiter and strip blanks from
            each field, defaults to True
        :type doSplit: bool, optional

        :return: list of fields if doSplit is true, otherwise the line as one string
        :rtype: list or str
        """
        self.line += 1
        # remove every "\n" and "\r" in one pass
        text = self.f.readline().translate(str.maketrans("", "", "\r\n"))
        if not doSplit:
            return text
        # only blanks are stripped from the fields, other whitespace is kept
        return [field.strip(" ") for field in text.split(self.delimiter)]
def extractVardesc(line_parts: list) -> tuple:
    """Extract a variable description from the parts of a split ICARTT header line.

    Only the first four parts are used; missing trailing parts are returned
    as None and any additional parts are ignored.

    :param line_parts: fields of one header line; a list or any other
        iterable (e.g. a tuple) is accepted
    :type line_parts: list

    :raises ValueError: if line_parts is empty

    :return: shortname, units, standardname, longname
    :rtype: tuple
    """
    # pad with three None so that a line holding only a short name still unpacks;
    # building a new list also allows non-list input such as tuples
    shortname, units, standardname, longname, *_ = [*line_parts, None, None, None]
    return shortname, units, standardname, longname
...@@ -163,7 +163,6 @@ class Simple1001TestCase(unittest.TestCase): ...@@ -163,7 +163,6 @@ class Simple1001TestCase(unittest.TestCase):
["Use of these data requires PRIOR OK from the PI"], ["Use of these data requires PRIOR OK from the PI"],
) )
self.assertEqual(ict.normalComments.keywords["OTHER_COMMENTS"].data, ["N/A"]) self.assertEqual(ict.normalComments.keywords["OTHER_COMMENTS"].data, ["N/A"])
# TODO test revision information
def testReadData(self): def testReadData(self):
ict = icartt.Dataset(self.fn, loadData=True) ict = icartt.Dataset(self.fn, loadData=True)
......
import unittest import unittest
import pathlib import pathlib
import io import io
import re
# import pytest
import icartt import icartt
...@@ -44,7 +44,7 @@ fileinfo = { ...@@ -44,7 +44,7 @@ fileinfo = {
} }
# TODO: dataset -> close file pointer after read ?! # TODO? dataset -> close file pointer after read
class BulkIOTestCase(unittest.TestCase): class BulkIOTestCase(unittest.TestCase):
...@@ -75,6 +75,9 @@ class BulkIOTestCase(unittest.TestCase): ...@@ -75,6 +75,9 @@ class BulkIOTestCase(unittest.TestCase):
with self.subTest(msg=f"Reading data from test file {str(fn)}"): with self.subTest(msg=f"Reading data from test file {str(fn)}"):
ict = icartt.Dataset(fn, loadData=True) ict = icartt.Dataset(fn, loadData=True)
self.assertEqual(type(ict), icartt.Dataset) self.assertEqual(type(ict), icartt.Dataset)
m = re.search("R([a-zA-Z0-9]).ict", fn.name)
if m:
self.assertEqual(m.groups()[0], ict.revision)
def testWriteHeader(self): def testWriteHeader(self):
for fn in self.files_ok: for fn in self.files_ok:
......