diff --git a/src/icartt/__init__.py b/src/icartt/__init__.py index d94e8e256ee46a2e640f2ed4ef0d6f93cfc02a89..5eb136f6589da099392ae197d919ed68e6ab3882 100644 --- a/src/icartt/__init__.py +++ b/src/icartt/__init__.py @@ -1,6 +1,4 @@ -from .dataset import Dataset, StandardNormalComments, Variable, Formats, VariableType - - +# VERSION def get_version(): try: # Python >= 3.8 @@ -13,8 +11,12 @@ def get_version(): return pkg_resources.get_distribution("icartt").version - __version__ = get_version() +del get_version -# TODO: add __all__ = ("names of exported classes etc.",) ? + +# EXPORTED TYPES +from .dataset import Dataset, StandardNormalComments, Variable, Formats, VariableType + +__all__ = ("Dataset", "StandardNormalComments", "Variable", "Formats", "VariableType") \ No newline at end of file diff --git a/src/icartt/dataset.py b/src/icartt/dataset.py index 4bef8669254ba42bd7a610d653db89c0336af94a..563d2b189dfd12372bfd0a2d785ddbdb54c1b9c0 100644 --- a/src/icartt/dataset.py +++ b/src/icartt/dataset.py @@ -72,7 +72,7 @@ class DataStore1001: :param newData: data to be added :type newData: numpy.ndarray """ - if not type(newData) is np.ndarray: + if not type(newData) is np.ndarray: # TODO: isinstance(arr, np.ndarray)? raise ArgumentError("Input data needs to be numpy ndarray.") if newData.dtype.names is None: try: @@ -151,7 +151,7 @@ class DataStore2110(collections.UserDict): ivarValue = float(auxds[self.ivar.shortname]) self.data[ivarValue] = { "AUX": auxds, "DEP": depds } - + def add(self, newAuxData, newDepData): auxds = DataStore1001(self.ivar, self.auxvars) depds = DataStore1001(self.ibvar, self.dvars) @@ -189,13 +189,13 @@ class StandardNormalComments(collections.UserList): @property def nlines(self): """calculates the number of lines in the normal comments section""" - n = 1 # shortnames line - n += len(self.freeform) # freeform comment + # shortnames line is always there: + n = 1 + # freeform comment might or might not be there: + n += sum(len(s.split('\n')) for s in self.freeform) + # tagged comments have at least one line: for k in self.keywords.values(): - try: - n += len(k.data[0].split("\n")) # and keywords might be multiline... - except IndexError: # ok we have no list, - n += 1 # just add 1 + n += sum(len(s.split('\n')) for s in k.data) or 1 return n @property @@ -216,26 +216,26 @@ class StandardNormalComments(collections.UserList): # in the order listed in the ICARTT documentation. currentKeyword = None + keywordLine = False for l in raw: possibleKeyword = l.split(":")[0].strip() - if possibleKeyword in self.keywords or re.match( - "R[a-zA-Z0-9]{1,2}[ ]*", possibleKeyword - ): + if possibleKeyword in self.keywords or re.match("R[a-zA-Z0-9]{1,2}[ ]*", possibleKeyword): currentKeyword = possibleKeyword + keywordLine = True if not currentKeyword in self.keywords: # for the revisions only... - self.keywords[currentKeyword] = KeywordComment( - currentKeyword, False - ) + self.keywords[currentKeyword] = KeywordComment(currentKeyword, False) + else: + keywordLine = False if currentKeyword is None: self.freeform.append(l) + elif keywordLine: + self.keywords[currentKeyword].append(l.replace(l.split(":")[0] + ":", "").strip()) else: - self.keywords[currentKeyword].append( - l.replace(l.split(":")[0] + ":", "").strip() - ) + self.keywords[currentKeyword].append(l.strip()) for key in self.keywords: - if self.keywords[key].data == []: + if not self.keywords[key].data: warnings.warn( f"Normal comments: required keyword {str(key)} is missing." ) @@ -301,9 +301,9 @@ class Variable: :rtype: str """ descstr = [str(self.shortname), str(self.units)] - if not self.standardname is None: + if self.standardname is not None: descstr += [str(self.standardname)] - if not self.longname is None: + if self.longname is not None: descstr += [str(self.longname)] return splitChar.join(descstr) @@ -402,8 +402,8 @@ class Dataset: def times(self): """Time steps of the data - :return: list of time steps - :rtype: list + :return: numpy array of time steps + :rtype: numpy.ndarray """ if self.data.data is None or self.independentVariable is None: return np.datetime64("NaT") @@ -614,7 +614,7 @@ class Dataset: # line 15 + nvar - Special comments (Notes of problems or special # circumstances unique to this file. An example would be comments/problems # associated with a particular flight.). - self.specialComments = [f.readline(doSplit=False) for i in range(0, nscom)] + self.specialComments = [f.readline(doSplit=False) for _ in range(nscom)] # line 16 + nvar + nscom - Number of Normal comments (i.e., number of # additional lines of SUPPORTING information: Integer value indicating the @@ -855,7 +855,7 @@ class Dataset: # TODO: this could be more meaningful return "ICARTT Dataset string representation" - # TODO: why is init comming last? + def __init__(self, f=None, loadData=True, splitChar=",", format=Formats.FFI1001): """Constructor method""" self.format = format @@ -910,4 +910,4 @@ class Dataset: self.readHeader(splitChar) if loadData: self.endDefineMode() - self.readData(splitChar) + self.readData(splitChar) \ No newline at end of file diff --git a/tests/_utils.py b/tests/_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0d7d6823fa6a2431e41592a4c82b4c2e276de4d3 --- /dev/null +++ b/tests/_utils.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +def compareFiles(fn, strIn, strOut, skiplines=0, nlines=-1): # pragma: no cover + """compare two icartt files line by line""" + strOut.seek(0) + strIn.seek(0) + content_in = strIn.readlines() + content_out = strOut.readlines() + strIn.close() + strOut.close() + + if nlines > 0: + content_in = content_in[skiplines : (skiplines + nlines)] + content_out = content_out[skiplines : (skiplines + nlines)] + else: + content_in = content_in[skiplines:] + content_out = content_out[skiplines:] + + if not len(content_in) == len(content_out): + return False + + for i, inline, outline in zip(range(len(content_in)), content_in, content_out): + inline = inline.strip().replace(" ", "") + outline = outline.strip().replace(" ", "") + + if inline != outline: + valid_data_line = False + insteps = [float(x) for x in inline.split(",")] + outsteps = [float(x) for x in outline.split(",")] + + if len(insteps) == len(outsteps): + valid_data_line = np.isclose(insteps, outsteps).all() + + valid_var_line = False + + insteps = [x.strip() for x in inline.split(",")] + outsteps = [x.strip() for x in outline.split(",")] + if len(insteps) == 2 and len(outsteps) == 3: + valid_var_line = ( + insteps[0] == outsteps[0] + and insteps[1] == outsteps[1] + and insteps[1] == outsteps[2] + ) + + if not valid_data_line and not valid_var_line: + print(f"{str(fn)}: line {i:d} differs:") + print(f" input: {inline}") + print(f" output: {outline}") + + return False + + return True diff --git a/tests/example_data/TODO/DISCOVERAQ-NOXYO3_P3B_20140720_R0.ict b/tests/example_data/expect_ok/DISCOVERAQ-NOXYO3_P3B_20140720_R0.ict similarity index 100% rename from tests/example_data/TODO/DISCOVERAQ-NOXYO3_P3B_20140720_R0.ict rename to tests/example_data/expect_ok/DISCOVERAQ-NOXYO3_P3B_20140720_R0.ict diff --git a/tests/example_data/TODO/SEAC4RS-PTRMS-acetaldehyde_DC8_20130806_R1.ict b/tests/example_data/expect_ok/SEAC4RS-PTRMS-acetaldehyde_DC8_20130806_R1.ict similarity index 100% rename from tests/example_data/TODO/SEAC4RS-PTRMS-acetaldehyde_DC8_20130806_R1.ict rename to tests/example_data/expect_ok/SEAC4RS-PTRMS-acetaldehyde_DC8_20130806_R1.ict diff --git a/tests/example_data/TODO/AROTAL-RAY_DC8_20040715_R1.ict b/tests/example_data/expect_warn/AROTAL-RAY_DC8_20040715_R1.ict similarity index 100% rename from tests/example_data/TODO/AROTAL-RAY_DC8_20040715_R1.ict rename to tests/example_data/expect_warn/AROTAL-RAY_DC8_20040715_R1.ict diff --git a/tests/example_data/TODO/korusaq-mrg10-dc8_merge_20160510_R4.ict b/tests/example_data/expect_warn/korusaq-mrg10-dc8_merge_20160510_R4.ict similarity index 100% rename from tests/example_data/TODO/korusaq-mrg10-dc8_merge_20160510_R4.ict rename to tests/example_data/expect_warn/korusaq-mrg10-dc8_merge_20160510_R4.ict diff --git a/tests/test_1001.py b/tests/test_1001.py index bcc2436f4d7286b3f8fcc953d8475d3ea68623da..acdc08e573f45eb0547563f7fc08590a4d1c8720 100644 --- a/tests/test_1001.py +++ b/tests/test_1001.py @@ -3,68 +3,17 @@ import pathlib import io import datetime import numpy as np + import icartt +try: + from _utils import compareFiles # we're executing from the directory of this script +except ImportError: + from ._utils import compareFiles # we're executing from another directory # working directory, example files wd = pathlib.Path(__file__).parent -def compareFiles(fn, strIn, strOut, skiplines=0, nlines=-1): # pragma: no cover - strOut.seek(0) - strIn.seek(0) - content_in = strIn.readlines() - content_out = strOut.readlines() - strIn.close() - strOut.close() - - if nlines > 0: - content_in = content_in[skiplines : (skiplines + nlines)] - content_out = content_out[skiplines : (skiplines + nlines)] - else: - content_in = content_in[skiplines:] - content_out = content_out[skiplines:] - - if not len(content_in) == len(content_out): - return False - - for inline, outline in zip(content_in, content_out): - inline = inline.strip().replace(" ", "") - outline = outline.strip().replace(" ", "") - if not inline == outline: - valid_data_line = False - # maybe this is a data line in which we only have different number formatting? - # compare as floats - # try: - insteps = [float(x) for x in inline.split(",")] - outsteps = [float(x) for x in outline.split(",")] - if len(insteps) == len(outsteps): - valid_data_line = True - for i in range(len(insteps)): - valid_data_line = valid_data_line and insteps[i] == outsteps[i] - # except: - # pass - - valid_var_line = False - # try: - insteps = [x.strip() for x in inline.split(",")] - outsteps = [x.strip() for x in outline.split(",")] - if len(insteps) == 2 and len(outsteps) == 3: - valid_var_line = ( - insteps[0] == outsteps[0] - and insteps[1] == outsteps[1] - and insteps[1] == outsteps[2] - ) - # except: - # pass - - if not valid_data_line and not valid_var_line: - print(f"{str(fn)}: line {i:d} differs:") - print(f" input: {inline}") - print(f" output: {outline}") - - return False - - return True class Simple1001TestCase(unittest.TestCase): @@ -376,4 +325,4 @@ class Create1001TestCase(unittest.TestCase): if __name__ == "__main__": # pragma: no cover - unittest.main() + unittest.main() \ No newline at end of file diff --git a/tests/test_bulkIO.py b/tests/test_bulkIO.py index 248bb824ab6cae5b0286622c690c4b77efb55e45..b6426946747eafb3869b338e41187e1eca7cff06 100644 --- a/tests/test_bulkIO.py +++ b/tests/test_bulkIO.py @@ -5,13 +5,17 @@ import io # import pytest import icartt +try: + from _utils import compareFiles # we're executing from the directory of this script +except ImportError: + from ._utils import compareFiles # we're executing from another directory # working directory, example files wd = pathlib.Path(__file__).parent / "example_data" # file : (ffi, nlscom, nlncom, nHeaderLines, exception) <- want fileinfo = { - # warns # not imported correctly! + # should warn; has multiple keywords per line in normalComments 'AROTAL-RAY_DC8_20040715_R1.ict': (2110, 1, 19, 68, None), 'AR_DC8_20050203_R0.ict': (2110, 0, 18, 54, None), # warns @@ -22,7 +26,7 @@ fileinfo = { 'discoveraq-CO2_p3b_20140721_R0.ict': (1001, 1, 18, 37, None), # ok - # warns # not imported correctly! + # ok 'DISCOVERAQ-NOXYO3_P3B_20140720_R0.ict': (1001, 0, 27, 47, None), 'Dongdaemun_NIER_20160520_RA.ict': (1001, 0, 18, 36, None), # warns @@ -45,69 +49,17 @@ fileinfo = { 'PAVE-AR_DC8_20050203_R0.ict': (2110, 1, 18, 55, None), # warns - # warns # not imported correctly! + # ok 'SEAC4RS-PTRMS-acetaldehyde_DC8_20130806_R1.ict': (1001, 0, 26, 44, None), 'bt_Munich_2020061000_72.ict.txt': (1001, 29, 18, 91, None), # warns - # large file, needs improved reader + # warns (variable names) 'korusaq-mrg10-dc8_merge_20160510_R4.ict': (1001, 0, 29, 397, None), } -# TODO: dataset -> close file pointer after read ?! - -def compareFiles(fn, strIn, strOut, skiplines=0, nlines=-1): # pragma: no cover - strOut.seek(0) - strIn.seek(0) - content_in = strIn.readlines() - content_out = strOut.readlines() - strIn.close() - strOut.close() - - if nlines > 0: - content_in = content_in[skiplines : (skiplines + nlines)] - content_out = content_out[skiplines : (skiplines + nlines)] - else: - content_in = content_in[skiplines:] - content_out = content_out[skiplines:] - - if not len(content_in) == len(content_out): - return False - - for inline, outline in zip(content_in, content_out): - inline = inline.strip().replace(" ", "") - outline = outline.strip().replace(" ", "") - if not inline == outline: - valid_data_line = False - # maybe this is a data line in which we only have different number formatting? - # compare as floats - insteps = [float(x) for x in inline.split(",")] - outsteps = [float(x) for x in outline.split(",")] - if len(insteps) == len(outsteps): - valid_data_line = True - for i in range(len(insteps)): - valid_data_line = valid_data_line and insteps[i] == outsteps[i] - - valid_var_line = False - - insteps = [x.strip() for x in inline.split(",")] - outsteps = [x.strip() for x in outline.split(",")] - if len(insteps) == 2 and len(outsteps) == 3: - valid_var_line = ( - insteps[0] == outsteps[0] - and insteps[1] == outsteps[1] - and insteps[1] == outsteps[2] - ) - - if not valid_data_line and not valid_var_line: - print(f"{str(fn)}: line {i:d} differs:") - print(f" input: {inline}") - print(f" output: {outline}") - - return False - - return True +# TODO: dataset -> close file pointer after read ?! class BulkIOTestCase(unittest.TestCase): @@ -143,7 +95,7 @@ class BulkIOTestCase(unittest.TestCase): for fn in self.files_ok: with self.subTest(msg=f"Writing header for test file {str(fn)}"): ict = icartt.Dataset(fn, loadData=False) - strIn = open(fn) + strIn = open(fn, "r", encoding="utf-8") strOut = io.StringIO() ict.writeHeader(f=strOut) self.assertTrue(compareFiles(fn, strIn, strOut, nlines=ict.nHeader)) @@ -152,7 +104,7 @@ class BulkIOTestCase(unittest.TestCase): for fn in self.files_ok: with self.subTest(msg=f"Writing data for test file {str(fn)}"): ict = icartt.Dataset(fn, loadData=True) - strIn = open(fn) + strIn = open(fn, "r", encoding="utf-8") strOut = io.StringIO() ict.write(f=strOut) self.assertTrue(compareFiles(fn, strIn, strOut))