Compare revisions

Florian Obersteiner · Florian Obersteiner · Florian Obersteiner · Florian Obersteiner · Florian Obersteiner · Florian Obersteiner
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
 image: python:latest

+variables:
+  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
+
 before_script:
  - curl -sSL https://install.python-poetry.org | python3 -
  - export PATH="/root/.local/bin:$PATH"
  - poetry --version
+  - poetry config virtualenvs.in-project true
+
+cache:
+  paths:
+    - .cache/pip
+    - .venv

 stages:
  - build_package
  - testing
  - create_docs
+  - deploy

 build_package:
  stage: build_package
@@ -27,6 +37,9 @@ test:
 create_docs:
  stage: create_docs
  needs: [test]
+  variables:
+    docs_upload_host: hosted-024-173.rz.uni-augsburg.de
+    docs_path_component: icartt
  script:
    - poetry install
    - cd docs
@@ -34,3 +47,24 @@ create_docs:
  artifacts:
    paths:
      - public
+
+deploy_docs:
+  stage: deploy
+  needs: [create_docs]
+  variables:
+    docs_upload_host: hosted-024-173.rz.uni-augsburg.de
+    docs_path_component: icartt
+  script:
+    # The following lines set up ssh-agent and inject the SSH key so that the docs can be uploaded
+    - 'command -v ssh-agent >/dev/null || ( apt-get update -y && apt-get install openssh-client -y )'
+    - 'command -v rsync >/dev/null || ( apt-get update -y && apt-get install --no-install-recommends rsync -y )'
+    - eval $(ssh-agent -s)
+    # Encode the key with "cat <key> | base64 -w0", then store it as the docs_ssh_key variable (as admin) in the GitLab web UI
+    # Use the mbees.docs.user state via pillar to create users and ssh config on our docs web server
+    - echo "$docs_ssh_key" | tr -d ' ' | base64 --decode | ssh-add -
+    - mkdir -p ~/.ssh && chmod 700 ~/.ssh
+    - ssh-keyscan ${docs_upload_host} > ~/.ssh/known_hosts
+    - rsync -ax public/ ${CI_PROJECT_NAME}-docs@${docs_upload_host}:/srv/docs/${docs_path_component}/public/$(poetry version -s)/
+  rules:
+    # Only deploy the docs if the masked SSH key variable (docs_ssh_key) is set.
+    - if: $docs_ssh_key
--- a/README.md
+++ b/README.md
@@ -2,9 +2,17 @@

 ``icartt`` is an ICARTT format reader and writer

+## Installation
+
+The package is available on [PyPI](https://pypi.org/project/icartt/), install via
+
+```
+pip install icartt
+```
+
 ## Documentation

-Please have a look at docs/source/usage.rst for usage examples. Full documentation is in preparation.
+The documentation is available [here](https://mbees.med.uni-augsburg.de/docs/icartt/2.0.0).

 ## Contributing

@@ -16,17 +24,18 @@ make your changes and then [submit a merge request](https://mbees.med.uni-augsbu

 ## Installation of the development version

-Clone this repository / or your fork and install as "editable":
+Clone this repository (or your fork) and install it. We use [poetry](https://python-poetry.org/) for packaging, so poetry needs to be installed first.

 ```
 git clone https://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage.git or <URL of your fork>
 cd icartt_pypackage
-pip install -e .
+poetry install
+poetry shell
 ```

 # Changelog

-## 2.0.0 (2022-02-x)
+## 2.0.0 (2022-04-28)

 - Compatible with ICARTT v2 standard
 - Formats 1001 and 2110

--- a/docs/conf.py
+++ b/docs/conf.py
@@ -18,11 +18,11 @@
 # -- Project information -----------------------------------------------------

 project = "ICARTT"
-copyright = "2022, Christoph Knote"
+copyright = "2022, Christoph Knote, Model-based Environmental Exposure Science, University Augsburg, Augsburg, Germany"
 author = "Christoph Knote"

 # The full version, including alpha/beta/rc tags
-release = "2.0"
+release = "2.0.0"


 # -- General configuration ---------------------------------------------------

--- a/pyproject.toml
+++ b/pyproject.toml
 [tool.poetry]
 name = "icartt"
-version = "1.9.1"
+version = "2.0.0"
 description = "ICARTT format reader and writer"
 license = "GPL-3.0-or-later"
-authors = ["Christoph Knote <christoph.knote@med.uni-augsburg.de>"]
+authors = ["Christoph Knote <christoph.knote@med.uni-augsburg.de>", "Florian Obersteiner <florian.obersteiner@kit.edu>"]
 readme = "README.md"
 homepage = "https://mbees.med.uni-augsburg.de/"
 repository = "https://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage"
+documentation = "https://mbees.med.uni-augsburg.de/docs/icartt/2.0.0/"
 keywords = [ "atmosphere", "file format", "ames", "nasa" ]
 classifiers = [
-    "Programming Language :: Python :: 3",
    "Development Status :: 5 - Production/Stable",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: End Users/Desktop",
    "Intended Audience :: Science/Research",
-    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
    "Operating System :: POSIX",
    "Topic :: Education",
    "Topic :: Scientific/Engineering",
@@ -28,12 +27,12 @@ packages = [


 [tool.poetry.urls]
-issues = "http://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage/issues"
+"Bug Tracker" = "http://mbees.med.uni-augsburg.de/gitlab/mbees/icartt_pypackage/issues"


 [tool.poetry.dependencies]
-python = ">=3.8,<4"
-numpy  = ">= 1.12"
+python = ">= 3.7, < 4"
+numpy  = ">= 1.19"


 [tool.poetry.dev-dependencies]
@@ -41,7 +40,7 @@ pytest = "^5.2"
 coverage = ">= 6.3.2"
 sphinx = ">= 4.4"
 sphinx-rtd-theme = ">= 1.0"
-enum-tools = ">= 0.9"
+enum-tools = ">=0.9"
 sphinx-toolbox = ">= 2.16.0"
 black = "^22.3.0"


--- a/src/icartt/dataset.py
+++ b/src/icartt/dataset.py
@@ -8,6 +8,8 @@ from enum import IntEnum

 import numpy as np

+from . import utils
+
 DEFAULT_NUM_FORMAT = "%g"
 """Default number format for output. Provides the `fmt` parameter of :func:`numpy.savetxt` internally."""

@@ -317,6 +319,9 @@ class StandardNormalComments(collections.UserList):
        self.keywords["UNCERTAINTY"].naAllowed = False
        self.keywords["REVISION"].naAllowed = False

+    def __str__(self):
+        return "\n".join(map(str, self.keywords.values()))
+

 class Variable:
    """An ICARTT variable description with name, units, scale and missing value."""
@@ -337,21 +342,19 @@ class Variable:
            descstr += [str(self.longname)]
        return delimiter.join(descstr)

-    def isValidVariablename(self, name):  # TODO: this could be a 'utils' function
+    def isValidVariablename(self, name):
        # ICARTT Standard v2 2.1.1 2)
        # Variable short names and variable standard names:
        # Uppercase and lowercase ASCII alphanumeric characters
        # and underscores.
-        def isAsciiAlphaOrUnderscore(x):  # TODO: this could be a 'utils' function
-            return re.match("[a-zA-Z0-9_]", x)

-        allAreAlphaOrUnderscore = all(isAsciiAlphaOrUnderscore(x) for x in name)
+        allAreAlphaOrUnderscore = all(re.match("[a-zA-Z0-9_]", c) for c in name)
        # The first character must be a letter,
        firstIsAlpha = bool(re.match("[a-zA-Z]", name[0]))
        # and the name can be at most 31 characters in length.
-        lessThan31Chars = len(name) <= 31
+        le31Chars = len(name) <= 31

-        return allAreAlphaOrUnderscore and firstIsAlpha and lessThan31Chars
+        return allAreAlphaOrUnderscore and firstIsAlpha and le31Chars

    def __init__(
        self,
@@ -399,8 +402,7 @@ class Variable:
        self.miss = miss

    def __repr__(self):
-        # TODO: this sould be something else than __str__ ?
-        return self.desc()
+        return f"[{self.units}], {self.vartype.name}"

    def __str__(self):
        return self.desc()
@@ -449,7 +451,7 @@ class Dataset:
        if self.defineMode:
            return np.datetime64("NaT")

-        # for 1001, its an array, for 2110 a dict
+        # for 1001 it's an array, for 2110 a dict
        if not isinstance(self.data.data, (np.ndarray, dict)):
            return np.datetime64("NaT")

@@ -499,25 +501,10 @@ class Dataset:
        :param delimiter: field delimiter character(s), defaults to DEFAULT_FIELD_DELIM
        :type delimiter: str, optional
        """
-
-        class FilehandleWithLinecounter:  # TODO: this could be a 'utils' class
-            def __init__(self, f, delimiter):
-                self.f = f
-                self.line = 0
-                self.delimiter = delimiter
-
-            def readline(self, doSplit=True):
-                self.line += 1
-                dmp = self.f.readline().replace("\n", "").replace("\r", "")
-                if doSplit:
-                    dmp = [word.strip(" ") for word in dmp.split(self.delimiter)]
-                return dmp
-
        if self.inputFhandle:
            if self.inputFhandle.closed:
                self.inputFhandle = open(self.inputFhandle.name, encoding="utf-8")
-
-            f = FilehandleWithLinecounter(self.inputFhandle, delimiter)
+            f = utils.FilehandleWithLinecounter(self.inputFhandle, delimiter)
            self._readHeader(f)
            self.inputFhandle.close()

@@ -580,18 +567,9 @@ class Dataset:
        # here that the independent variable should monotonically increase even when
        # crossing over to a second day.

-        def extractVardesc(dmp):  # TODO: could be a 'utils' function or one line,
-            shortname = dmp[
-                0
-            ]  # shortname, units, standardname, longname, *_ = dmp + [None] * 3
-            units = dmp[1]
-            standardname = dmp[2] if len(dmp) > 2 else None
-            longname = dmp[3] if len(dmp) > 3 else None
-            return shortname, units, standardname, longname
-
        if self.format == Formats.FFI2110:
            dmp = f.readline()
-            shortname, units, standardname, longname = extractVardesc(dmp)
+            shortname, units, standardname, longname = utils.extractVardesc(dmp)
            self.independentBoundedVariable = Variable(
                shortname,
                units,
@@ -601,7 +579,7 @@ class Dataset:
            )

        dmp = f.readline()
-        shortname, units, standardname, longname = extractVardesc(dmp)
+        shortname, units, standardname, longname = utils.extractVardesc(dmp)
        self.independentVariable = Variable(
            shortname,
            units,
@@ -634,7 +612,7 @@ class Dataset:
            # the name used for that variable as a column header, i.e., the last header
            # line prior to start of data.).
            dmp = f.readline()
-            shortname, units, standardname, longname = extractVardesc(dmp)
+            shortname, units, standardname, longname = utils.extractVardesc(dmp)
            vshortname = [shortname]
            vunits = [units]
            vstandardname = [standardname]
@@ -642,7 +620,7 @@ class Dataset:

            for _ in range(1, nvar):
                dmp = f.readline()
-                shortname, units, standardname, longname = extractVardesc(dmp)
+                shortname, units, standardname, longname = utils.extractVardesc(dmp)
                vshortname += [shortname]
                vunits += [units]
                vstandardname += [standardname]
@@ -702,8 +680,14 @@ class Dataset:
        rawNcom = [f.readline(doSplit=False) for _ in range(nncom)]
        self.normalComments.ingest(rawNcom)

+        r = self.normalComments.keywords["REVISION"].data
+        r = "0" if not r else r[0].strip("R")
+        self.revision = r
+
        self.nHeaderFile = f.line

+        # TODO this warning might be misleading since it assumes all normalComment keywords
+        #      have been defined - which is not guaranteed.
        if self.nHeader != nHeaderSuggested:
            warnings.warn(
                f"Number of header lines suggested in line 1 ({int(nHeaderSuggested)}) do not match actual header lines read ({int(self.nHeader)})"
@@ -763,7 +747,7 @@ class Dataset:

        return fn + ".ict"

-    def isValidFileName(self, name):  # TODO: this could be a 'utils' function
+    def isValidFileName(self, name):
        """test whether file name complies with ICARTT standard:

        ICARTT standard v2 2.1.1 3)
@@ -776,11 +760,7 @@ class Dataset:
        :return: is file name valid according to ICARTT standard?
        :rtype: bool
        """
-
-        def isAsciiAlpha(x):  # TODO: this could be a 'utils' function
-            return re.match("[a-zA-Z0-9-_.]", x)
-
-        allAsciiAlpha = all(isAsciiAlpha(x) for x in name)
+        allAsciiAlpha = all(re.match("[a-zA-Z0-9-_.]", c) for c in name)
        lessThan128Characters = len(name) < 128

        return allAsciiAlpha and lessThan128Characters and name.endswith(".ict")
@@ -952,12 +932,18 @@ class Dataset:
            if not self.inputFhandle.closed:
                self.inputFhandle.close()

-    def __repr__(self):
-        # TODO: this could be more meaningful
-        return "icartt.Dataset()"
-
    def __str__(self):
-        return f"ICARTT Dataset {self.makeFileName()}"
+        s = [
+            f"ICARTT Dataset {self.makeFileName()}, format index {self.format.value}",
+            f"data ID: {self.dataID}",
+            f"location ID: {self.locationID}",
+            f"PI: {self.PIName}",
+            f"Affiliation: {self.PIAffiliation}",
+            f"Mission: {self.missionName}",
+            f"Collection date, Revision date: {self.dateOfCollection}, {self.dateOfRevision}",
+            f"Variables ({len(self.variables)}):\n{', '.join(x for x in self.variables)}",
+        ]
+        return "\n".join(s)

    def __init__(
        self,
@@ -981,7 +967,7 @@ class Dataset:
        """

        self.format = format
-        self.version = None
+        self.version = None  # TODO: should this be 2.0 by default?

        self.dataID = "dataID"
        self.locationID = "locationID"
@@ -1028,6 +1014,13 @@ class Dataset:

            if not self.isValidFileName(pathlib.Path(f).name):
                warnings.warn(f"{pathlib.Path(f).name} is not a valid ICARTT filename")
+            else:
+                # try to obtain dataID and locationID from file name
+                parts = pathlib.Path(f).name.split("_")
+                # there should be at least 3 parts; data ID, location ID and revision date + file name extension
+                if len(parts) > 2:
+                    self.dataID = parts[0]
+                    self.locationID = parts[1]

            self.readHeader(delimiter)
            if loadData:

--- a/src/icartt/utils.py
+++ b/src/icartt/utils.py
+# -*- coding: utf-8 -*-
+
+
+class FilehandleWithLinecounter:
+    """A file handle wrapper that counts the number of lines that were read.
+
+    The running count is kept in ``self.line`` and is incremented on every
+    call to :meth:`readline`.
+    """
+
+    def __init__(self, f, delimiter):
+        # f: the underlying object whose readline() is called
+        # delimiter: the separator that readline() splits each line at
+        self.f = f
+        self.line = 0
+        self.delimiter = delimiter
+
+    def readline(self, doSplit=True):
+        """Read the next line, with all newline and carriage return characters removed.
+
+        :param doSplit: if true, split the line at the delimiter and strip
+            spaces from each part; defaults to True
+        :type doSplit: bool, optional
+        :return: list of stripped parts if doSplit is true, else the line as one string
+        :rtype: list or str
+        """
+        # the counter is incremented unconditionally, whatever the underlying readline() returns
+        self.line += 1
+        dmp = self.f.readline().replace("\n", "").replace("\r", "")
+        if doSplit:
+            dmp = [word.strip(" ") for word in dmp.split(self.delimiter)]
+        return dmp
+
+
+def extractVardesc(line_parts: list) -> tuple:
+    """Extract the variable description from the parts of a split ICARTT header line.
+
+    :param line_parts: parts of the header line, in the order short name, units,
+        standard name, long name; parts beyond the fourth are ignored
+    :type line_parts: list
+    :raises ValueError: if line_parts is empty
+    :return: (shortname, units, standardname, longname); entries missing from
+        line_parts are None
+    :rtype: tuple
+    """
+    # pad with three None values so that a line with fewer than four parts still unpacks
+    shortname, units, standardname, longname, *_ = line_parts + [None] * 3
+    return shortname, units, standardname, longname
--- a/tests/test_1001.py
+++ b/tests/test_1001.py
@@ -163,7 +163,6 @@ class Simple1001TestCase(unittest.TestCase):
            ["Use of these data requires PRIOR OK from the PI"],
        )
        self.assertEqual(ict.normalComments.keywords["OTHER_COMMENTS"].data, ["N/A"])
-        # TODO test revision information

    def testReadData(self):
        ict = icartt.Dataset(self.fn, loadData=True)

--- a/tests/test_bulkIO.py
+++ b/tests/test_bulkIO.py
 import unittest
 import pathlib
 import io
+import re

-# import pytest

 import icartt

@@ -44,7 +44,7 @@ fileinfo = {
 }


-# TODO: dataset -> close file pointer after read ?!
+# TODO? dataset -> close file pointer after read


 class BulkIOTestCase(unittest.TestCase):
@@ -75,6 +75,9 @@ class BulkIOTestCase(unittest.TestCase):
            with self.subTest(msg=f"Reading data from test file {str(fn)}"):
                ict = icartt.Dataset(fn, loadData=True)
                self.assertEqual(type(ict), icartt.Dataset)
+                m = re.search("R([a-zA-Z0-9]).ict", fn.name)
+                if m:
+                    self.assertEqual(m.groups()[0], ict.revision)

    def testWriteHeader(self):
        for fn in self.files_ok:
No results found