18574: Initial vocabulary loading support & tests.
author Lucas Di Pentima <lucas.dipentima@curii.com>
Mon, 28 Feb 2022 16:14:32 +0000 (13:14 -0300)
committer Lucas Di Pentima <lucas.dipentima@curii.com>
Mon, 28 Feb 2022 21:34:47 +0000 (18:34 -0300)
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima@curii.com>

sdk/python/arvados/vocabulary.py [new file with mode: 0644]
sdk/python/tests/test_vocabulary.py [new file with mode: 0644]

diff --git a/sdk/python/arvados/vocabulary.py b/sdk/python/arvados/vocabulary.py
new file mode 100644 (file)
index 0000000..8d89746
--- /dev/null
@@ -0,0 +1,44 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+
+from . import api
+
+_logger = logging.getLogger('arvados.vocabulary')
+
+def load_vocabulary(api_client=None):
+    """Load the Arvados vocabulary from the API.
+
+    Arguments:
+      api_client: client object whose ``vocabulary()`` method returns the
+        vocabulary definition. When omitted (or None), a client is created
+        with ``api('v1')`` at call time.
+
+    Returns a Vocabulary built from whatever ``api_client.vocabulary()``
+    returns.
+    """
+    # Create the default client lazily. Writing ``api('v1')`` as the default
+    # value in the signature would evaluate it once, when this module is
+    # imported, instead of when the function is called.
+    if api_client is None:
+        api_client = api('v1')
+    return Vocabulary(api_client.vocabulary())
+
+class Vocabulary(object):
+    """Parsed form of a vocabulary definition.
+
+    Attributes set by the constructor:
+      strict_keys: the definition's 'strict_tags' entry (False when absent).
+      key_aliases: dict mapping each key identifier found under 'tags' to a
+        VocabularyKey holding that key's labels, values and strict flag.
+    """
+    def __init__(self, voc_definition=None):
+        """Build the vocabulary from a definition mapping.
+
+        Arguments:
+          voc_definition: mapping with optional 'strict_tags' and 'tags'
+            entries. Each entry under 'tags' may carry 'strict', 'labels'
+            (a list of mappings with a 'label' entry) and 'values' (a
+            mapping of value identifier to a mapping with 'labels').
+            When omitted (or None), an empty vocabulary is built.
+        """
+        # Use a fresh dict per instance: a ``{}`` default in the signature
+        # would be stored on every no-argument instance and shared by them.
+        if voc_definition is None:
+            voc_definition = {}
+        self._definition = voc_definition
+        self.strict_keys = self._definition.get('strict_tags', False)
+        self.key_aliases = {}
+
+        for key_id, val in self._definition.get('tags', {}).items():
+            strict = val.get('strict', False)
+            key_labels = [lbl['label'] for lbl in val.get('labels', [])]
+            values = {}
+            for v_id, v_val in val.get('values', {}).items():
+                labels = [lbl['label'] for lbl in v_val.get('labels', [])]
+                values[v_id] = VocabularyValue(v_id, labels)
+            self.key_aliases[key_id] = VocabularyKey(key_id, key_labels, values, strict)
+
+class VocabularyData(object):
+    """Base record: an identifier together with its lower-cased aliases."""
+    def __init__(self, identifier, aliases=[]):
+        """Store ``identifier`` and the set of ``aliases`` converted to lower case."""
+        self.identifier = identifier
+        lowered = set()
+        for alias in aliases:
+            lowered.add(alias.lower())
+        self.aliases = lowered
+
+class VocabularyValue(VocabularyData):
+    """A vocabulary value; adds no state beyond VocabularyData."""
+    def __init__(self, identifier, aliases=[]):
+        """Initialise the identifier and aliases through the base class."""
+        base = super(VocabularyValue, self)
+        base.__init__(identifier, aliases)
+
+class VocabularyKey(VocabularyData):
+    """A vocabulary key: identifier and aliases plus its values and strict flag."""
+    def __init__(self, identifier, aliases=[], values=None, strict=False):
+        """Initialise the key.
+
+        Arguments:
+          identifier: the key's identifier.
+          aliases: iterable of alias strings, stored lower-cased by the base class.
+          values: mapping stored as ``self.values``. When omitted (or None),
+            a new empty dict is used.
+          strict: stored as ``self.strict``.
+        """
+        super(VocabularyKey, self).__init__(identifier, aliases)
+        # Give each instance its own dict: a ``{}`` default in the signature
+        # would be shared by every key created without explicit values.
+        self.values = {} if values is None else values
+        self.strict = strict
\ No newline at end of file
diff --git a/sdk/python/tests/test_vocabulary.py b/sdk/python/tests/test_vocabulary.py
new file mode 100644 (file)
index 0000000..7aea129
--- /dev/null
@@ -0,0 +1,89 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import arvados
+import unittest
+import mock
+
+from arvados import api, vocabulary
+
+class VocabularyTest(unittest.TestCase):
+    """Checks that a vocabulary definition is parsed into keys, values and aliases."""
+
+    # Sample definition: one non-strict key and one strict key, each with
+    # labelled values.
+    EXAMPLE_VOC = {
+        'tags': {
+            'IDTAGANIMALS': {
+                'strict': False,
+                'labels': [
+                    {'label': 'Animal'},
+                    {'label': 'Creature'},
+                ],
+                'values': {
+                    'IDVALANIMAL1': {
+                        'labels': [
+                            {'label': 'Human'},
+                            {'label': 'Homo sapiens'},
+                        ],
+                    },
+                    'IDVALANIMAL2': {
+                        'labels': [
+                            {'label': 'Elephant'},
+                            {'label': 'Loxodonta'},
+                        ],
+                    },
+                },
+            },
+            'IDTAGIMPORTANCE': {
+                'strict': True,
+                'labels': [
+                    {'label': 'Importance'},
+                    {'label': 'Priority'},
+                ],
+                'values': {
+                    'IDVALIMPORTANCE1': {
+                        'labels': [
+                            {'label': 'High'},
+                            {'label': 'High priority'},
+                        ],
+                    },
+                    'IDVALIMPORTANCE2': {
+                        'labels': [
+                            {'label': 'Medium'},
+                            {'label': 'Medium priority'},
+                        ],
+                    },
+                    'IDVALIMPORTANCE3': {
+                        'labels': [
+                            {'label': 'Low'},
+                            {'label': 'Low priority'},
+                        ],
+                    },
+                },
+            },
+        },
+    }
+
+    def perform_vocabulary_tests(self, voc):
+        """Shared assertions for a Vocabulary built from EXAMPLE_VOC."""
+        self.assertEqual(voc.strict_keys, False)
+        keys = voc.key_aliases
+        self.assertEqual(keys.keys(), {'IDTAGANIMALS', 'IDTAGIMPORTANCE'})
+
+        # Only the non-strict 'IDTAGANIMALS' key is inspected in detail.
+        animals = keys['IDTAGANIMALS']
+        self.assertEqual(animals.strict, False)
+        self.assertEqual(set(animals.aliases), {'animal', 'creature'})
+        self.assertEqual(animals.values.keys(), {'IDVALANIMAL1', 'IDVALANIMAL2'})
+        human = animals.values['IDVALANIMAL1']
+        self.assertEqual(human.aliases, {'human', 'homo sapiens'})
+
+    def test_empty_vocabulary(self):
+        """A Vocabulary built without a definition has no keys and is not strict."""
+        empty = vocabulary.Vocabulary()
+        self.assertEqual(empty.strict_keys, False)
+        self.assertEqual(empty.key_aliases, {})
+
+    def test_load_vocabulary(self):
+        """Building directly from the example definition yields the expected keys."""
+        self.perform_vocabulary_tests(vocabulary.Vocabulary(self.EXAMPLE_VOC))
+
+    @mock.patch('arvados.api')
+    def test_load_vocabulary_with_api(self, api_mock):
+        """load_vocabulary() builds the vocabulary from the client's vocabulary() result."""
+        # The patched arvados.api returns a client whose vocabulary() call
+        # yields the example definition.
+        client = mock.MagicMock()
+        client.vocabulary.return_value = self.EXAMPLE_VOC
+        api_mock.return_value = client
+
+        voc = vocabulary.load_vocabulary(arvados.api('v1'))
+        self.perform_vocabulary_tests(voc)