18574: Adds conversion methods. Improves & adds tests.
author Lucas Di Pentima <lucas.dipentima@curii.com>
Mon, 28 Feb 2022 20:20:49 +0000 (17:20 -0300)
committer Lucas Di Pentima <lucas.dipentima@curii.com>
Mon, 28 Feb 2022 21:34:47 +0000 (18:34 -0300)
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima@curii.com>

sdk/python/arvados/vocabulary.py
sdk/python/tests/test_vocabulary.py

index 1791566b584fda1bf05dd18542b665edb9bb9c53..b4148890aa31e547879d940f5aa1ce9ff46bb742 100644 (file)
@@ -26,13 +26,51 @@ class Vocabulary(object):
                 labels = [l['label'] for l in v_val.get('labels', [])]
                 values[v_id] = VocabularyValue(v_id, labels)
             vk = VocabularyKey(key_id, key_labels, values, strict)
-            self.key_aliases[key_id] = vk
+            self.key_aliases[key_id.lower()] = vk
             for alias in vk.aliases:
                 self.key_aliases[alias.lower()] = vk
 
     def __getitem__(self, key):
         return self.key_aliases[key.lower()]
 
+    def convert_to_identifiers(self, obj={}):
+        """Translate key/value pairs to machine readable identifiers.
+        """
+        if not isinstance(obj, dict):
+            raise ValueError("obj must be a dict")
+        r = {}
+        for k, v in obj.items():
+            k_id, v_id = k, v
+            try:
+                k_id = self[k].identifier
+                try:
+                    v_id = self[k][v].identifier
+                except KeyError:
+                    pass
+            except KeyError:
+                pass
+            r[k_id] = v_id
+        return r
+
+    def convert_to_labels(self, obj={}):
+        """Translate key/value pairs to human readable labels.
+        """
+        if not isinstance(obj, dict):
+            raise ValueError("obj must be a dict")
+        r = {}
+        for k, v in obj.items():
+            k_lbl, v_lbl = k, v
+            try:
+                k_lbl = self[k].preferred_label
+                try:
+                    v_lbl = self[k][v].preferred_label
+                except KeyError:
+                    pass
+            except KeyError:
+                pass
+            r[k_lbl] = v_lbl
+        return r
+
 class VocabularyData(object):
     def __init__(self, identifier, aliases=[]):
         self.identifier = identifier
@@ -53,7 +91,7 @@ class VocabularyKey(VocabularyData):
         self.strict = strict
         self.value_aliases = {}
         for v_id, v_val in values.items():
-            self.value_aliases[v_id] = v_val
+            self.value_aliases[v_id.lower()] = v_val
             for v_alias in v_val.aliases:
                 self.value_aliases[v_alias.lower()] = v_val
 
index ccaa7fe88d68889c8103d995f8179848dc94261d..7cca66a1c668ea160124777d29037b750b473f58 100644 (file)
@@ -62,44 +62,79 @@ class VocabularyTest(unittest.TestCase):
         },
     }
 
-    def perform_vocabulary_tests(self, voc):
-        self.assertEqual(voc.strict_keys, False)
+    def setUp(self):
+        self.api = arvados.api('v1')
+        self.voc = vocabulary.Vocabulary(self.EXAMPLE_VOC)
+        self.api.vocabulary = mock.MagicMock(return_value=self.EXAMPLE_VOC)
+
+    def test_vocabulary_keys(self):
+        self.assertEqual(self.voc.strict_keys, False)
         self.assertEqual(
-            voc.key_aliases.keys(),
-            set(['IDTAGANIMALS', 'creature', 'animal',
-                'IDTAGIMPORTANCE', 'importance', 'priority'])
+            self.voc.key_aliases.keys(),
+            set(['idtaganimals', 'creature', 'animal',
+                'idtagimportance', 'importance', 'priority'])
         )
 
-        vk = voc.key_aliases['creature']
+        vk = self.voc.key_aliases['creature']
         self.assertEqual(vk.strict, False)
         self.assertEqual(vk.identifier, 'IDTAGANIMALS')
         self.assertEqual(vk.aliases, ['Animal', 'Creature'])
         self.assertEqual(vk.preferred_label, 'Animal')
+        self.assertEqual(
+            vk.value_aliases.keys(),
+            set(['idvalanimal1', 'human', 'homo sapiens',
+                'idvalanimal2', 'elephant', 'loxodonta'])
+        )
 
+    def test_vocabulary_values(self):
+        vk = self.voc.key_aliases['creature']
         vv = vk.value_aliases['human']
         self.assertEqual(vv.identifier, 'IDVALANIMAL1')
         self.assertEqual(vv.aliases, ['Human', 'Homo sapiens'])
         self.assertEqual(vv.preferred_label, 'Human')
 
-        self.assertEqual(voc['creature']['human'].identifier, vv.identifier)
-        self.assertEqual(voc['Creature']['Human'].identifier, vv.identifier)
-        self.assertEqual(voc['CREATURE']['HUMAN'].identifier, vv.identifier)
+    def test_vocabulary_indexing(self):
+        self.assertEqual(self.voc['creature']['human'].identifier, 'IDVALANIMAL1')
+        self.assertEqual(self.voc['Creature']['Human'].identifier, 'IDVALANIMAL1')
+        self.assertEqual(self.voc['CREATURE']['HUMAN'].identifier, 'IDVALANIMAL1')
         with self.assertRaises(KeyError):
-            inexistant = voc['foo']
+            inexistant = self.voc['foo']
 
     def test_empty_vocabulary(self):
-        voc = vocabulary.Vocabulary()
+        voc = vocabulary.Vocabulary({})
         self.assertEqual(voc.strict_keys, False)
         self.assertEqual(voc.key_aliases, {})
 
-    def test_vocabulary_explicit_instantiation(self):
-        voc = vocabulary.Vocabulary(self.EXAMPLE_VOC)
-        self.perform_vocabulary_tests(voc)
+    def test_load_vocabulary_with_api(self):
+        voc = vocabulary.load_vocabulary(self.api)
+        self.assertEqual(voc['creature']['human'].identifier, 'IDVALANIMAL1')
+        self.assertEqual(voc['Creature']['Human'].identifier, 'IDVALANIMAL1')
+        self.assertEqual(voc['CREATURE']['HUMAN'].identifier, 'IDVALANIMAL1')
 
-    @mock.patch('arvados.api')
-    def test_load_vocabulary_with_api(self, api_mock):
-        api_mock.return_value = mock.MagicMock()
-        api_mock.return_value.vocabulary.return_value = self.EXAMPLE_VOC
+    def test_convert_to_identifiers(self):
+        cases = [
+            {'IDTAGIMPORTANCE': 'IDVALIMPORTANCE1'},
+            {'IDTAGIMPORTANCE': 'High'},
+            {'importance': 'IDVALIMPORTANCE1'},
+            {'priority': 'high priority'},
+        ]
+        for case in cases:
+            self.assertEqual(
+                self.voc.convert_to_identifiers(case),
+                {'IDTAGIMPORTANCE': 'IDVALIMPORTANCE1'},
+                "failing test case: {}".format(case)
+            )
 
-        voc = vocabulary.load_vocabulary(arvados.api('v1'))
-        self.perform_vocabulary_tests(voc)
+    def test_convert_to_labels(self):
+        cases = [
+            {'IDTAGIMPORTANCE': 'IDVALIMPORTANCE1'},
+            {'IDTAGIMPORTANCE': 'High'},
+            {'importance': 'IDVALIMPORTANCE1'},
+            {'priority': 'high priority'},
+        ]
+        for case in cases:
+            self.assertEqual(
+                self.voc.convert_to_labels(case),
+                {'Importance': 'High'},
+                "failing test case: {}".format(case)
+            )
\ No newline at end of file