From: Peter Amstutz Date: Mon, 1 May 2023 14:58:25 +0000 (-0400) Subject: 20462: Refactor "common prefix" function & fix fencepost errors X-Git-Tag: 2.7.0~125^2~1 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/7ac1e7337dca972ce93d04e5eccb6f4de9aed141 20462: Refactor "common prefix" function & fix fencepost errors Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py index 895676565d..bd32e9f81b 100644 --- a/sdk/cwl/arvados_cwl/arvworkflow.py +++ b/sdk/cwl/arvados_cwl/arvworkflow.py @@ -235,6 +235,7 @@ def fix_schemadef(req, baseuri, urlexpander, merged_map, jobmapper, pdh): merged_map[mm].resolved[r] = rename return req + def drop_ids(d): if isinstance(d, MutableSequence): for i, s in enumerate(d): @@ -247,6 +248,32 @@ def drop_ids(d): drop_ids(d[field]) +def common_prefix(firstfile, all_files): + n = 0 + allmatch = True + if not firstfile: + return "" + + while allmatch and n < len(firstfile)-1: + n += 1 + for f in all_files: + if len(f)-1 < n: + n -= 1 + allmatch = False + break + if f[n] != firstfile[n]: + allmatch = False + break + + while n > 0 and firstfile[n] != "/": + n -= 1 + + if firstfile[n] == "/": + n += 1 + + return firstfile[:n] + + def upload_workflow(arvRunner, tool, job_order, project_uuid, runtimeContext, uuid=None, @@ -280,22 +307,7 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid, # Find the longest common prefix among all the file names. We'll # use this to recreate the directory structure in a keep # collection with correct relative references. - n = 7 - allmatch = True - if firstfile: - while allmatch: - n += 1 - for f in all_files: - if len(f)-1 < n: - n -= 1 - allmatch = False - break - if f[n] != firstfile[n]: - allmatch = False - break - - while firstfile[n] != "/": - n -= 1 + prefix = common_prefix(firstfile, all_files) col = arvados.collection.Collection(api_client=arvRunner.api) @@ -332,22 +344,22 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid, update_refs(result, w, tool.doc_loader.expand_url, merged_map, jobmapper, runtimeContext, "", "") # Write the updated file to the collection. - with col.open(w[n+1:], "wt") as f: + with col.open(w[len(prefix):], "wt") as f: if export_as_json: json.dump(result, f, indent=4, separators=(',',': ')) else: yamlloader.dump(result, stream=f) # Also store a verbatim copy of the original files - with col.open(os.path.join("original", w[n+1:]), "wt") as f: + with col.open(os.path.join("original", w[len(prefix):]), "wt") as f: f.write(text) # Upload files referenced by $include directives, these are used # unchanged and don't need to be updated. for w in include_files: - with col.open(w[n+1:], "wb") as f1: - with col.open(os.path.join("original", w[n+1:]), "wb") as f3: + with col.open(w[len(prefix):], "wb") as f1: + with col.open(os.path.join("original", w[len(prefix):]), "wb") as f3: with open(uri_file_path(w), "rb") as f2: dat = f2.read(65536) while dat: @@ -361,7 +373,7 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid, if git_info and git_info.get("http://arvados.org/cwl#gitDescribe"): toolname = "%s (%s)" % (toolname, git_info.get("http://arvados.org/cwl#gitDescribe")) - toolfile = tool.tool["id"][n+1:] + toolfile = tool.tool["id"][len(prefix):] properties = { "type": "workflow", diff --git a/sdk/cwl/tests/test_util.py b/sdk/cwl/tests/test_util.py index 1209b88d8e..183e0fcc69 100644 --- a/sdk/cwl/tests/test_util.py +++ b/sdk/cwl/tests/test_util.py @@ -11,6 +11,7 @@ import httplib2 from arvados_cwl.util import * from arvados.errors import ApiError +from arvados_cwl.arvworkflow import common_prefix class MockDateTime(datetime.datetime): @classmethod @@ -53,4 +54,19 @@ class TestUtil(unittest.TestCase): logger = mock.MagicMock() current_container = get_current_container(api, num_retries=0, logger=logger) - self.assertEqual(current_container, None) \ No newline at end of file + self.assertEqual(current_container, None) + + def test_common_prefix(self): + self.assertEqual(common_prefix("file:///foo/bar", ["file:///foo/bar/baz"]), "file:///foo/") + self.assertEqual(common_prefix("file:///foo", ["file:///foo", "file:///foo/bar", "file:///foo/bar/"]), "file:///") + self.assertEqual(common_prefix("file:///foo/", ["file:///foo/", "file:///foo/bar", "file:///foo/bar/"]), "file:///foo/") + self.assertEqual(common_prefix("file:///foo/bar", ["file:///foo/bar", "file:///foo/baz", "file:///foo/quux/q2"]), "file:///foo/") + self.assertEqual(common_prefix("file:///foo/bar/", ["file:///foo/bar/", "file:///foo/baz", "file:///foo/quux/q2"]), "file:///foo/") + self.assertEqual(common_prefix("file:///foo/bar/splat", ["file:///foo/bar/splat", "file:///foo/baz", "file:///foo/quux/q2"]), "file:///foo/") + self.assertEqual(common_prefix("file:///foo/bar/splat", ["file:///foo/bar/splat", "file:///nope", "file:///foo/quux/q2"]), "file:///") + self.assertEqual(common_prefix("file:///blub/foo", ["file:///blub/foo", "file:///blub/foo/bar", "file:///blub/foo/bar/"]), "file:///blub/") + + # sanity check, the subsequent code strips off the prefix so + # just confirm the logic doesn't have a fencepost error + prefix = "file:///" + self.assertEqual("file:///foo/bar"[len(prefix):], "foo/bar")