From e19cf51d9f79f4e5301c082d32bdec1d46930a6a Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Sat, 22 Jun 2024 21:31:05 -0400 Subject: [PATCH] 21891: Use collectionfs Snapshot/Splice instead of Extract. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- lib/crunchrun/copier.go | 60 ++++++++++++++++++++++++++++-------- lib/crunchrun/copier_test.go | 12 ++++++-- 2 files changed, 58 insertions(+), 14 deletions(-) diff --git a/lib/crunchrun/copier.go b/lib/crunchrun/copier.go index b411948733..afdd686919 100644 --- a/lib/crunchrun/copier.go +++ b/lib/crunchrun/copier.go @@ -17,7 +17,6 @@ import ( "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/keepclient" - "git.arvados.org/arvados.git/sdk/go/manifest" "github.com/bmatcuk/doublestar/v4" ) @@ -66,7 +65,7 @@ type copier struct { files []filetodo manifest string - manifestCache map[string]*manifest.Manifest + manifestCache map[string]string } // Copy copies data as needed, and returns a new manifest. @@ -370,7 +369,10 @@ func (cp *copier) walkMount(dest, src string, maxSymlinks int, walkMountsBelow b if err != nil { return err } - cp.manifest += mft.Extract(srcRelPath, dest).Text + err = cp.copyFromCollection(dest, &arvados.Collection{ManifestText: mft}, srcRelPath) + if err != nil { + return err + } default: cp.logger.Printf("copying %q", outputRelPath) hostRoot, err := cp.hostRoot(srcRoot) @@ -387,8 +389,10 @@ func (cp *copier) walkMount(dest, src string, maxSymlinks int, walkMountsBelow b if err != nil { return err } - mft := manifest.Manifest{Text: coll.ManifestText} - cp.manifest += mft.Extract(srcRelPath, dest).Text + err = cp.copyFromCollection(dest, &coll, srcRelPath) + if err != nil { + return err + } } if walkMountsBelow { return cp.walkMountsBelow(dest, src) @@ -396,6 +400,40 @@ func (cp *copier) walkMount(dest, src string, maxSymlinks int, walkMountsBelow b return nil } +func (cp *copier) copyFromCollection(dest string, coll *arvados.Collection, srcRelPath string) error { + tmpfs, err := coll.FileSystem(cp.client, cp.keepClient) + if err != nil { + return err + } + snap, err := arvados.Snapshot(tmpfs, srcRelPath) + if err != nil { + return err + } + tmpfs, err = (&arvados.Collection{}).FileSystem(cp.client, cp.keepClient) + if err != nil { + return err + } + // Create ancestors of dest, if necessary. + for i, c := range dest { + if i > 0 && c == '/' { + err = tmpfs.Mkdir(dest[:i], 0777) + if err != nil && !os.IsExist(err) { + return err + } + } + } + err = arvados.Splice(tmpfs, dest, snap) + if err != nil { + return err + } + mtxt, err := tmpfs.MarshalManifest(".") + if err != nil { + return err + } + cp.manifest += mtxt + return nil +} + func (cp *copier) walkMountsBelow(dest, src string) error { for mnt, mntinfo := range cp.mounts { if !strings.HasPrefix(mnt, src+"/") { @@ -550,20 +588,18 @@ func (cp *copier) copyRegularFiles(m arvados.Mount) bool { return m.Kind == "text" || m.Kind == "json" || (m.Kind == "collection" && m.Writable) } -func (cp *copier) getManifest(pdh string) (*manifest.Manifest, error) { +func (cp *copier) getManifest(pdh string) (string, error) { if mft, ok := cp.manifestCache[pdh]; ok { return mft, nil } var coll arvados.Collection err := cp.client.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+pdh, nil, nil) if err != nil { - return nil, fmt.Errorf("error retrieving collection record for %q: %s", pdh, err) + return "", fmt.Errorf("error retrieving collection record for %q: %s", pdh, err) } - mft := &manifest.Manifest{Text: coll.ManifestText} if cp.manifestCache == nil { - cp.manifestCache = map[string]*manifest.Manifest{pdh: mft} - } else { - cp.manifestCache[pdh] = mft + cp.manifestCache = make(map[string]string) } - return mft, nil + cp.manifestCache[pdh] = coll.ManifestText + return coll.ManifestText, nil } diff --git a/lib/crunchrun/copier_test.go b/lib/crunchrun/copier_test.go index 2413833395..6baa4da7cb 100644 --- a/lib/crunchrun/copier_test.go +++ b/lib/crunchrun/copier_test.go @@ -15,8 +15,9 @@ import ( "syscall" "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/arvadostest" - "git.arvados.org/arvados.git/sdk/go/manifest" + "git.arvados.org/arvados.git/sdk/go/keepclient" "github.com/sirupsen/logrus" check "gopkg.in/check.v1" ) @@ -31,8 +32,15 @@ type copierSuite struct { func (s *copierSuite) SetUpTest(c *check.C) { tmpdir := c.MkDir() s.log = bytes.Buffer{} + + cl, err := arvadosclient.MakeArvadosClient() + c.Assert(err, check.IsNil) + kc, err := keepclient.MakeKeepClient(cl) + c.Assert(err, check.IsNil) + s.cp = copier{ client: arvados.NewClientFromEnv(), + keepClient: kc, hostOutputDir: tmpdir, ctrOutputDir: "/ctr/outdir", mounts: map[string]arvados.Mount{ @@ -322,7 +330,7 @@ func (s *copierSuite) testCopyFromLargeCollection(c *check.C, writable bool) { s.cp.bindmounts = map[string]bindmount{ "/fakecollection": bindmount{HostPath: bindtmp, ReadOnly: !writable}, } - s.cp.manifestCache = map[string]*manifest.Manifest{pdh: &manifest.Manifest{Text: mtxt}} + s.cp.manifestCache = map[string]string{pdh: mtxt} err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true) c.Check(err, check.IsNil) c.Log(s.log.String()) -- 2.30.2