From: Tom Clegg
Date: Thu, 16 Nov 2017 21:19:58 +0000 (-0500)
Subject: 12483: Compress adjacent segments when writing filetokens.
X-Git-Tag: 1.1.2~38^2~31
X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/5759c046e8d53818689774abde541449aeb3e0fb

12483: Compress adjacent segments when writing filetokens.

"0:100:foo 100:100:foo 200:100:foo" --> "0:300:foo"

Arvados-DCO-1.1-Signed-off-by: Tom Clegg
---

diff --git a/sdk/go/arvados/collection_fs.go b/sdk/go/arvados/collection_fs.go
index 3cae9cbbb5..beddb9249b 100644
--- a/sdk/go/arvados/collection_fs.go
+++ b/sdk/go/arvados/collection_fs.go
@@ -725,11 +725,18 @@ func (dn *dirnode) marshalManifest(prefix string) (string, error) {
 					} else {
 						blocks = append(blocks, e.locator)
 					}
-					segments = append(segments, m1segment{
+					next := m1segment{
 						name:   name,
 						offset: streamLen + int64(e.offset),
 						length: int64(e.length),
-					})
+					}
+					if prev := len(segments) - 1; prev >= 0 &&
+						segments[prev].name == name &&
+						segments[prev].offset+segments[prev].length == next.offset {
+						segments[prev].length += next.length
+					} else {
+						segments = append(segments, next)
+					}
 					streamLen += int64(e.size)
 				default:
 					// This can't happen: we
diff --git a/sdk/go/arvados/collection_fs_test.go b/sdk/go/arvados/collection_fs_test.go
index 324ece11ce..6e8e615254 100644
--- a/sdk/go/arvados/collection_fs_test.go
+++ b/sdk/go/arvados/collection_fs_test.go
@@ -738,7 +738,7 @@ func (s *CollectionFSSuite) TestFlushFullBlocks(c *check.C) {
 	c.Check(currentMemExtents(), check.HasLen, 1)
 
 	m, err := fs.MarshalManifest(".")
-	c.Check(m, check.Not(check.Equals), "")
+	c.Check(m, check.Matches, `[^:]* 0:50000:50K\n`)
 	c.Check(err, check.IsNil)
 	c.Check(currentMemExtents(), check.HasLen, 0)
 }