"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/arvadostest"
"git.curoverse.com/arvados.git/sdk/go/keepclient"
+ "git.curoverse.com/arvados.git/services/datamanager/collection"
+ "git.curoverse.com/arvados.git/services/datamanager/summary"
"io/ioutil"
"net/http"
"os"
"os/exec"
+ "path"
"regexp"
"strings"
"testing"
arvadostest.StartAPI()
arvadostest.StartKeep(2, false)
- arv = makeArvadosClient()
+ var err error
+ arv, err = arvadosclient.MakeArvadosClient()
+ if err != nil {
+ t.Fatalf("Error making arvados client: %s", err)
+ }
arv.ApiToken = arvadostest.DataManagerToken
// keep client
keepClient = &keepclient.KeepClient{
Arvados: &arv,
Want_replicas: 2,
- Using_proxy: true,
Client: &http.Client{},
}
// discover keep services
- if err := keepClient.DiscoverKeepServers(); err != nil {
+ if err = keepClient.DiscoverKeepServers(); err != nil {
t.Fatalf("Error discovering keep services: %s", err)
}
keepServers = []string{}
func TearDownDataManagerTest(t *testing.T) {
arvadostest.StopKeep(2)
arvadostest.StopAPI()
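+ // Reset package globals that the error-injection tests may have set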
+ summary.WriteDataTo = ""
+ collection.HeapProfileFilename = ""
}
func putBlock(t *testing.T, data string) string {
locator, _, err := keepClient.PutB([]byte(data))
if err != nil {
t.Fatalf("Error putting test data: %v", err)
}
return strings.Split(locator, "+A")[0]
}
-/*
-Test env uses two keep volumes. The volume names can be found by reading the files
- ARVADOS_HOME/tmp/keep0.volume and ARVADOS_HOME/tmp/keep1.volume
-
-The keep volumes are of the dir structure:
- volumeN/subdir/locator
-*/
+// Test env uses two keep volumes. The volume names can be found by reading the files
+// ARVADOS_HOME/tmp/keep0.volume and ARVADOS_HOME/tmp/keep1.volume
+//
+// The keep volumes are of the dir structure: volumeN/subdir/locator
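+// (e.g. keep0/abc/abcdef...), where the subdir name is the first three characters of the block hash.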
func backdateBlocks(t *testing.T, oldUnusedBlockLocators []string) {
// First get rid of any size hints in the locators
var trimmedBlockLocators []string
}
}
-/*
-Create some blocks and backdate some of them.
-Also create some collections and delete some of them.
-Verify block indexes.
-*/
+// Create some blocks and backdate some of them.
+// Also create some collections and delete some of them.
+// Verify block indexes.
func TestPutAndGetBlocks(t *testing.T) {
defer TearDownDataManagerTest(t)
SetupDataManagerTest(t)
}
}
-// Create a collection with multiple streams and blocks using arv-put
-func createMultiBlockCollection(t *testing.T, data string, numStreams, numBlocks int) string {
- tempdir, err := ioutil.TempDir(os.TempDir(), "temp-test-dir")
+func TestPutAndGetBlocks_NoErrorDuringSingleRun(t *testing.T) {
+ testOldBlocksNotDeletedOnDataManagerError(t, "", "", false, false)
+}
+
+func createBadPath(t *testing.T) (badpath string) {
+ tempdir, err := ioutil.TempDir("", "bad")
+ if err != nil {
+ t.Fatalf("Could not create temporary directory for bad path: %v", err)
+ }
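+ // The "bad" directory itself is never created, so any attempt to create a file beneath it will fail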
+ badpath = path.Join(tempdir, "bad")
+ return
+}
+
+func destroyBadPath(t *testing.T, badpath string) {
+ tempdir := path.Join(badpath, "..")
+ err := os.Remove(tempdir)
if err != nil {
- t.Fatalf("Error creating tempdir %s", err)
+ t.Fatalf("Could not remove bad path temporary directory %v: %v", tempdir, err)
}
- defer os.Remove(tempdir)
+}
+
+func TestPutAndGetBlocks_ErrorDuringGetCollectionsBadWriteTo(t *testing.T) {
+ badpath := createBadPath(t)
+ defer destroyBadPath(t, badpath)
+ testOldBlocksNotDeletedOnDataManagerError(t, path.Join(badpath, "writetofile"), "", true, true)
+}
- for i := 0; i < numStreams; i++ {
- stream, err := ioutil.TempDir(tempdir, "stream")
+func TestPutAndGetBlocks_ErrorDuringGetCollectionsBadHeapProfileFilename(t *testing.T) {
+ badpath := createBadPath(t)
+ defer destroyBadPath(t, badpath)
+ testOldBlocksNotDeletedOnDataManagerError(t, "", path.Join(badpath, "heapprofilefile"), true, true)
+}
+
+// Create some blocks and backdate some of them.
+// Run datamanager while producing an error condition.
+ // Verify that, because of the error, the blocks are not deleted.
+func testOldBlocksNotDeletedOnDataManagerError(t *testing.T, writeDataTo string, heapProfileFile string, expectError bool, expectOldBlocks bool) {
+ defer TearDownDataManagerTest(t)
+ SetupDataManagerTest(t)
+
+ // Put some blocks and backdate them.
+ var oldUnusedBlockLocators []string
+ oldUnusedBlockData := "this block will have older mtime"
+ for i := 0; i < 5; i++ {
+ oldUnusedBlockLocators = append(oldUnusedBlockLocators, putBlock(t, fmt.Sprintf("%s%d", oldUnusedBlockData, i)))
+ }
+ backdateBlocks(t, oldUnusedBlockLocators)
+
+ // Run data manager
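+ // An unwritable path in either of these globals makes singlerun fail; empty strings leave the defaults in place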
+ summary.WriteDataTo = writeDataTo
+ collection.HeapProfileFilename = heapProfileFile
+
+ err := singlerun(arv)
+ if !expectError {
if err != nil {
- t.Fatalf("Error creating stream tempdir %s", err)
+ t.Fatalf("Got an error during datamanager singlerun: %v", err)
+ }
+ } else {
+ if err == nil {
+ t.Fatalf("Expected error during datamanager singlerun")
}
- defer os.Remove(stream)
+ }
+ waitUntilQueuesFinishWork(t)
- for j := 0; j < numBlocks; j++ {
- tempfile, err := ioutil.TempFile(stream, "temp-test-file")
- if err != nil {
- t.Fatalf("Error creating tempfile %s", err)
- }
- defer os.Remove(tempfile.Name())
+ // Get block indexes and verify that the backdated blocks are deleted, or not, as expected
+ if expectOldBlocks {
+ verifyBlocks(t, nil, oldUnusedBlockLocators, 2)
+ } else {
+ verifyBlocks(t, oldUnusedBlockLocators, nil, 2)
+ }
+}
+
+// Create a collection with multiple streams and blocks
+func createMultiStreamBlockCollection(t *testing.T, data string, numStreams, numBlocks int) (string, []string) {
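+ // switchToken swaps in the given token and returns a function that restores the previous one when deferred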
+ defer switchToken(arvadostest.AdminToken)()
- _, err = tempfile.Write([]byte(fmt.Sprintf("%s%d", data, i)))
+ manifest := ""
+ locators := make(map[string]bool)
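+ // Build a manifest of the form "./stream0 <locator>... 0:1:dummyfile.txt\n./stream1 ..."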
+ for s := 0; s < numStreams; s++ {
+ manifest += fmt.Sprintf("./stream%d ", s)
+ for b := 0; b < numBlocks; b++ {
+ locator, _, err := keepClient.PutB([]byte(fmt.Sprintf("%s in stream %d and block %d", data, s, b)))
if err != nil {
- t.Fatalf("Error writing to tempfile %v", err)
+ t.Fatalf("Error creating block %d in stream %d: %v", b, s, err)
}
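+ // Strip the +A permission signature so the locator matches what the keepstore index reports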
+ locators[strings.Split(locator, "+A")[0]] = true
+ manifest += locator + " "
}
+ manifest += "0:1:dummyfile.txt\n"
}
- output, err := exec.Command("arv-put", tempdir).Output()
+ collection := make(Dict)
+ err := arv.Create("collections",
+ arvadosclient.Dict{"collection": arvadosclient.Dict{"manifest_text": manifest}},
+ &collection)
+
if err != nil {
- t.Fatalf("Error running arv-put %s", err)
+ t.Fatalf("Error creating collection %v", err)
}
- uuid := string(output[0:27]) // trim terminating char
- return uuid
+ var locs []string
+ for k := range locators {
+ locs = append(locs, k)
+ }
+
+ return collection["uuid"].(string), locs
}
-func geLocatorsFromCollection(t *testing.T, uuid string) []string {
- manifest := getCollection(t, uuid)["manifest_text"].(string)
+// Create a collection with multiple streams and blocks; backdate the blocks but do not delete the collection.
+// Also create a stray block and backdate it.
+// After the datamanager run: expect the collection's blocks to remain, but not the stray block.
+func TestManifestWithMultipleStreamsAndBlocks(t *testing.T) {
+ testManifestWithMultipleStreamsAndBlocks(t, 100, 10, "", false)
+}
- locators := []string{}
- splits := strings.Split(manifest, " ")
- for _, locator := range splits {
- match := locatorMatcher.FindStringSubmatch(locator)
- if match != nil {
- locators = append(locators, match[1]+"+"+match[2])
- }
- }
+// Same test as TestManifestWithMultipleStreamsAndBlocks, but with an additional
+// keepstore of a service type other than "disk". Only "disk" type services are
+// indexed by datamanager, so the outcome should be the same.
+func TestManifestWithMultipleStreamsAndBlocks_WithOneUnsupportedKeepServer(t *testing.T) {
+ testManifestWithMultipleStreamsAndBlocks(t, 2, 2, "testblobstore", false)
+}
- return locators
+// Test datamanager with dry-run. Expect no block to be deleted.
+func TestManifestWithMultipleStreamsAndBlocks_DryRun(t *testing.T) {
+ testManifestWithMultipleStreamsAndBlocks(t, 2, 2, "", true)
}
-/*
- Create collection with multiple streams and blocks; backdate the blocks and delete collection.
- Create another collection with multiple streams and blocks; backdate it's first block and delete the collection
- After datamanager run: expect only the undeleted blocks from second collection, and none of the backdated blocks.
-*/
-func TestPutAndGetCollectionsWithMultipleBlocks(t *testing.T) {
+func testManifestWithMultipleStreamsAndBlocks(t *testing.T, numStreams, numBlocks int, createExtraKeepServerWithType string, isDryRun bool) {
defer TearDownDataManagerTest(t)
SetupDataManagerTest(t)
- // Put some blocks which will be backdated later on
- collectionWithOldBlocks := createMultiBlockCollection(t, "to be deleted collection with old blocks", 5, 5)
- oldBlocks := geLocatorsFromCollection(t, collectionWithOldBlocks)
+ // create a collection whose blocks will be backdated
+ collectionWithOldBlocks, oldBlocks := createMultiStreamBlockCollection(t, "old block", numStreams, numBlocks)
+ if collectionWithOldBlocks == "" {
+ t.Fatalf("Failed to create collection with %d blocks", numStreams*numBlocks)
+ }
+ if len(oldBlocks) != numStreams*numBlocks {
+ t.Fatalf("Not all blocks are created: expected %v, found %v", 1000, len(oldBlocks))
+ }
- collectionWithNewerBlocks := createMultiBlockCollection(t, "to be deleted collection with newer and older blocks", 5, 5)
- newerBlocks := geLocatorsFromCollection(t, collectionWithNewerBlocks)
+ // create a stray block that will be backdated
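+ // (no collection references it, so datamanager should delete it once backdated)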
+ strayOldBlock := putBlock(t, "this stray block is old")
- expected := []string{}
+ expected := []string{strayOldBlock}
expected = append(expected, oldBlocks...)
- expected = append(expected, newerBlocks...)
verifyBlocks(t, nil, expected, 2)
- // Backdate old blocks and delete the collection
+ // Backdate the old blocks; the collection still references them
backdateBlocks(t, oldBlocks)
- deleteCollection(t, collectionWithOldBlocks)
- // Backdate first block from the newer blocks and delete the collection; the rest are still be reachable
- backdateBlocks(t, newerBlocks[0:1])
- deleteCollection(t, collectionWithNewerBlocks)
+ // also backdate the stray old block
+ backdateBlocks(t, []string{strayOldBlock})
+
+ // If requested, create an extra keepserver with the given type
+ // This should be ignored during indexing and hence not change the datamanager outcome
+ var extraKeepServerUUID string
+ if createExtraKeepServerWithType != "" {
+ extraKeepServerUUID = addExtraKeepServer(t, createExtraKeepServerWithType)
+ defer deleteExtraKeepServer(extraKeepServerUUID)
+ }
// run datamanager
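+ // dryRun is a package-level datamanager flag; when set, blocks are not deleted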
+ dryRun = isDryRun
dataManagerSingleRun(t)
- notExpected := []string{}
- notExpected = append(notExpected, oldBlocks...)
- notExpected = append(notExpected, newerBlocks[0])
+ if dryRun {
+ // verify that all blocks, including strayOldBlock, are still to be found
+ verifyBlocks(t, nil, expected, 2)
+ } else {
+ // verify that strayOldBlock is not to be found, but the collection's blocks are still there
+ verifyBlocks(t, []string{strayOldBlock}, oldBlocks, 2)
+ }
+}
- verifyBlocks(t, notExpected, newerBlocks[1:], 2)
+// Add one more keepstore with the given service type
+func addExtraKeepServer(t *testing.T, serviceType string) string {
+ defer switchToken(arvadostest.AdminToken)()
+
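+ // Nothing listens at this address; datamanager only indexes "disk" type services, so this entry is never contacted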
+ extraKeepService := make(arvadosclient.Dict)
+ err := arv.Create("keep_services",
+ arvadosclient.Dict{"keep_service": arvadosclient.Dict{
+ "service_host": "localhost",
+ "service_port": "21321",
+ "service_ssl_flag": false,
+ "service_type": serviceType}},
+ &extraKeepService)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ return extraKeepService["uuid"].(string)
+}
+
+func deleteExtraKeepServer(uuid string) {
+ defer switchToken(arvadostest.AdminToken)()
+ arv.Delete("keep_services", uuid, nil, nil)
}