For running our tests, FUSE must be configured with the
[arvados-dev.git] / compute-image-cleaner / compute-image-cleaner.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package main
6
7 import (
8         "context"
9         "flag"
10         "fmt"
11         "log"
12         "net/url"
13         "os"
14         "regexp"
15         "sort"
16         "time"
17
18         "github.com/arvados/arvados-dev/compute-image-cleaner/config"
19
20         "github.com/Azure/azure-pipeline-go/pipeline"
21         "github.com/Azure/azure-storage-blob-go/azblob"
22
23         "code.cloudfoundry.org/bytefmt"
24 )
25
// blob is the locally tracked summary of one blob in the storage container.
type blob struct {
	// name is the blob's name as reported by the container listing.
	name string
	// created is the blob's creation time as reported by the listing.
	created time.Time
	// contentLength is the blob's size in bytes.
	contentLength int64
	// deletionCandidate is set once the blob has been selected for removal.
	deletionCandidate bool
}
32
33 func prepAzBlob(storageKey string, account string, container string) (p pipeline.Pipeline, containerURL azblob.ContainerURL) {
34         // Create a default request pipeline using your storage account name and account key.
35         credential, err := azblob.NewSharedKeyCredential(account, storageKey)
36         if err != nil {
37                 log.Fatal("Invalid credentials with error: " + err.Error())
38         }
39         p = azblob.NewPipeline(credential, azblob.PipelineOptions{})
40         // From the Azure portal, get your storage account blob service URL endpoint.
41         URL, _ := url.Parse(fmt.Sprintf("https://%s.blob.core.windows.net/%s", account, container))
42
43         // Create a ContainerURL object that wraps the container URL and a request
44         // pipeline to make requests.
45         containerURL = azblob.NewContainerURL(*URL, p)
46
47         return
48 }
49
50 func loadBlobs(p pipeline.Pipeline, containerURL azblob.ContainerURL) (blobs []blob, blobNames map[string]*blob) {
51         blobNames = make(map[string]*blob)
52
53         ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
54         defer cancel()
55
56         for marker := (azblob.Marker{}); marker.NotDone(); {
57                 // Get a result segment starting with the blob indicated by the current Marker.
58                 listBlob, err := containerURL.ListBlobsFlatSegment(ctx, marker, azblob.ListBlobsSegmentOptions{})
59                 if err != nil {
60                         log.Fatal("Error getting blob list: " + err.Error())
61                 }
62
63                 // ListBlobs returns the start of the next segment; you MUST use this to get
64                 // the next segment (after processing the current result segment).
65                 marker = listBlob.NextMarker
66
67                 // Process the blobs returned in this result segment (if the segment is empty, the loop body won't execute)
68                 for _, blobInfo := range listBlob.Segment.BlobItems {
69                         blobs = append(blobs, blob{name: blobInfo.Name, created: *blobInfo.Properties.CreationTime, contentLength: *blobInfo.Properties.ContentLength})
70                         blobNames[blobInfo.Name] = &blobs[len(blobs)-1]
71                 }
72         }
73         sort.Slice(blobs, func(i, j int) bool { return blobs[i].created.After(blobs[j].created) })
74
75         return
76 }
77
78 func weedBlobs(blobs []blob, blobNames map[string]*blob, containerURL azblob.ContainerURL, account string, container string, doIt bool) {
79         ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
80         defer cancel()
81
82         var pairedFileName string
83         skipCount := 10
84         t := time.Now()
85         thirtyDaysAgo := t.AddDate(0, 0, -30)
86
87         // e.g. su92l-compute-osDisk.866eb426-8d1e-45ad-91be-2bb55b5a8147.vhd
88         vhd := regexp.MustCompile(`^(.*)-compute-osDisk\.(.*)\.vhd$`)
89         // e.g. su92l-compute-vmTemplate.866eb426-8d1e-45ad-91be-2bb55b5a8147.json
90         json := regexp.MustCompile(`^(.*)-compute-vmTemplate\.(.*)\.json$`)
91
92         for i, blob := range blobs {
93                 matches := vhd.FindStringSubmatch(blob.name)
94                 if len(matches) > 1 {
95                         // osDisk image file
96                         pairedFileName = matches[1] + "-compute-vmTemplate." + matches[2] + ".json"
97                 } else {
98                         matches := json.FindStringSubmatch(blob.name)
99                         if len(matches) > 1 {
100                                 // vmTemplate file
101                                 pairedFileName = matches[1] + "-compute-osDisk." + matches[2] + ".vhd"
102                         } else {
103                                 log.Println("Skipping blob because name does not match a known file name pattern:", blob.name, " ", blob.created)
104                                 continue
105                         }
106                 }
107                 if blob.created.After(thirtyDaysAgo) {
108                         log.Println("Skipping blob because it was created less than 30 days ago:", blob.name, " ", blob.created)
109                         skipCount = skipCount - 1
110                         continue
111                 }
112                 if skipCount > 0 {
113                         log.Println("Skipping blob because it's in the top 10 most recent list:", blob.name, " ", blob.created)
114                         skipCount = skipCount - 1
115                         continue
116                 }
117                 if _, ok := blobNames[pairedFileName]; !ok {
118                         log.Println("Warning: paired file", pairedFileName, "not found for blob", blob.name, " ", blob.created)
119                 }
120                 blobs[i].deletionCandidate = true
121         }
122
123         var reclaimedSpace, otherSpace int64
124
125         for _, blob := range blobs {
126                 if blob.deletionCandidate {
127                         log.Println("Candidate for deletion:", blob.name, " ", blob.created)
128                         reclaimedSpace = reclaimedSpace + blob.contentLength
129
130                         if doIt {
131                                 log.Println("Deleting:", blob.name, " ", blob.created)
132                                 blockBlobURL := containerURL.NewBlockBlobURL(blob.name)
133                                 result, err := blockBlobURL.Delete(ctx, azblob.DeleteSnapshotsOptionInclude, azblob.BlobAccessConditions{})
134                                 if err != nil {
135                                         log.Println(result)
136                                         log.Fatal("Error deleting blob: ", err.Error(), "\n", result)
137                                 }
138                         }
139                 } else {
140                         otherSpace = otherSpace + blob.contentLength
141                 }
142         }
143
144         if doIt {
145                 log.Println("Reclaimed", bytefmt.ByteSize(uint64(reclaimedSpace)), "or", reclaimedSpace, "bytes.")
146         } else {
147                 log.Println("Deletion not requested. Able to reclaim", bytefmt.ByteSize(uint64(reclaimedSpace)), "or", reclaimedSpace, "bytes.")
148         }
149         log.Println("Kept", bytefmt.ByteSize(uint64(otherSpace)), "or", otherSpace, "bytes.")
150
151 }
152
153 func loadStorageAccountKey(resourceGroup string, account string) (key string) {
154         ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
155         defer cancel()
156
157         storageClient := getStorageAccountsClient()
158         keys, err := storageClient.ListKeys(ctx, resourceGroup, account)
159         if err != nil {
160                 log.Fatal("Error getting storage account key:", err.Error())
161         }
162
163         key = *(*keys.Keys)[0].Value
164
165         return
166 }
167
168 func validateInputs() (resourceGroup string, account string, container string, doIt bool) {
169         err := config.ParseEnvironment()
170         if err != nil {
171                 log.Fatal("Unable to parse environment")
172         }
173
174         if config.ClientID() == "" || config.ClientSecret() == "" || config.TenantID() == "" || config.SubscriptionID() == "" {
175                 log.Fatal("Please make sure the environment variables AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, AZURE_TENANT_ID and AZURE_SUBSCRIPTION_ID are set")
176         }
177
178         flags := flag.NewFlagSet("compute-image-cleaner", flag.ExitOnError)
179         flags.StringVar(&resourceGroup, "resourceGroup", "", "Name of the Azure resource group")
180         flags.StringVar(&account, "account", "", "Name of the Azure storage account")
181         flags.StringVar(&container, "container", "", "Name of the container in the Azure storage account")
182         flags.BoolVar(&doIt, "delete", false, "Delete blobs that meet criteria (default: false)")
183         flags.Usage = func() { usage(flags) }
184         err = flags.Parse(os.Args[1:])
185
186         if err != nil || resourceGroup == "" || account == "" || container == "" {
187                 usage(flags)
188                 os.Exit(1)
189         }
190
191         return
192 }
193
194 func main() {
195         resourceGroup, account, container, doIt := validateInputs()
196         storageKey := loadStorageAccountKey(resourceGroup, account)
197         p, containerURL := prepAzBlob(storageKey, account, container)
198
199         blobs, blobNames := loadBlobs(p, containerURL)
200         weedBlobs(blobs, blobNames, containerURL, account, container, doIt)
201 }