Merge branch '14018-acr-set-container-properties' into main
[arvados.git] / tools / test-collection-create / test-collection-create.py
1 #!/usr/bin/env python3
2 #
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: CC-BY-SA-3.0
6
7 import argparse
8 import logging
9 import random
10 import string
11 import sys
12
13 import arvados
14 import arvados.collection
15
# Module-level logger; INFO by default, parse_arguments() switches it to
# DEBUG when --debug is given.
logger = logging.getLogger('arvados.test_collection_create')
logger.setLevel(logging.INFO)

# Threshold (127 * 1024 * 1024) that create_substreams() compares the
# accumulated length of the generated stream strings against.
max_manifest_size = 127 * 1024 * 1024

# The options are declared on a help-less parser so that arg_parser below can
# inherit them through ``parents=``.
opts = argparse.ArgumentParser(add_help=False)

# Every tunable is an integer option; register them in display order.
for _flag, _default, _help in (
        ('--min-files', 30000,
         "\nMinimum number of files on each directory. Default: 30000.\n"),
        ('--max-files', 30000,
         "\nMaximum number of files on each directory. Default: 30000.\n"),
        ('--min-depth', 0,
         "\nMinimum depth for the created tree structure. Default: 0.\n"),
        ('--max-depth', 0,
         "\nMaximum depth for the created tree structure. Default: 0.\n"),
        ('--min-subdirs', 1,
         "\nMinimum number of subdirectories created at every depth level. Default: 1.\n"),
        ('--max-subdirs', 10,
         "\nMaximum number of subdirectories created at every depth level. Default: 10.\n"),
):
    opts.add_argument(_flag, type=int, default=_default, help=_help)
# Do not leave the loop variables behind in the module namespace.
del _flag, _default, _help

opts.add_argument('--debug', default=False, action='store_true',
                  help="\nSets logging level to DEBUG.\n")

arg_parser = argparse.ArgumentParser(
    parents=[opts],
    description='Create a collection with garbage data for testing purposes.')
47
48 adjectives = ['abandoned','able','absolute','adorable','adventurous','academic',
49     'acceptable','acclaimed','accomplished','accurate','aching','acidic','acrobatic',
50     'active','actual','adept','admirable','admired','adolescent','adorable','adored',
51     'advanced','afraid','affectionate','aged','aggravating','aggressive','agile',
52     'agitated','agonizing','agreeable','ajar','alarmed','alarming','alert','alienated',
53     'alive','all','altruistic','amazing','ambitious','ample','amused','amusing','anchored',
54     'ancient','angelic','angry','anguished','animated','annual','another','antique',
55     'anxious','any','apprehensive','appropriate','apt','arctic','arid','aromatic','artistic',
56     'ashamed','assured','astonishing','athletic','attached','attentive','attractive',
57     'austere','authentic','authorized','automatic','avaricious','average','aware','awesome',
58     'awful','awkward','babyish','bad','back','baggy','bare','barren','basic','beautiful',
59     'belated','beloved','beneficial','better','best','bewitched','big','big-hearted',
60     'biodegradable','bite-sized','bitter','black','black-and-white','bland','blank',
61     'blaring','bleak','blind','blissful','blond','blue','blushing','bogus','boiling',
62     'bold','bony','boring','bossy','both','bouncy','bountiful','bowed','brave','breakable',
63     'brief','bright','brilliant','brisk','broken','bronze','brown','bruised','bubbly',
64     'bulky','bumpy','buoyant','burdensome','burly','bustling','busy','buttery','buzzing',
65     'calculating','calm','candid','canine','capital','carefree','careful','careless',
66     'caring','cautious','cavernous','celebrated','charming','cheap','cheerful','cheery',
67     'chief','chilly','chubby','circular','classic','clean','clear','clear-cut','clever',
68     'close','closed','cloudy','clueless','clumsy','cluttered','coarse','cold','colorful',
69     'colorless','colossal','comfortable','common','compassionate','competent','complete',
70     'complex','complicated','composed','concerned','concrete','confused','conscious',
71     'considerate','constant','content','conventional','cooked','cool','cooperative',
72     'coordinated','corny','corrupt','costly','courageous','courteous','crafty','crazy',
73     'creamy','creative','creepy','criminal','crisp','critical','crooked','crowded',
74     'cruel','crushing','cuddly','cultivated','cultured','cumbersome','curly','curvy',
75     'cute','cylindrical','damaged','damp','dangerous','dapper','daring','darling','dark',
76     'dazzling','dead','deadly','deafening','dear','dearest','decent','decimal','decisive',
77     'deep','defenseless','defensive','defiant','deficient','definite','definitive','delayed',
78     'delectable','delicious','delightful','delirious','demanding','dense','dental',
79     'dependable','dependent','descriptive','deserted','detailed','determined','devoted',
80     'different','difficult','digital','diligent','dim','dimpled','dimwitted','direct',
81     'disastrous','discrete','disfigured','disgusting','disloyal','dismal','distant',
82     'downright','dreary','dirty','disguised','dishonest','dismal','distant','distinct',
83     'distorted','dizzy','dopey','doting','double','downright','drab','drafty','dramatic',
84     'dreary','droopy','dry','dual','dull','dutiful','each','eager','earnest','early',
85     'easy','easy-going','ecstatic','edible','educated','elaborate','elastic','elated',
86     'elderly','electric','elegant','elementary','elliptical','embarrassed','embellished',
87     'eminent','emotional','empty','enchanted','enchanting','energetic','enlightened',
88     'enormous','enraged','entire','envious','equal','equatorial','essential','esteemed',
89     'ethical','euphoric','even','evergreen','everlasting','every','evil','exalted',
90     'excellent','exemplary','exhausted','excitable','excited','exciting','exotic',
91     'expensive','experienced','expert','extraneous','extroverted','extra-large','extra-small',
92     'fabulous','failing','faint','fair','faithful','fake','false','familiar','famous',
93     'fancy','fantastic','far','faraway','far-flung','far-off','fast','fat','fatal',
94     'fatherly','favorable','favorite','fearful','fearless','feisty','feline','female',
95     'feminine','few','fickle','filthy','fine','finished','firm','first','firsthand',
96     'fitting','fixed','flaky','flamboyant','flashy','flat','flawed','flawless','flickering',
97     'flimsy','flippant','flowery','fluffy','fluid','flustered','focused','fond','foolhardy',
98     'foolish','forceful','forked','formal','forsaken','forthright','fortunate','fragrant',
99     'frail','frank','frayed','free','French','fresh','frequent','friendly','frightened',
100     'frightening','frigid','frilly','frizzy','frivolous','front','frosty','frozen',
101     'frugal','fruitful','full','fumbling','functional','funny','fussy','fuzzy','gargantuan',
102     'gaseous','general','generous','gentle','genuine','giant','giddy','gigantic','gifted',
103     'giving','glamorous','glaring','glass','gleaming','gleeful','glistening','glittering',
104     'gloomy','glorious','glossy','glum','golden','good','good-natured','gorgeous',
105     'graceful','gracious','grand','grandiose','granular','grateful','grave','gray',
106     'great','greedy','green','gregarious','grim','grimy','gripping','grizzled','gross',
107     'grotesque','grouchy','grounded','growing','growling','grown','grubby','gruesome',
108     'grumpy','guilty','gullible','gummy','hairy','half','handmade','handsome','handy',
109     'happy','happy-go-lucky','hard','hard-to-find','harmful','harmless','harmonious',
110     'harsh','hasty','hateful','haunting','healthy','heartfelt','hearty','heavenly',
111     'heavy','hefty','helpful','helpless','hidden','hideous','high','high-level','hilarious',
112     'hoarse','hollow','homely','honest','honorable','honored','hopeful','horrible',
113     'hospitable','hot','huge','humble','humiliating','humming','humongous','hungry',
114     'hurtful','husky','icky','icy','ideal','idealistic','identical','idle','idiotic',
115     'idolized','ignorant','ill','illegal','ill-fated','ill-informed','illiterate',
116     'illustrious','imaginary','imaginative','immaculate','immaterial','immediate',
117     'immense','impassioned','impeccable','impartial','imperfect','imperturbable','impish',
118     'impolite','important','impossible','impractical','impressionable','impressive',
119     'improbable','impure','inborn','incomparable','incompatible','incomplete','inconsequential',
120     'incredible','indelible','inexperienced','indolent','infamous','infantile','infatuated',
121     'inferior','infinite','informal','innocent','insecure','insidious','insignificant',
122     'insistent','instructive','insubstantial','intelligent','intent','intentional',
123     'interesting','internal','international','intrepid','ironclad','irresponsible',
124     'irritating','itchy','jaded','jagged','jam-packed','jaunty','jealous','jittery',
125     'joint','jolly','jovial','joyful','joyous','jubilant','judicious','juicy','jumbo',
126     'junior','jumpy','juvenile','kaleidoscopic','keen','key','kind','kindhearted','kindly',
127     'klutzy','knobby','knotty','knowledgeable','knowing','known','kooky','kosher','lame',
128     'lanky','large','last','lasting','late','lavish','lawful','lazy','leading','lean',
129     'leafy','left','legal','legitimate','light','lighthearted','likable','likely','limited',
130     'limp','limping','linear','lined','liquid','little','live','lively','livid','loathsome',
131     'lone','lonely','long','long-term','loose','lopsided','lost','loud','lovable','lovely',
132     'loving','low','loyal','lucky','lumbering','luminous','lumpy','lustrous','luxurious',
133     'mad','made-up','magnificent','majestic','major','male','mammoth','married','marvelous',
134     'masculine','massive','mature','meager','mealy','mean','measly','meaty','medical',
135     'mediocre','medium','meek','mellow','melodic','memorable','menacing','merry','messy',
136     'metallic','mild','milky','mindless','miniature','minor','minty','miserable','miserly',
137     'misguided','misty','mixed','modern','modest','moist','monstrous','monthly','monumental',
138     'moral','mortified','motherly','motionless','mountainous','muddy','muffled','multicolored',
139     'mundane','murky','mushy','musty','muted','mysterious','naive','narrow','nasty','natural',
140     'naughty','nautical','near','neat','necessary','needy','negative','neglected','negligible',
141     'neighboring','nervous','new','next','nice','nifty','nimble','nippy','nocturnal','noisy',
142     'nonstop','normal','notable','noted','noteworthy','novel','noxious','numb','nutritious',
143     'nutty','obedient','obese','oblong','oily','oblong','obvious','occasional','odd',
144     'oddball','offbeat','offensive','official','old','old-fashioned','only','open','optimal',
145     'optimistic','opulent','orange','orderly','organic','ornate','ornery','ordinary',
146     'original','other','our','outlying','outgoing','outlandish','outrageous','outstanding',
147     'oval','overcooked','overdue','overjoyed','overlooked','palatable','pale','paltry',
148     'parallel','parched','partial','passionate','past','pastel','peaceful','peppery',
149     'perfect','perfumed','periodic','perky','personal','pertinent','pesky','pessimistic',
150     'petty','phony','physical','piercing','pink','pitiful','plain','plaintive','plastic',
151     'playful','pleasant','pleased','pleasing','plump','plush','polished','polite','political',
152     'pointed','pointless','poised','poor','popular','portly','posh','positive','possible',
153     'potable','powerful','powerless','practical','precious','present','prestigious',
154     'pretty','precious','previous','pricey','prickly','primary','prime','pristine','private',
155     'prize','probable','productive','profitable','profuse','proper','proud','prudent',
156     'punctual','pungent','puny','pure','purple','pushy','putrid','puzzled','puzzling',
157     'quaint','qualified','quarrelsome','quarterly','queasy','querulous','questionable',
158     'quick','quick-witted','quiet','quintessential','quirky','quixotic','quizzical',
159     'radiant','ragged','rapid','rare','rash','raw','recent','reckless','rectangular',
160     'ready','real','realistic','reasonable','red','reflecting','regal','regular',
161     'reliable','relieved','remarkable','remorseful','remote','repentant','required',
162     'respectful','responsible','repulsive','revolving','rewarding','rich','rigid',
163     'right','ringed','ripe','roasted','robust','rosy','rotating','rotten','rough',
164     'round','rowdy','royal','rubbery','rundown','ruddy','rude','runny','rural','rusty',
165     'sad','safe','salty','same','sandy','sane','sarcastic','sardonic','satisfied',
166     'scaly','scarce','scared','scary','scented','scholarly','scientific','scornful',
167     'scratchy','scrawny','second','secondary','second-hand','secret','self-assured',
168     'self-reliant','selfish','sentimental','separate','serene','serious','serpentine',
169     'several','severe','shabby','shadowy','shady','shallow','shameful','shameless',
170     'sharp','shimmering','shiny','shocked','shocking','shoddy','short','short-term',
171     'showy','shrill','shy','sick','silent','silky','silly','silver','similar','simple',
172     'simplistic','sinful','single','sizzling','skeletal','skinny','sleepy','slight',
173     'slim','slimy','slippery','slow','slushy','small','smart','smoggy','smooth','smug',
174     'snappy','snarling','sneaky','sniveling','snoopy','sociable','soft','soggy','solid',
175     'somber','some','spherical','sophisticated','sore','sorrowful','soulful','soupy',
176     'sour','Spanish','sparkling','sparse','specific','spectacular','speedy','spicy',
177     'spiffy','spirited','spiteful','splendid','spotless','spotted','spry','square',
178     'squeaky','squiggly','stable','staid','stained','stale','standard','starchy','stark',
179     'starry','steep','sticky','stiff','stimulating','stingy','stormy','straight','strange',
180     'steel','strict','strident','striking','striped','strong','studious','stunning',
181     'stupendous','stupid','sturdy','stylish','subdued','submissive','substantial','subtle',
182     'suburban','sudden','sugary','sunny','super','superb','superficial','superior',
183     'supportive','sure-footed','surprised','suspicious','svelte','sweaty','sweet','sweltering',
184     'swift','sympathetic','tall','talkative','tame','tan','tangible','tart','tasty',
185     'tattered','taut','tedious','teeming','tempting','tender','tense','tepid','terrible',
186     'terrific','testy','thankful','that','these','thick','thin','third','thirsty','this',
187     'thorough','thorny','those','thoughtful','threadbare','thrifty','thunderous','tidy',
188     'tight','timely','tinted','tiny','tired','torn','total','tough','traumatic','treasured',
189     'tremendous','tragic','trained','tremendous','triangular','tricky','trifling','trim',
190     'trivial','troubled','true','trusting','trustworthy','trusty','truthful','tubby',
191     'turbulent','twin','ugly','ultimate','unacceptable','unaware','uncomfortable',
192     'uncommon','unconscious','understated','unequaled','uneven','unfinished','unfit',
193     'unfolded','unfortunate','unhappy','unhealthy','uniform','unimportant','unique',
194     'united','unkempt','unknown','unlawful','unlined','unlucky','unnatural','unpleasant',
195     'unrealistic','unripe','unruly','unselfish','unsightly','unsteady','unsung','untidy',
196     'untimely','untried','untrue','unused','unusual','unwelcome','unwieldy','unwilling',
197     'unwitting','unwritten','upbeat','upright','upset','urban','usable','used','useful',
198     'useless','utilized','utter','vacant','vague','vain','valid','valuable','vapid',
199     'variable','vast','velvety','venerated','vengeful','verifiable','vibrant','vicious',
200     'victorious','vigilant','vigorous','villainous','violet','violent','virtual',
201     'virtuous','visible','vital','vivacious','vivid','voluminous','wan','warlike','warm',
202     'warmhearted','warped','wary','wasteful','watchful','waterlogged','watery','wavy',
203     'wealthy','weak','weary','webbed','wee','weekly','weepy','weighty','weird','welcome',
204     'well-documented','well-groomed','well-informed','well-lit','well-made','well-off',
205     'well-to-do','well-worn','wet','which','whimsical','whirlwind','whispered','white',
206     'whole','whopping','wicked','wide','wide-eyed','wiggly','wild','willing','wilted',
207     'winding','windy','winged','wiry','wise','witty','wobbly','woeful','wonderful',
208     'wooden','woozy','wordy','worldly','worn','worried','worrisome','worse','worst',
209     'worthless','worthwhile','worthy','wrathful','wretched','writhing','wrong','wry',
210     'yawning','yearly','yellow','yellowish','young','youthful','yummy','zany','zealous',
211     'zesty','zigzag']
212 nouns = ['people','history','way','art','world','information','map','two','family',
213     'government','health','system','computer','meat','year','thanks','music','person',
214     'reading','method','data','food','understanding','theory','law','bird','literature',
215     'problem','software','control','knowledge','power','ability','economics','love',
216     'internet','television','science','library','nature','fact','product','idea',
217     'temperature','investment','area','society','activity','story','industry','media',
218     'thing','oven','community','definition','safety','quality','development','language',
219     'management','player','variety','video','week','security','country','exam','movie',
220     'organization','equipment','physics','analysis','policy','series','thought','basis',
221     'boyfriend','direction','strategy','technology','army','camera','freedom','paper',
222     'environment','child','instance','month','truth','marketing','university','writing',
223     'article','department','difference','goal','news','audience','fishing','growth',
224     'income','marriage','user','combination','failure','meaning','medicine','philosophy',
225     'teacher','communication','night','chemistry','disease','disk','energy','nation',
226     'road','role','soup','advertising','location','success','addition','apartment','education',
227     'math','moment','painting','politics','attention','decision','event','property',
228     'shopping','student','wood','competition','distribution','entertainment','office',
229     'population','president','unit','category','cigarette','context','introduction',
230     'opportunity','performance','driver','flight','length','magazine','newspaper',
231     'relationship','teaching','cell','dealer','finding','lake','member','message','phone',
232     'scene','appearance','association','concept','customer','death','discussion','housing',
233     'inflation','insurance','mood','woman','advice','blood','effort','expression','importance',
234     'opinion','payment','reality','responsibility','situation','skill','statement','wealth',
235     'application','city','county','depth','estate','foundation','grandmother','heart',
236     'perspective','photo','recipe','studio','topic','collection','depression','imagination',
237     'passion','percentage','resource','setting','ad','agency','college','connection',
238     'criticism','debt','description','memory','patience','secretary','solution','administration',
239     'aspect','attitude','director','personality','psychology','recommendation','response',
240     'selection','storage','version','alcohol','argument','complaint','contract','emphasis',
241     'highway','loss','membership','possession','preparation','steak','union','agreement',
242     'cancer','currency','employment','engineering','entry','interaction','mixture','preference',
243     'region','republic','tradition','virus','actor','classroom','delivery','device',
244     'difficulty','drama','election','engine','football','guidance','hotel','owner',
245     'priority','protection','suggestion','tension','variation','anxiety','atmosphere',
246     'awareness','bath','bread','candidate','climate','comparison','confusion','construction',
247     'elevator','emotion','employee','employer','guest','height','leadership','mall','manager',
248     'operation','recording','sample','transportation','charity','cousin','disaster','editor',
249     'efficiency','excitement','extent','feedback','guitar','homework','leader','mom','outcome',
250     'permission','presentation','promotion','reflection','refrigerator','resolution','revenue',
251     'session','singer','tennis','basket','bonus','cabinet','childhood','church','clothes','coffee',
252     'dinner','drawing','hair','hearing','initiative','judgment','lab','measurement','mode','mud',
253     'orange','poetry','police','possibility','procedure','queen','ratio','relation','restaurant',
254     'satisfaction','sector','signature','significance','song','tooth','town','vehicle','volume','wife',
255     'accident','airport','appointment','arrival','assumption','baseball','chapter','committee',
256     'conversation','database','enthusiasm','error','explanation','farmer','gate','girl','hall',
257     'historian','hospital','injury','instruction','maintenance','manufacturer','meal','perception','pie',
258     'poem','presence','proposal','reception','replacement','revolution','river','son','speech','tea',
259     'village','warning','winner','worker','writer','assistance','breath','buyer','chest','chocolate',
260     'conclusion','contribution','cookie','courage','dad','desk','drawer','establishment','examination',
261     'garbage','grocery','honey','impression','improvement','independence','insect','inspection',
262     'inspector','king','ladder','menu','penalty','piano','potato','profession','professor','quantity',
263     'reaction','requirement','salad','sister','supermarket','tongue','weakness','wedding','affair',
264     'ambition','analyst','apple','assignment','assistant','bathroom','bedroom','beer','birthday',
265     'celebration','championship','cheek','client','consequence','departure','diamond','dirt','ear',
266     'fortune','friendship','funeral','gene','girlfriend','hat','indication','intention','lady',
267     'midnight','negotiation','obligation','passenger','pizza','platform','poet','pollution',
268     'recognition','reputation','shirt','sir','speaker','stranger','surgery','sympathy','tale','throat',
269     'trainer','uncle','youth','time','work','film','water','money','example','while','business','study',
270     'game','life','form','air','day','place','number','part','field','fish','back','process','heat',
271     'hand','experience','job','book','end','point','type','home','economy','value','body','market',
272     'guide','interest','state','radio','course','company','price','size','card','list','mind','trade',
273     'line','care','group','risk','word','fat','force','key','light','training','name','school','top',
274     'amount','level','order','practice','research','sense','service','piece','web','boss','sport','fun',
275     'house','page','term','test','answer','sound','focus','matter','kind','soil','board','oil','picture',
276     'access','garden','range','rate','reason','future','site','demand','exercise','image','case','cause',
277     'coast','action','age','bad','boat','record','result','section','building','mouse','cash','class',
278     'nothing','period','plan','store','tax','side','subject','space','rule','stock','weather','chance',
279     'figure','man','model','source','beginning','earth','program','chicken','design','feature','head',
280     'material','purpose','question','rock','salt','act','birth','car','dog','object','scale','sun',
281     'note','profit','rent','speed','style','war','bank','craft','half','inside','outside','standard',
282     'bus','exchange','eye','fire','position','pressure','stress','advantage','benefit','box','frame',
283     'issue','step','cycle','face','item','metal','paint','review','room','screen','structure','view',
284     'account','ball','discipline','medium','share','balance','bit','black','bottom','choice','gift',
285     'impact','machine','shape','tool','wind','address','average','career','culture','morning','pot',
286     'sign','table','task','condition','contact','credit','egg','hope','ice','network','north','square',
287     'attempt','date','effect','link','post','star','voice','capital','challenge','friend','self','shot',
288     'brush','couple','debate','exit','front','function','lack','living','plant','plastic','spot',
289     'summer','taste','theme','track','wing','brain','button','click','desire','foot','gas','influence',
290     'notice','rain','wall','base','damage','distance','feeling','pair','savings','staff','sugar',
291     'target','text','animal','author','budget','discount','file','ground','lesson','minute','officer',
292     'phase','reference','register','sky','stage','stick','title','trouble','bowl','bridge','campaign',
293     'character','club','edge','evidence','fan','letter','lock','maximum','novel','option','pack','park',
294     'plenty','quarter','skin','sort','weight','baby','background','carry','dish','factor','fruit',
295     'glass','joint','master','muscle','red','strength','traffic','trip','vegetable','appeal','chart',
296     'gear','ideal','kitchen','land','log','mother','net','party','principle','relative','sale','season',
297     'signal','spirit','street','tree','wave','belt','bench','commission','copy','drop','minimum','path',
298     'progress','project','sea','south','status','stuff','ticket','tour','angle','blue','breakfast',
299     'confidence','daughter','degree','doctor','dot','dream','duty','essay','father','fee','finance',
300     'hour','juice','limit','luck','milk','mouth','peace','pipe','seat','stable','storm','substance',
301     'team','trick','afternoon','bat','beach','blank','catch','chain','consideration','cream','crew',
302     'detail','gold','interview','kid','mark','match','mission','pain','pleasure','score','screw','sex',
303     'shop','shower','suit','tone','window','agent','band','block','bone','calendar','cap','coat',
304     'contest','corner','court','cup','district','door','east','finger','garage','guarantee','hole',
305     'hook','implement','layer','lecture','lie','manner','meeting','nose','parking','partner','profile',
306     'respect','rice','routine','schedule','swimming','telephone','tip','winter','airline','bag','battle',
307     'bed','bill','bother','cake','code','curve','designer','dimension','dress','ease','emergency',
308     'evening','extension','farm','fight','gap','grade','holiday','horror','horse','host','husband',
309     'loan','mistake','mountain','nail','noise','occasion','package','patient','pause','phrase','proof',
310     'race','relief','sand','sentence','shoulder','smoke','stomach','string','tourist','towel','vacation',
311     'west','wheel','wine','arm','aside','associate','bet','blow','border','branch','breast','brother',
312     'buddy','bunch','chip','coach','cross','document','draft','dust','expert','floor','god','golf',
313     'habit','iron','judge','knife','landscape','league','mail','mess','native','opening','parent',
314     'pattern','pin','pool','pound','request','salary','shame','shelter','shoe','silver','tackle','tank',
315     'trust','assist','bake','bar','bell','bike','blame','boy','brick','chair','closet','clue','collar',
316     'comment','conference','devil','diet','fear','fuel','glove','jacket','lunch','monitor','mortgage',
317     'nurse','pace','panic','peak','plane','reward','row','sandwich','shock','spite','spray','surprise',
318     'till','transition','weekend','welcome','yard','alarm','bend','bicycle','bite','blind','bottle',
319     'cable','candle','clerk','cloud','concert','counter','flower','grandfather','harm','knee','lawyer',
320     'leather','load','mirror','neck','pension','plate','purple','ruin','ship','skirt','slice','snow',
321     'specialist','stroke','switch','trash','tune','zone','anger','award','bid','bitter','boot','bug',
322     'camp','candy','carpet','cat','champion','channel','clock','comfort','cow','crack','engineer',
323     'entrance','fault','grass','guy','hell','highlight','incident','island','joke','jury','leg','lip',
324     'mate','motor','nerve','passage','pen','pride','priest','prize','promise','resident','resort','ring',
325     'roof','rope','sail','scheme','script','sock','station','toe','tower','truck','witness','a','you',
326     'it','can','will','if','one','many','most','other','use','make','good','look','help','go','great',
327     'being','few','might','still','public','read','keep','start','give','human','local','general','she',
328     'specific','long','play','feel','high','tonight','put','common','set','change','simple','past','big',
329     'possible','particular','today','major','personal','current','national','cut','natural','physical',
330     'show','try','check','second','call','move','pay','let','increase','single','individual','turn',
331     'ask','buy','guard','hold','main','offer','potential','professional','international','travel','cook',
332     'alternative','following','special','working','whole','dance','excuse','cold','commercial','low',
333     'purchase','deal','primary','worth','fall','necessary','positive','produce','search','present',
334     'spend','talk','creative','tell','cost','drive','green','support','glad','remove','return','run',
335     'complex','due','effective','middle','regular','reserve','independent','leave','original','reach',
336     'rest','serve','watch','beautiful','charge','active','break','negative','safe','stay','visit',
337     'visual','affect','cover','report','rise','walk','white','beyond','junior','pick','unique',
338     'anything','classic','final','lift','mix','private','stop','teach','western','concern','familiar',
339     'fly','official','broad','comfortable','gain','maybe','rich','save','stand','young','fail','heavy',
340     'hello','lead','listen','valuable','worry','handle','leading','meet','release','sell','finish',
341     'normal','press','ride','secret','spread','spring','tough','wait','brown','deep','display','flow',
342     'hit','objective','shoot','touch','cancel','chemical','cry','dump','extreme','push','conflict','eat',
343     'fill','formal','jump','kick','opposite','pass','pitch','remote','total','treat','vast','abuse',
344     'beat','burn','deposit','print','raise','sleep','somewhere','advance','anywhere','consist','dark',
345     'double','draw','equal','fix','hire','internal','join','kill','sensitive','tap','win','attack',
346     'claim','constant','drag','drink','guess','minor','pull','raw','soft','solid','wear','weird',
347     'wonder','annual','count','dead','doubt','feed','forever','impress','nobody','repeat','round','sing',
348     'slide','strip','whereas','wish','combine','command','dig','divide','equivalent','hang','hunt',
349     'initial','march','mention','smell','spiritual','survey','tie','adult','brief','crazy','escape',
350     'gather','hate','prior','repair','rough','sad','scratch','sick','strike','employ','external','hurt',
351     'illegal','laugh','lay','mobile','nasty','ordinary','respond','royal','senior','split','strain',
352     'struggle','swim','train','upper','wash','yellow','convert','crash','dependent','fold','funny',
353     'grab','hide','miss','permit','quote','recover','resolve','roll','sink','slip','spare','suspect',
354     'sweet','swing','twist','upstairs','usual','abroad','brave','calm','concentrate','estimate','grand',
355     'male','mine','prompt','quiet','refuse','regret','reveal','rush','shake','shift','shine','steal',
356     'suck','surround','anybody','bear','brilliant','dare','dear','delay','drunk','female','hurry',
357     'inevitable','invite','kiss','neat','pop','punch','quit','reply','representative','resist','rip',
358     'rub','silly','smile','spell','stretch','stupid','tear','temporary','tomorrow','wake','wrap',
359     'yesterday']
360
def get_random_name(with_ext=True):
    """Build a random name such as ``able_history_123.txt``.

    The name is a random entry of ``adjectives``, a random entry of
    ``nouns`` and a random integer in the inclusive range 0..50000, joined
    with underscores.

    :param with_ext: when truthy (the default) the ``.txt`` extension is
        appended; when falsy the bare name is returned.
    :returns: the generated name as a string.
    """
    # A conditional expression states the intent directly; the former
    # ``cond and a or b`` form only works while the middle operand is truthy.
    extension = '.txt' if with_ext else ''
    return "{}_{}_{}{}".format(
        random.choice(adjectives),
        random.choice(nouns),
        random.randint(0, 50000),
        extension)
367
def get_random_file(max_filesize):
    """Return a random file entry formatted as "<start>:<size>:<name>".

    The start offset is drawn from 0 to max_filesize - 1025 and the size
    from 0 to max_filesize - start (all bounds included), so start + size
    never goes past max_filesize.  The name comes from get_random_name()
    with its default '.txt' extension.
    """
    offset = random.randint(0, max_filesize - 1025)
    length = random.randint(0, max_filesize - offset)
    # The name is generated last so the random draws happen in a fixed order.
    name = get_random_name()
    return "%d:%d:%s" % (offset, length, name)
373
def get_stream(name, max_filesize, data_loc, args):
    """Build one stream line: "<name> <data_loc> <file> <file> ...".

    The number of file entries is chosen at random between args.min_files
    and args.max_files (both included); every entry is produced by
    get_random_file(max_filesize).
    """
    file_count = random.randint(args.min_files, args.max_files)
    entries = [get_random_file(max_filesize) for _ in range(file_count)]
    return "{} {} {}".format(name, data_loc, ' '.join(entries))
380
def create_substreams(depth, base_stream_name, max_filesize, data_loc, args, current_size=0):
    """Recursively generate the streams of a random directory tree.

    A stream is always generated for base_stream_name itself.  Unless the
    size limit has been reached or depth is 0, a random number of
    subdirectories (between args.min_subdirs and args.max_subdirs, both
    included) is created below it, each one built with depth - 1.

    current_size is the number of characters generated so far; it is
    compared against max_manifest_size to stop early.  When a subtree pushes
    the running total up to the limit, that subtree is dropped and no more
    siblings are generated.

    Returns the list of stream strings, with the stream for
    base_stream_name first.
    """
    own_stream = get_stream(base_stream_name, max_filesize, data_loc, args)
    current_size += len(own_stream)
    streams = [own_stream]

    if current_size >= max_manifest_size:
        logger.debug("Maximum manifest size reached -- finishing early at {}".format(base_stream_name))
        return streams
    if depth == 0:
        logger.debug("Finished stream {}".format(base_stream_name))
        return streams

    subdir_count = random.randint(args.min_subdirs, args.max_subdirs)
    for _ in range(subdir_count):
        child_name = "{}/{}".format(base_stream_name, get_random_name(False))
        child_streams = create_substreams(
            depth-1, child_name, max_filesize, data_loc, args, current_size)
        current_size += sum(map(len, child_streams))
        if current_size >= max_manifest_size:
            # The subtree that crossed the limit is not kept.
            break
        streams += child_streams
    return streams
400
def parse_arguments(arguments):
    """Parse and validate the command line options.

    arguments is the list of argument strings handed to
    arg_parser.parse_args(); None makes argparse read them from sys.argv.
    When --debug is given, the module logger is switched to DEBUG.

    Out-of-range or inconsistent values are reported through
    arg_parser.error(), which prints a usage message and exits.

    Returns the parsed argparse namespace.
    """
    args = arg_parser.parse_args(arguments)
    if args.debug:
        logger.setLevel(logging.DEBUG)
    # get_stream() writes one file token per file, so a directory with no
    # files would produce a stream without file tokens, which is not a valid
    # manifest stream.  Reject it here instead of failing at the API call.
    if args.min_files < 1:
        arg_parser.error("--min-files should be at least 1")
    if args.max_files < args.min_files:
        arg_parser.error("--min-files={} should be less or equal than max-files={}".format(args.min_files, args.max_files))
    if args.min_depth < 0:
        arg_parser.error("--min-depth should be at least 0")
    if args.max_depth < 0 or args.max_depth < args.min_depth:
        arg_parser.error("--max-depth should be at >= 0 and >= min-depth={}".format(args.min_depth))
    if args.max_subdirs < args.min_subdirs:
        arg_parser.error("--min-subdirs={} should be less or equal than max-subdirs={}".format(args.min_subdirs, args.max_subdirs))
    return args
414
def main(arguments=None):
    """Create a collection filled with randomly generated streams.

    Parses the options, stores one block of 1024*1024 random printable
    characters through the Keep client, generates a random tree of streams
    that all reference that block, and creates a collection whose manifest
    is made of those streams (cut short if it would exceed
    max_manifest_size).

    Returns 0 on success.  If creating the collection fails, the collection
    name, the start of the manifest and its size are logged and the
    exception is re-raised.
    """
    args = parse_arguments(arguments)
    logger.info("Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={}) and (min={}, max={}) subdirs in each depth level...".format(args.min_files, args.max_files, args.min_depth, args.max_depth, args.min_subdirs, args.max_subdirs))
    api = arvados.api('v1', timeout=5*60)
    max_filesize = 1024*1024
    data_block = ''.join(random.choice(string.printable) for _ in range(max_filesize))
    data_loc = arvados.KeepClient(api).put(data_block)
    tree_depth = random.randint(args.min_depth, args.max_depth)
    streams = create_substreams(tree_depth, '.', max_filesize, data_loc, args)

    # Keep streams, in order, until the next one would not fit; the size
    # check looks at the text accumulated so far plus the candidate stream.
    accepted = []
    manifest_size = 0
    for stream in streams:
        if manifest_size + len(stream) > max_manifest_size:
            logger.info("Skipping stream {} to avoid making a manifest bigger than 128MiB".format(stream.split(' ')[0]))
            break
        accepted.append(stream)
        manifest_size += len(stream) + 1  # +1 for the trailing newline
    manifest = ''.join(line + '\n' for line in accepted)

    try:
        coll_name = get_random_name(False)
        request_body = {
            "collection": {
                "name": coll_name,
                "manifest_text": manifest,
            },
        }
        coll = api.collections().create(body=request_body).execute()
    except:
        # Log some context for the failure, then let the exception propagate.
        logger.info("ERROR creating collection with name '{}' and manifest:\n'{}...'\nSize: {}".format(coll_name, manifest[:1024], len(manifest)))
        raise
    logger.info("Created collection {} - manifest size: {}".format(coll["uuid"], len(manifest)))
    return 0
443
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)