8031f29fb7a4bb2c673eefeeafcfe0d8f67011b5
[arvados.git] / tools / test-collection-create / test-collection-create.py
1 #!/usr/bin/env python3
2 #
3 # Copyright (C) The Arvados Authors. All rights reserved.
4 #
5 # SPDX-License-Identifier: CC-BY-SA-3.0
6
7 import argparse
8 import logging
9 import random
10 import string
11 import sys
12
13 import arvados
14 import arvados.collection
15
# Script-wide logger. It starts at INFO; parse_arguments() raises it to DEBUG
# when --debug is given.
logger = logging.getLogger('arvados.test_collection_create')
logger.setLevel(logging.INFO)

# The options are declared on a parent parser created without its own -h/--help
# so that they can be inherited through `parents=` by the real parser below.
opts = argparse.ArgumentParser(add_help=False)
opts.add_argument(
    '--min-files',
    default=30000,
    type=int,
    help="\nMinimum number of files on each directory. Default: 30000.\n")
opts.add_argument(
    '--max-files',
    default=30000,
    type=int,
    help="\nMaximum number of files on each directory. Default: 30000.\n")
opts.add_argument(
    '--min-depth',
    default=0,
    type=int,
    help="\nMinimum depth for the created tree structure. Default: 0.\n")
opts.add_argument(
    '--max-depth',
    default=0,
    type=int,
    help="\nMaximum depth for the created tree structure. Default: 0.\n")
opts.add_argument(
    '--debug',
    default=False,
    action='store_true',
    help="\nSets logging level to DEBUG.\n")

# The parser actually used by the script: the inherited options plus a
# description (and the automatic -h/--help).
arg_parser = argparse.ArgumentParser(
    parents=[opts],
    description='Create a collection with garbage data for testing purposes.')
39
40 adjectives = ['abandoned','able','absolute','adorable','adventurous','academic',
41     'acceptable','acclaimed','accomplished','accurate','aching','acidic','acrobatic',
42     'active','actual','adept','admirable','admired','adolescent','adorable','adored',
43     'advanced','afraid','affectionate','aged','aggravating','aggressive','agile',
44     'agitated','agonizing','agreeable','ajar','alarmed','alarming','alert','alienated',
45     'alive','all','altruistic','amazing','ambitious','ample','amused','amusing','anchored',
46     'ancient','angelic','angry','anguished','animated','annual','another','antique',
47     'anxious','any','apprehensive','appropriate','apt','arctic','arid','aromatic','artistic',
48     'ashamed','assured','astonishing','athletic','attached','attentive','attractive',
49     'austere','authentic','authorized','automatic','avaricious','average','aware','awesome',
50     'awful','awkward','babyish','bad','back','baggy','bare','barren','basic','beautiful',
51     'belated','beloved','beneficial','better','best','bewitched','big','big-hearted',
52     'biodegradable','bite-sized','bitter','black','black-and-white','bland','blank',
53     'blaring','bleak','blind','blissful','blond','blue','blushing','bogus','boiling',
54     'bold','bony','boring','bossy','both','bouncy','bountiful','bowed','brave','breakable',
55     'brief','bright','brilliant','brisk','broken','bronze','brown','bruised','bubbly',
56     'bulky','bumpy','buoyant','burdensome','burly','bustling','busy','buttery','buzzing',
57     'calculating','calm','candid','canine','capital','carefree','careful','careless',
58     'caring','cautious','cavernous','celebrated','charming','cheap','cheerful','cheery',
59     'chief','chilly','chubby','circular','classic','clean','clear','clear-cut','clever',
60     'close','closed','cloudy','clueless','clumsy','cluttered','coarse','cold','colorful',
61     'colorless','colossal','comfortable','common','compassionate','competent','complete',
62     'complex','complicated','composed','concerned','concrete','confused','conscious',
63     'considerate','constant','content','conventional','cooked','cool','cooperative',
64     'coordinated','corny','corrupt','costly','courageous','courteous','crafty','crazy',
65     'creamy','creative','creepy','criminal','crisp','critical','crooked','crowded',
66     'cruel','crushing','cuddly','cultivated','cultured','cumbersome','curly','curvy',
67     'cute','cylindrical','damaged','damp','dangerous','dapper','daring','darling','dark',
68     'dazzling','dead','deadly','deafening','dear','dearest','decent','decimal','decisive',
69     'deep','defenseless','defensive','defiant','deficient','definite','definitive','delayed',
70     'delectable','delicious','delightful','delirious','demanding','dense','dental',
71     'dependable','dependent','descriptive','deserted','detailed','determined','devoted',
72     'different','difficult','digital','diligent','dim','dimpled','dimwitted','direct',
73     'disastrous','discrete','disfigured','disgusting','disloyal','dismal','distant',
74     'downright','dreary','dirty','disguised','dishonest','dismal','distant','distinct',
75     'distorted','dizzy','dopey','doting','double','downright','drab','drafty','dramatic',
76     'dreary','droopy','dry','dual','dull','dutiful','each','eager','earnest','early',
77     'easy','easy-going','ecstatic','edible','educated','elaborate','elastic','elated',
78     'elderly','electric','elegant','elementary','elliptical','embarrassed','embellished',
79     'eminent','emotional','empty','enchanted','enchanting','energetic','enlightened',
80     'enormous','enraged','entire','envious','equal','equatorial','essential','esteemed',
81     'ethical','euphoric','even','evergreen','everlasting','every','evil','exalted',
82     'excellent','exemplary','exhausted','excitable','excited','exciting','exotic',
83     'expensive','experienced','expert','extraneous','extroverted','extra-large','extra-small',
84     'fabulous','failing','faint','fair','faithful','fake','false','familiar','famous',
85     'fancy','fantastic','far','faraway','far-flung','far-off','fast','fat','fatal',
86     'fatherly','favorable','favorite','fearful','fearless','feisty','feline','female',
87     'feminine','few','fickle','filthy','fine','finished','firm','first','firsthand',
88     'fitting','fixed','flaky','flamboyant','flashy','flat','flawed','flawless','flickering',
89     'flimsy','flippant','flowery','fluffy','fluid','flustered','focused','fond','foolhardy',
90     'foolish','forceful','forked','formal','forsaken','forthright','fortunate','fragrant',
91     'frail','frank','frayed','free','French','fresh','frequent','friendly','frightened',
92     'frightening','frigid','frilly','frizzy','frivolous','front','frosty','frozen',
93     'frugal','fruitful','full','fumbling','functional','funny','fussy','fuzzy','gargantuan',
94     'gaseous','general','generous','gentle','genuine','giant','giddy','gigantic','gifted',
95     'giving','glamorous','glaring','glass','gleaming','gleeful','glistening','glittering',
96     'gloomy','glorious','glossy','glum','golden','good','good-natured','gorgeous',
97     'graceful','gracious','grand','grandiose','granular','grateful','grave','gray',
98     'great','greedy','green','gregarious','grim','grimy','gripping','grizzled','gross',
99     'grotesque','grouchy','grounded','growing','growling','grown','grubby','gruesome',
100     'grumpy','guilty','gullible','gummy','hairy','half','handmade','handsome','handy',
101     'happy','happy-go-lucky','hard','hard-to-find','harmful','harmless','harmonious',
102     'harsh','hasty','hateful','haunting','healthy','heartfelt','hearty','heavenly',
103     'heavy','hefty','helpful','helpless','hidden','hideous','high','high-level','hilarious',
104     'hoarse','hollow','homely','honest','honorable','honored','hopeful','horrible',
105     'hospitable','hot','huge','humble','humiliating','humming','humongous','hungry',
106     'hurtful','husky','icky','icy','ideal','idealistic','identical','idle','idiotic',
107     'idolized','ignorant','ill','illegal','ill-fated','ill-informed','illiterate',
108     'illustrious','imaginary','imaginative','immaculate','immaterial','immediate',
109     'immense','impassioned','impeccable','impartial','imperfect','imperturbable','impish',
110     'impolite','important','impossible','impractical','impressionable','impressive',
111     'improbable','impure','inborn','incomparable','incompatible','incomplete','inconsequential',
112     'incredible','indelible','inexperienced','indolent','infamous','infantile','infatuated',
113     'inferior','infinite','informal','innocent','insecure','insidious','insignificant',
114     'insistent','instructive','insubstantial','intelligent','intent','intentional',
115     'interesting','internal','international','intrepid','ironclad','irresponsible',
116     'irritating','itchy','jaded','jagged','jam-packed','jaunty','jealous','jittery',
117     'joint','jolly','jovial','joyful','joyous','jubilant','judicious','juicy','jumbo',
118     'junior','jumpy','juvenile','kaleidoscopic','keen','key','kind','kindhearted','kindly',
119     'klutzy','knobby','knotty','knowledgeable','knowing','known','kooky','kosher','lame',
120     'lanky','large','last','lasting','late','lavish','lawful','lazy','leading','lean',
121     'leafy','left','legal','legitimate','light','lighthearted','likable','likely','limited',
122     'limp','limping','linear','lined','liquid','little','live','lively','livid','loathsome',
123     'lone','lonely','long','long-term','loose','lopsided','lost','loud','lovable','lovely',
124     'loving','low','loyal','lucky','lumbering','luminous','lumpy','lustrous','luxurious',
125     'mad','made-up','magnificent','majestic','major','male','mammoth','married','marvelous',
126     'masculine','massive','mature','meager','mealy','mean','measly','meaty','medical',
127     'mediocre','medium','meek','mellow','melodic','memorable','menacing','merry','messy',
128     'metallic','mild','milky','mindless','miniature','minor','minty','miserable','miserly',
129     'misguided','misty','mixed','modern','modest','moist','monstrous','monthly','monumental',
130     'moral','mortified','motherly','motionless','mountainous','muddy','muffled','multicolored',
131     'mundane','murky','mushy','musty','muted','mysterious','naive','narrow','nasty','natural',
132     'naughty','nautical','near','neat','necessary','needy','negative','neglected','negligible',
133     'neighboring','nervous','new','next','nice','nifty','nimble','nippy','nocturnal','noisy',
134     'nonstop','normal','notable','noted','noteworthy','novel','noxious','numb','nutritious',
135     'nutty','obedient','obese','oblong','oily','oblong','obvious','occasional','odd',
136     'oddball','offbeat','offensive','official','old','old-fashioned','only','open','optimal',
137     'optimistic','opulent','orange','orderly','organic','ornate','ornery','ordinary',
138     'original','other','our','outlying','outgoing','outlandish','outrageous','outstanding',
139     'oval','overcooked','overdue','overjoyed','overlooked','palatable','pale','paltry',
140     'parallel','parched','partial','passionate','past','pastel','peaceful','peppery',
141     'perfect','perfumed','periodic','perky','personal','pertinent','pesky','pessimistic',
142     'petty','phony','physical','piercing','pink','pitiful','plain','plaintive','plastic',
143     'playful','pleasant','pleased','pleasing','plump','plush','polished','polite','political',
144     'pointed','pointless','poised','poor','popular','portly','posh','positive','possible',
145     'potable','powerful','powerless','practical','precious','present','prestigious',
146     'pretty','precious','previous','pricey','prickly','primary','prime','pristine','private',
147     'prize','probable','productive','profitable','profuse','proper','proud','prudent',
148     'punctual','pungent','puny','pure','purple','pushy','putrid','puzzled','puzzling',
149     'quaint','qualified','quarrelsome','quarterly','queasy','querulous','questionable',
150     'quick','quick-witted','quiet','quintessential','quirky','quixotic','quizzical',
151     'radiant','ragged','rapid','rare','rash','raw','recent','reckless','rectangular',
152     'ready','real','realistic','reasonable','red','reflecting','regal','regular',
153     'reliable','relieved','remarkable','remorseful','remote','repentant','required',
154     'respectful','responsible','repulsive','revolving','rewarding','rich','rigid',
155     'right','ringed','ripe','roasted','robust','rosy','rotating','rotten','rough',
156     'round','rowdy','royal','rubbery','rundown','ruddy','rude','runny','rural','rusty',
157     'sad','safe','salty','same','sandy','sane','sarcastic','sardonic','satisfied',
158     'scaly','scarce','scared','scary','scented','scholarly','scientific','scornful',
159     'scratchy','scrawny','second','secondary','second-hand','secret','self-assured',
160     'self-reliant','selfish','sentimental','separate','serene','serious','serpentine',
161     'several','severe','shabby','shadowy','shady','shallow','shameful','shameless',
162     'sharp','shimmering','shiny','shocked','shocking','shoddy','short','short-term',
163     'showy','shrill','shy','sick','silent','silky','silly','silver','similar','simple',
164     'simplistic','sinful','single','sizzling','skeletal','skinny','sleepy','slight',
165     'slim','slimy','slippery','slow','slushy','small','smart','smoggy','smooth','smug',
166     'snappy','snarling','sneaky','sniveling','snoopy','sociable','soft','soggy','solid',
167     'somber','some','spherical','sophisticated','sore','sorrowful','soulful','soupy',
168     'sour','Spanish','sparkling','sparse','specific','spectacular','speedy','spicy',
169     'spiffy','spirited','spiteful','splendid','spotless','spotted','spry','square',
170     'squeaky','squiggly','stable','staid','stained','stale','standard','starchy','stark',
171     'starry','steep','sticky','stiff','stimulating','stingy','stormy','straight','strange',
172     'steel','strict','strident','striking','striped','strong','studious','stunning',
173     'stupendous','stupid','sturdy','stylish','subdued','submissive','substantial','subtle',
174     'suburban','sudden','sugary','sunny','super','superb','superficial','superior',
175     'supportive','sure-footed','surprised','suspicious','svelte','sweaty','sweet','sweltering',
176     'swift','sympathetic','tall','talkative','tame','tan','tangible','tart','tasty',
177     'tattered','taut','tedious','teeming','tempting','tender','tense','tepid','terrible',
178     'terrific','testy','thankful','that','these','thick','thin','third','thirsty','this',
179     'thorough','thorny','those','thoughtful','threadbare','thrifty','thunderous','tidy',
180     'tight','timely','tinted','tiny','tired','torn','total','tough','traumatic','treasured',
181     'tremendous','tragic','trained','tremendous','triangular','tricky','trifling','trim',
182     'trivial','troubled','true','trusting','trustworthy','trusty','truthful','tubby',
183     'turbulent','twin','ugly','ultimate','unacceptable','unaware','uncomfortable',
184     'uncommon','unconscious','understated','unequaled','uneven','unfinished','unfit',
185     'unfolded','unfortunate','unhappy','unhealthy','uniform','unimportant','unique',
186     'united','unkempt','unknown','unlawful','unlined','unlucky','unnatural','unpleasant',
187     'unrealistic','unripe','unruly','unselfish','unsightly','unsteady','unsung','untidy',
188     'untimely','untried','untrue','unused','unusual','unwelcome','unwieldy','unwilling',
189     'unwitting','unwritten','upbeat','upright','upset','urban','usable','used','useful',
190     'useless','utilized','utter','vacant','vague','vain','valid','valuable','vapid',
191     'variable','vast','velvety','venerated','vengeful','verifiable','vibrant','vicious',
192     'victorious','vigilant','vigorous','villainous','violet','violent','virtual',
193     'virtuous','visible','vital','vivacious','vivid','voluminous','wan','warlike','warm',
194     'warmhearted','warped','wary','wasteful','watchful','waterlogged','watery','wavy',
195     'wealthy','weak','weary','webbed','wee','weekly','weepy','weighty','weird','welcome',
196     'well-documented','well-groomed','well-informed','well-lit','well-made','well-off',
197     'well-to-do','well-worn','wet','which','whimsical','whirlwind','whispered','white',
198     'whole','whopping','wicked','wide','wide-eyed','wiggly','wild','willing','wilted',
199     'winding','windy','winged','wiry','wise','witty','wobbly','woeful','wonderful',
200     'wooden','woozy','wordy','worldly','worn','worried','worrisome','worse','worst',
201     'worthless','worthwhile','worthy','wrathful','wretched','writhing','wrong','wry',
202     'yawning','yearly','yellow','yellowish','young','youthful','yummy','zany','zealous',
203     'zesty','zigzag']
204 nouns = ['people','history','way','art','world','information','map','two','family',
205     'government','health','system','computer','meat','year','thanks','music','person',
206     'reading','method','data','food','understanding','theory','law','bird','literature',
207     'problem','software','control','knowledge','power','ability','economics','love',
208     'internet','television','science','library','nature','fact','product','idea',
209     'temperature','investment','area','society','activity','story','industry','media',
210     'thing','oven','community','definition','safety','quality','development','language',
211     'management','player','variety','video','week','security','country','exam','movie',
212     'organization','equipment','physics','analysis','policy','series','thought','basis',
213     'boyfriend','direction','strategy','technology','army','camera','freedom','paper',
214     'environment','child','instance','month','truth','marketing','university','writing',
215     'article','department','difference','goal','news','audience','fishing','growth',
216     'income','marriage','user','combination','failure','meaning','medicine','philosophy',
217     'teacher','communication','night','chemistry','disease','disk','energy','nation',
218     'road','role','soup','advertising','location','success','addition','apartment','education',
219     'math','moment','painting','politics','attention','decision','event','property',
220     'shopping','student','wood','competition','distribution','entertainment','office',
221     'population','president','unit','category','cigarette','context','introduction',
222     'opportunity','performance','driver','flight','length','magazine','newspaper',
223     'relationship','teaching','cell','dealer','finding','lake','member','message','phone',
224     'scene','appearance','association','concept','customer','death','discussion','housing',
225     'inflation','insurance','mood','woman','advice','blood','effort','expression','importance',
226     'opinion','payment','reality','responsibility','situation','skill','statement','wealth',
227     'application','city','county','depth','estate','foundation','grandmother','heart',
228     'perspective','photo','recipe','studio','topic','collection','depression','imagination',
229     'passion','percentage','resource','setting','ad','agency','college','connection',
230     'criticism','debt','description','memory','patience','secretary','solution','administration',
231     'aspect','attitude','director','personality','psychology','recommendation','response',
232     'selection','storage','version','alcohol','argument','complaint','contract','emphasis',
233     'highway','loss','membership','possession','preparation','steak','union','agreement',
234     'cancer','currency','employment','engineering','entry','interaction','mixture','preference',
235     'region','republic','tradition','virus','actor','classroom','delivery','device',
236     'difficulty','drama','election','engine','football','guidance','hotel','owner',
237     'priority','protection','suggestion','tension','variation','anxiety','atmosphere',
238     'awareness','bath','bread','candidate','climate','comparison','confusion','construction',
239     'elevator','emotion','employee','employer','guest','height','leadership','mall','manager',
240     'operation','recording','sample','transportation','charity','cousin','disaster','editor',
241     'efficiency','excitement','extent','feedback','guitar','homework','leader','mom','outcome',
242     'permission','presentation','promotion','reflection','refrigerator','resolution','revenue',
243     'session','singer','tennis','basket','bonus','cabinet','childhood','church','clothes','coffee',
244     'dinner','drawing','hair','hearing','initiative','judgment','lab','measurement','mode','mud',
245     'orange','poetry','police','possibility','procedure','queen','ratio','relation','restaurant',
246     'satisfaction','sector','signature','significance','song','tooth','town','vehicle','volume','wife',
247     'accident','airport','appointment','arrival','assumption','baseball','chapter','committee',
248     'conversation','database','enthusiasm','error','explanation','farmer','gate','girl','hall',
249     'historian','hospital','injury','instruction','maintenance','manufacturer','meal','perception','pie',
250     'poem','presence','proposal','reception','replacement','revolution','river','son','speech','tea',
251     'village','warning','winner','worker','writer','assistance','breath','buyer','chest','chocolate',
252     'conclusion','contribution','cookie','courage','dad','desk','drawer','establishment','examination',
253     'garbage','grocery','honey','impression','improvement','independence','insect','inspection',
254     'inspector','king','ladder','menu','penalty','piano','potato','profession','professor','quantity',
255     'reaction','requirement','salad','sister','supermarket','tongue','weakness','wedding','affair',
256     'ambition','analyst','apple','assignment','assistant','bathroom','bedroom','beer','birthday',
257     'celebration','championship','cheek','client','consequence','departure','diamond','dirt','ear',
258     'fortune','friendship','funeral','gene','girlfriend','hat','indication','intention','lady',
259     'midnight','negotiation','obligation','passenger','pizza','platform','poet','pollution',
260     'recognition','reputation','shirt','sir','speaker','stranger','surgery','sympathy','tale','throat',
261     'trainer','uncle','youth','time','work','film','water','money','example','while','business','study',
262     'game','life','form','air','day','place','number','part','field','fish','back','process','heat',
263     'hand','experience','job','book','end','point','type','home','economy','value','body','market',
264     'guide','interest','state','radio','course','company','price','size','card','list','mind','trade',
265     'line','care','group','risk','word','fat','force','key','light','training','name','school','top',
266     'amount','level','order','practice','research','sense','service','piece','web','boss','sport','fun',
267     'house','page','term','test','answer','sound','focus','matter','kind','soil','board','oil','picture',
268     'access','garden','range','rate','reason','future','site','demand','exercise','image','case','cause',
269     'coast','action','age','bad','boat','record','result','section','building','mouse','cash','class',
270     'nothing','period','plan','store','tax','side','subject','space','rule','stock','weather','chance',
271     'figure','man','model','source','beginning','earth','program','chicken','design','feature','head',
272     'material','purpose','question','rock','salt','act','birth','car','dog','object','scale','sun',
273     'note','profit','rent','speed','style','war','bank','craft','half','inside','outside','standard',
274     'bus','exchange','eye','fire','position','pressure','stress','advantage','benefit','box','frame',
275     'issue','step','cycle','face','item','metal','paint','review','room','screen','structure','view',
276     'account','ball','discipline','medium','share','balance','bit','black','bottom','choice','gift',
277     'impact','machine','shape','tool','wind','address','average','career','culture','morning','pot',
278     'sign','table','task','condition','contact','credit','egg','hope','ice','network','north','square',
279     'attempt','date','effect','link','post','star','voice','capital','challenge','friend','self','shot',
280     'brush','couple','debate','exit','front','function','lack','living','plant','plastic','spot',
281     'summer','taste','theme','track','wing','brain','button','click','desire','foot','gas','influence',
282     'notice','rain','wall','base','damage','distance','feeling','pair','savings','staff','sugar',
283     'target','text','animal','author','budget','discount','file','ground','lesson','minute','officer',
284     'phase','reference','register','sky','stage','stick','title','trouble','bowl','bridge','campaign',
285     'character','club','edge','evidence','fan','letter','lock','maximum','novel','option','pack','park',
286     'plenty','quarter','skin','sort','weight','baby','background','carry','dish','factor','fruit',
287     'glass','joint','master','muscle','red','strength','traffic','trip','vegetable','appeal','chart',
288     'gear','ideal','kitchen','land','log','mother','net','party','principle','relative','sale','season',
289     'signal','spirit','street','tree','wave','belt','bench','commission','copy','drop','minimum','path',
290     'progress','project','sea','south','status','stuff','ticket','tour','angle','blue','breakfast',
291     'confidence','daughter','degree','doctor','dot','dream','duty','essay','father','fee','finance',
292     'hour','juice','limit','luck','milk','mouth','peace','pipe','seat','stable','storm','substance',
293     'team','trick','afternoon','bat','beach','blank','catch','chain','consideration','cream','crew',
294     'detail','gold','interview','kid','mark','match','mission','pain','pleasure','score','screw','sex',
295     'shop','shower','suit','tone','window','agent','band','block','bone','calendar','cap','coat',
296     'contest','corner','court','cup','district','door','east','finger','garage','guarantee','hole',
297     'hook','implement','layer','lecture','lie','manner','meeting','nose','parking','partner','profile',
298     'respect','rice','routine','schedule','swimming','telephone','tip','winter','airline','bag','battle',
299     'bed','bill','bother','cake','code','curve','designer','dimension','dress','ease','emergency',
300     'evening','extension','farm','fight','gap','grade','holiday','horror','horse','host','husband',
301     'loan','mistake','mountain','nail','noise','occasion','package','patient','pause','phrase','proof',
302     'race','relief','sand','sentence','shoulder','smoke','stomach','string','tourist','towel','vacation',
303     'west','wheel','wine','arm','aside','associate','bet','blow','border','branch','breast','brother',
304     'buddy','bunch','chip','coach','cross','document','draft','dust','expert','floor','god','golf',
305     'habit','iron','judge','knife','landscape','league','mail','mess','native','opening','parent',
306     'pattern','pin','pool','pound','request','salary','shame','shelter','shoe','silver','tackle','tank',
307     'trust','assist','bake','bar','bell','bike','blame','boy','brick','chair','closet','clue','collar',
308     'comment','conference','devil','diet','fear','fuel','glove','jacket','lunch','monitor','mortgage',
309     'nurse','pace','panic','peak','plane','reward','row','sandwich','shock','spite','spray','surprise',
310     'till','transition','weekend','welcome','yard','alarm','bend','bicycle','bite','blind','bottle',
311     'cable','candle','clerk','cloud','concert','counter','flower','grandfather','harm','knee','lawyer',
312     'leather','load','mirror','neck','pension','plate','purple','ruin','ship','skirt','slice','snow',
313     'specialist','stroke','switch','trash','tune','zone','anger','award','bid','bitter','boot','bug',
314     'camp','candy','carpet','cat','champion','channel','clock','comfort','cow','crack','engineer',
315     'entrance','fault','grass','guy','hell','highlight','incident','island','joke','jury','leg','lip',
316     'mate','motor','nerve','passage','pen','pride','priest','prize','promise','resident','resort','ring',
317     'roof','rope','sail','scheme','script','sock','station','toe','tower','truck','witness','a','you',
318     'it','can','will','if','one','many','most','other','use','make','good','look','help','go','great',
319     'being','few','might','still','public','read','keep','start','give','human','local','general','she',
320     'specific','long','play','feel','high','tonight','put','common','set','change','simple','past','big',
321     'possible','particular','today','major','personal','current','national','cut','natural','physical',
322     'show','try','check','second','call','move','pay','let','increase','single','individual','turn',
323     'ask','buy','guard','hold','main','offer','potential','professional','international','travel','cook',
324     'alternative','following','special','working','whole','dance','excuse','cold','commercial','low',
325     'purchase','deal','primary','worth','fall','necessary','positive','produce','search','present',
326     'spend','talk','creative','tell','cost','drive','green','support','glad','remove','return','run',
327     'complex','due','effective','middle','regular','reserve','independent','leave','original','reach',
328     'rest','serve','watch','beautiful','charge','active','break','negative','safe','stay','visit',
329     'visual','affect','cover','report','rise','walk','white','beyond','junior','pick','unique',
330     'anything','classic','final','lift','mix','private','stop','teach','western','concern','familiar',
331     'fly','official','broad','comfortable','gain','maybe','rich','save','stand','young','fail','heavy',
332     'hello','lead','listen','valuable','worry','handle','leading','meet','release','sell','finish',
333     'normal','press','ride','secret','spread','spring','tough','wait','brown','deep','display','flow',
334     'hit','objective','shoot','touch','cancel','chemical','cry','dump','extreme','push','conflict','eat',
335     'fill','formal','jump','kick','opposite','pass','pitch','remote','total','treat','vast','abuse',
336     'beat','burn','deposit','print','raise','sleep','somewhere','advance','anywhere','consist','dark',
337     'double','draw','equal','fix','hire','internal','join','kill','sensitive','tap','win','attack',
338     'claim','constant','drag','drink','guess','minor','pull','raw','soft','solid','wear','weird',
339     'wonder','annual','count','dead','doubt','feed','forever','impress','nobody','repeat','round','sing',
340     'slide','strip','whereas','wish','combine','command','dig','divide','equivalent','hang','hunt',
341     'initial','march','mention','smell','spiritual','survey','tie','adult','brief','crazy','escape',
342     'gather','hate','prior','repair','rough','sad','scratch','sick','strike','employ','external','hurt',
343     'illegal','laugh','lay','mobile','nasty','ordinary','respond','royal','senior','split','strain',
344     'struggle','swim','train','upper','wash','yellow','convert','crash','dependent','fold','funny',
345     'grab','hide','miss','permit','quote','recover','resolve','roll','sink','slip','spare','suspect',
346     'sweet','swing','twist','upstairs','usual','abroad','brave','calm','concentrate','estimate','grand',
347     'male','mine','prompt','quiet','refuse','regret','reveal','rush','shake','shift','shine','steal',
348     'suck','surround','anybody','bear','brilliant','dare','dear','delay','drunk','female','hurry',
349     'inevitable','invite','kiss','neat','pop','punch','quit','reply','representative','resist','rip',
350     'rub','silly','smile','spell','stretch','stupid','tear','temporary','tomorrow','wake','wrap',
351     'yesterday']
352
def get_random_name(with_ext=True):
    """Return a random name shaped like ``<adjective>_<noun>_<number>``.

    The adjective and noun are drawn from the module-level word lists and
    the number is a random integer between 0 and 50000 (both inclusive).

    :param with_ext: when true, ``.txt`` is appended to the name.
    :returns: the generated name as a string.
    """
    # Draw in the same order as the fields appear in the name.
    adjective = random.choice(adjectives)
    noun = random.choice(nouns)
    number = random.randint(0, 50000)
    suffix = '.txt' if with_ext else ''
    return '_'.join((adjective, noun, str(number))) + suffix
359
def get_random_file(max_filesize):
    """Return a random manifest file token ``<start>:<size>:<name>``.

    The token describes a randomly placed segment of a data block that is
    ``max_filesize`` long: ``start`` is chosen first, then ``size`` is chosen
    between 0 and the space remaining after ``start``, so ``start + size``
    never exceeds ``max_filesize``. The name comes from get_random_name().

    :param max_filesize: length of the data block the segment must fit in.
    :returns: the file token as a string.
    :raises ValueError: if ``max_filesize`` is negative (no valid size range).
    """
    # The start offset is kept at least 1025 positions away from the end of
    # the block. For blocks shorter than that, fall back to offset 0 instead
    # of letting randint() fail on an empty range.
    latest_start = max(0, max_filesize - 1025)
    file_start = random.randint(0, latest_start)
    file_size = random.randint(0, (max_filesize - file_start))
    file_name = get_random_name()
    return "{}:{}:{}".format(file_start, file_size, file_name)
365
def get_stream(name, max_filesize, data_loc, args):
    """Build one manifest stream line (without a trailing newline).

    The line is ``<name> <data_loc> <file tokens...>``, where the number of
    file tokens is picked at random between ``args.min_files`` and
    ``args.max_files`` (inclusive) and each token comes from
    get_random_file().

    :param name: stream (directory) name placed first on the line.
    :param max_filesize: forwarded to get_random_file() for every token.
    :param data_loc: data block locator placed second on the line.
    :param args: object exposing ``min_files`` and ``max_files``.
    :returns: the stream line as a string.
    """
    file_count = random.randint(args.min_files, args.max_files)
    file_tokens = [get_random_file(max_filesize) for _ in range(file_count)]
    return "{} {} {}".format(name, data_loc, ' '.join(file_tokens))
372
def create_substreams(depth, base_stream_name, max_filesize, data_loc, args, current_size=0):
    """Recursively generate the stream lines for a random directory tree.

    A stream for ``base_stream_name`` is always generated. Unless the size
    limit has been reached or ``depth`` is 0, between 1 and 10 randomly named
    subdirectories are then generated, each one level shallower.

    :param depth: remaining levels of subdirectories to create below this one.
    :param base_stream_name: name of the stream (directory) for this level.
    :param max_filesize: forwarded to get_stream().
    :param data_loc: data block locator forwarded to get_stream().
    :param args: parsed options forwarded to get_stream().
    :param current_size: total length of the stream text accounted for so far
        by the callers; used to stop once 128 MiB is reached.
    :returns: list of stream lines, this level's own stream first.
    """
    size_limit = 128 * 1024 * 1024

    own_stream = get_stream(base_stream_name, max_filesize, data_loc, args)
    current_size += len(own_stream)
    collected = [own_stream]

    if current_size >= size_limit:
        logger.debug("Maximum manifest size reached -- finishing early at {}".format(base_stream_name))
        return collected
    if depth == 0:
        logger.debug("Finished stream {}".format(base_stream_name))
        return collected

    child_count = random.randint(1, 10)
    for _ in range(child_count):
        child_name = base_stream_name + '/' + get_random_name(False)
        child_streams = create_substreams(
            depth - 1, child_name, max_filesize, data_loc, args, current_size)
        current_size += sum(len(line) for line in child_streams)
        # A subtree that pushes the total to the limit is dropped entirely,
        # and no further siblings are generated.
        if current_size >= size_limit:
            break
        collected.extend(child_streams)
    return collected
392
def parse_arguments(arguments):
    """Parse and validate the command line options.

    Switches the module logger to DEBUG when ``--debug`` is given, then
    checks that the file-count and depth ranges are non-negative and
    correctly ordered. Any violation is reported through
    ``arg_parser.error()``, which prints a usage message and exits.

    :param arguments: list of argument strings, or None to let argparse
        read them from ``sys.argv``.
    :returns: the parsed ``argparse.Namespace``.
    """
    args = arg_parser.parse_args(arguments)
    if args.debug:
        logger.setLevel(logging.DEBUG)
    # A negative file count is meaningless and would silently yield streams
    # without any file token; reject it like a negative depth.
    if args.min_files < 0:
        arg_parser.error("--min-files should be at least 0")
    if args.max_files < args.min_files:
        arg_parser.error("--min-files={} should be less or equal than max-files={}".format(args.min_files, args.max_files))
    if args.min_depth < 0:
        arg_parser.error("--min-depth should be at least 0")
    if args.max_depth < 0 or args.max_depth < args.min_depth:
        arg_parser.error("--max-depth should be at >= 0 and >= min-depth={}".format(args.min_depth))
    return args
404
def main(arguments=None):
    """Create a test collection filled with randomly named garbage files.

    Steps: parse the options, upload one 1 MiB block of random printable
    characters to Keep, generate a random tree of manifest streams that all
    point into that block, assemble them into a manifest capped just below
    128 MiB, and create the collection through the API.

    :param arguments: list of argument strings, or None to let argparse
        read them from ``sys.argv``.
    :returns: 0 on success.
    :raises Exception: whatever the collection create call raises; the start
        of the manifest and its size are logged before re-raising.
    """
    args = parse_arguments(arguments)
    logger.info("Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={})...".format(args.min_files, args.max_files, args.min_depth, args.max_depth))
    api = arvados.api('v1', timeout=5*60)
    max_filesize = 1024*1024
    data_block = ''.join([random.choice(string.printable) for _ in range(max_filesize)])
    data_loc = arvados.KeepClient(api).put(data_block)
    streams = create_substreams(random.randint(args.min_depth, args.max_depth),
        '.', max_filesize, data_loc, args)

    # Collect the accepted lines and join once at the end: repeatedly
    # appending to a string this large is not guaranteed to be linear.
    max_manifest_size = (1024*1024*128)-2
    manifest_lines = []
    manifest_size = 0
    for s in streams:
        if manifest_size + len(s) > max_manifest_size:
            # The first stream that does not fit ends the manifest; all the
            # streams after it are dropped as well.
            logger.info("Skipping stream {} to avoid making a manifest bigger than 128MiB".format(s.split(' ')[0]))
            break
        manifest_lines.append(s)
        manifest_size += len(s) + 1  # +1 for the newline ending each stream
    manifest = ''.join(line + '\n' for line in manifest_lines)

    try:
        coll = api.collections().create(body={
            "ensure_unique_name": True,
            "collection": {
                "name": get_random_name(False),
                "manifest_text": manifest
            },
        }).execute()
    except Exception:
        # Only real errors are reported here; KeyboardInterrupt/SystemExit
        # pass through without the misleading "ERROR" message.
        logger.info("ERROR trying manifest:\n'{}...'\nSize: {}".format(manifest[0:1024], len(manifest)))
        raise
    logger.info("Created collection {} - manifest size: {}".format(coll["uuid"], len(manifest)))
    return 0
433
# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())