@api_client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
else
# Use system CA certificates
- @api_client.ssl_config.add_trust_ca('/etc/ssl/certs')
+ ["/etc/ssl/certs/ca-certificates.crt",
+ "/etc/pki/tls/certs/ca-bundle.crt"]
+ .select { |ca_path| File.readable?(ca_path) }
+ .each { |ca_path| @api_client.ssl_config.add_trust_ca(ca_path) }
end
if Rails.configuration.api_response_compression
@api_client.transparent_gzip_decompression = true
# FIXME: Remove this line after #6885 is done.
fpm_args+=(--iteration 2)
+
+# FIXME: Remove once support for llfuse 0.42+ is in place
+fpm_args+=(-v 0.41.1)
h2. Copy configuration files from the dispatcher (API server)
-The @/etc/slurm-llnl/slurm.conf@ and @/etc/munge/munge.key@ files need to be identicaly across the dispatcher and all compute nodes. Copy the files you created in the "Install the Crunch dispatcher":install-crunch-dispatch.html step to this compute node.
+The @slurm.conf@ and @/etc/munge/munge.key@ files need to be identical across the dispatcher and all compute nodes. Copy the files you created in the "Install the Crunch dispatcher":install-crunch-dispatch.html step to this compute node.
h2. Configure FUSE
</code></pre>
</notextile>
-Now we need to give SLURM a configuration file in @/etc/slurm-llnl/slurm.conf@. Here's an example:
+Now we need to give SLURM a configuration file. On Debian-based systems, this is installed at @/etc/slurm-llnl/slurm.conf@. On Red Hat-based systems, this is installed at @/etc/slurm/slurm.conf@. Here's an example @slurm.conf@:
<notextile>
<pre>
Each hostname in @slurm.conf@ must also resolve correctly on all SLURM worker nodes as well as the controller itself. Furthermore, the hostnames used in the configuration file must match the hostnames reported by @hostname@ or @hostname -s@ on the nodes themselves. This applies to the ControlMachine as well as the worker nodes.
For example:
-* In @/etc/slurm-llnl/slurm.conf@ on control and worker nodes: @ControlMachine=uuid_prefix.your.domain@
-* In @/etc/slurm-llnl/slurm.conf@ on control and worker nodes: @NodeName=compute[0-255]@
+* In @slurm.conf@ on control and worker nodes: @ControlMachine=uuid_prefix.your.domain@
+* In @slurm.conf@ on control and worker nodes: @NodeName=compute[0-255]@
* In @/etc/resolv.conf@ on control and worker nodes: @search uuid_prefix.your.domain@
* On the control node: @hostname@ reports @uuid_prefix.your.domain@
* On worker node 123: @hostname@ reports @compute123.uuid_prefix.your.domain@
server {
listen <span class="userinput">[your public IP address]</span>:443 ssl;
- server_name keep.<span class="userinput">uuid_prefix</span>.your.domain
+ server_name keep.<span class="userinput">uuid_prefix</span>.your.domain;
proxy_connect_timeout 90s;
proxy_read_timeout 300s;
h2. Install the Ruby SDK and utilities
-If you're using RVM:
+First, install the curl development libraries necessary to build the Arvados Ruby SDK. On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install libcurl4-openssl-dev</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install libcurl-devel</span>
+</code></pre>
+</notextile>
+
+Next, install the arvados-cli Ruby gem. If you're using RVM:
<notextile>
<pre><code>~$ <span class="userinput">sudo /usr/local/rvm/bin/rvm-exec default gem install arvados-cli</span>
<li>If you're deploying on an older Red Hat-based distribution and installed Python 2.7 from Software Collections, configure Nginx to use it:
<pre><code>~$ <span class="userinput">sudo usermod --shell /bin/bash nginx</span>
-~$ <span class="userinput">sudo -u nginx sh -c 'echo "[[ -z \$PS1 && -e /opt/rh/python27/enable ]] && source /opt/rh/python27/enable" >>~/.bash_profile'</span>
+~$ <span class="userinput">sudo -u nginx sh -c 'echo "[[ -z \$PS1 ]] && source scl_source enable python27" >>~/.bash_profile'</span>
</code></pre>
</li>
self.add_argument('--crunchstat-interval', type=float, help="Write stats to stderr every N seconds (default disabled)", default=0)
+ self.add_argument('--unmount-timeout',
+ type=float, default=2.0,
+ help="Time to wait for graceful shutdown after --exec program exits and filesystem is unmounted")
+
self.add_argument('--exec', type=str, nargs=argparse.REMAINDER,
dest="exec_args", metavar=('command', 'args', '...', '--'),
help="""Mount, run a command, then unmount and exit""")
llfuse.init(self.operations, self.args.mountpoint, self._fuse_options())
if self.args.mode != 'by_pdh':
self.operations.listen_for_events()
- t = threading.Thread(None, lambda: llfuse.main())
- t.start()
+ self.llfuse_thread = threading.Thread(None, lambda: self._llfuse_main())
+ self.llfuse_thread.daemon = True
+ self.llfuse_thread.start()
self.operations.initlock.wait()
def __exit__(self, exc_type, exc_value, traceback):
subprocess.call(["fusermount", "-u", "-z", self.args.mountpoint])
- self.operations.destroy()
+ self.llfuse_thread.join(timeout=self.args.unmount_timeout)
+ if self.llfuse_thread.is_alive():
+ self.logger.warning("Mount.__exit__:"
+ " llfuse thread still alive %fs after umount"
+ " -- abandoning and exiting anyway",
+ self.args.unmount_timeout)
def run(self):
if self.args.exec_args:
'''.format(api_host, user_email)
def _run_exec(self):
- # Initialize the fuse connection
- llfuse.init(self.operations, self.args.mountpoint, self._fuse_options())
-
- # Subscribe to change events from API server
- if self.args.mode != 'by_pdh':
- self.operations.listen_for_events()
-
- t = threading.Thread(None, lambda: llfuse.main())
- t.start()
-
- # wait until the driver is finished initializing
- self.operations.initlock.wait()
-
rc = 255
- try:
- sp = subprocess.Popen(self.args.exec_args, shell=False)
-
- # forward signals to the process.
- signal.signal(signal.SIGINT, lambda signum, frame: sp.send_signal(signum))
- signal.signal(signal.SIGTERM, lambda signum, frame: sp.send_signal(signum))
- signal.signal(signal.SIGQUIT, lambda signum, frame: sp.send_signal(signum))
-
- # wait for process to complete.
- rc = sp.wait()
-
- # restore default signal handlers.
- signal.signal(signal.SIGINT, signal.SIG_DFL)
- signal.signal(signal.SIGTERM, signal.SIG_DFL)
- signal.signal(signal.SIGQUIT, signal.SIG_DFL)
- except Exception as e:
- self.logger.exception(
- 'arv-mount: exception during exec %s', self.args.exec_args)
+ with self:
try:
- rc = e.errno
- except AttributeError:
- pass
- finally:
- subprocess.call(["fusermount", "-u", "-z", self.args.mountpoint])
- self.operations.destroy()
+ sp = subprocess.Popen(self.args.exec_args, shell=False)
+
+ # forward signals to the process.
+ signal.signal(signal.SIGINT, lambda signum, frame: sp.send_signal(signum))
+ signal.signal(signal.SIGTERM, lambda signum, frame: sp.send_signal(signum))
+ signal.signal(signal.SIGQUIT, lambda signum, frame: sp.send_signal(signum))
+
+ # wait for process to complete.
+ rc = sp.wait()
+
+ # restore default signal handlers.
+ signal.signal(signal.SIGINT, signal.SIG_DFL)
+ signal.signal(signal.SIGTERM, signal.SIG_DFL)
+ signal.signal(signal.SIGQUIT, signal.SIG_DFL)
+ except Exception as e:
+ self.logger.exception(
+ 'arv-mount: exception during exec %s', self.args.exec_args)
+ try:
+ rc = e.errno
+ except AttributeError:
+ pass
exit(rc)
def _run_standalone(self):
try:
llfuse.init(self.operations, self.args.mountpoint, self._fuse_options())
- if not (self.args.exec_args or self.args.foreground):
- self.daemon_ctx = daemon.DaemonContext(working_directory=os.path.dirname(self.args.mountpoint),
- files_preserve=range(3, resource.getrlimit(resource.RLIMIT_NOFILE)[1]))
+ if not self.args.foreground:
+ self.daemon_ctx = daemon.DaemonContext(
+ working_directory=os.path.dirname(self.args.mountpoint),
+ files_preserve=range(
+ 3, resource.getrlimit(resource.RLIMIT_NOFILE)[1]))
self.daemon_ctx.open()
# Subscribe to change events from API server
self.operations.listen_for_events()
- llfuse.main()
+ self._llfuse_main()
except Exception as e:
self.logger.exception('arv-mount: exception during mount: %s', e)
exit(getattr(e, 'errno', 1))
- finally:
- self.operations.destroy()
exit(0)
+
+ def _llfuse_main(self):
+ try:
+ llfuse.main()
+ except:
+ llfuse.close(unmount=False)
+ raise
+ llfuse.close()
--- /dev/null
+fpm_depends+=(fuse)
def wrapper(self, *args, **kwargs):
with arvados_fuse.command.Mount(
arvados_fuse.command.ArgumentParser().parse_args(
- argv + ['--foreground', self.mnt])):
+ argv + ['--foreground',
+ '--unmount-timeout=0.1',
+ self.mnt])):
return func(self, *args, **kwargs)
return wrapper
return decorator
run_test_server.authorize_with("admin")
self.api = api if api else arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
+ # This is a copy of Mount's method. TODO: Refactor MountTestBase
+ # to use a Mount instead of copying its code.
+ def _llfuse_main(self):
+ try:
+ llfuse.main()
+ except:
+ llfuse.close(unmount=False)
+ raise
+ llfuse.close()
+
def make_mount(self, root_class, **root_kwargs):
self.operations = fuse.Operations(
os.getuid(), os.getgid(),
self.operations.inodes.add_entry(root_class(
llfuse.ROOT_INODE, self.operations.inodes, self.api, 0, **root_kwargs))
llfuse.init(self.operations, self.mounttmp, [])
- threading.Thread(None, llfuse.main).start()
+ self.llfuse_thread = threading.Thread(None, lambda: self._llfuse_main())
+ self.llfuse_thread.daemon = True
+ self.llfuse_thread.start()
# wait until the driver is finished initializing
self.operations.initlock.wait()
return self.operations.inodes[llfuse.ROOT_INODE]
self.pool.join()
del self.pool
- # llfuse.close is buggy, so use fusermount instead.
- #llfuse.close(unmount=True)
-
- count = 0
- success = 1
- while (count < 9 and success != 0):
- success = subprocess.call(["fusermount", "-u", self.mounttmp])
- time.sleep(0.1)
- count += 1
-
- self.operations.destroy()
+ subprocess.call(["fusermount", "-u", "-z", self.mounttmp])
+ self.llfuse_thread.join(timeout=1)
+ if self.llfuse_thread.is_alive():
+ logger.warning("MountTestBase.tearDown():"
+ " llfuse thread still alive 1s after umount"
+ " -- abandoning and exiting anyway")
os.rmdir(self.mounttmp)
if self.keeptmp:
--- /dev/null
+import arvados_fuse.command
+import json
+import multiprocessing
+import os
+import run_test_server
+import tempfile
+import unittest
+
+try:
+ from shlex import quote
+except:
+ from pipes import quote
+
+def try_exec(mnt, cmd):
+ try:
+ arvados_fuse.command.Mount(
+ arvados_fuse.command.ArgumentParser().parse_args([
+ '--read-write',
+ '--mount-tmp=zzz',
+ '--unmount-timeout=0.1',
+ mnt,
+ '--exec'] + cmd)).run()
+ except SystemExit:
+ pass
+ else:
+ raise AssertionError('should have exited')
+
+
+class ExecMode(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ run_test_server.run()
+ run_test_server.run_keep(enforce_permissions=True, num_servers=2)
+ run_test_server.authorize_with('active')
+
+ @classmethod
+ def tearDownClass(cls):
+ run_test_server.stop_keep(num_servers=2)
+
+ def setUp(self):
+ self.mnt = tempfile.mkdtemp()
+ _, self.okfile = tempfile.mkstemp()
+ self.pool = multiprocessing.Pool(1)
+
+ def tearDown(self):
+ self.pool.terminate()
+ self.pool.join()
+ os.rmdir(self.mnt)
+ os.unlink(self.okfile)
+
+ def test_exec(self):
+ self.pool.apply(try_exec, (self.mnt, [
+ 'sh', '-c',
+ 'echo -n foo >{}; cp {} {}'.format(
+ quote(os.path.join(self.mnt, 'zzz', 'foo.txt')),
+ quote(os.path.join(self.mnt, 'zzz', '.arvados#collection')),
+ quote(os.path.join(self.okfile)))]))
+ self.assertRegexpMatches(
+ json.load(open(self.okfile))['manifest_text'],
+ r' 0:3:foo.txt\n')
logins.each do |l|
next if seen[l[:username]]
seen[l[:username]] = true if not seen.has_key?(l[:username])
- @homedir = "/home/#{l[:username]}"
unless uids[l[:username]]
STDERR.puts "Creating account #{l[:username]}"
out: devnull)
end
# Create .ssh directory if necessary
+ @homedir = Etc.getpwnam(l[:username]).dir
userdotssh = File.join(@homedir, ".ssh")
Dir.mkdir(userdotssh) if !File.exists?(userdotssh)
@key = "#######################################################################################
puts bang.backtrace.join("\n")
exit 1
end
-
include agpl-3.0.txt
+include crunchstat_summary/chartjs.js