
Drop In Support

While it is always better to write any new programs using Kopah with tools designed for it, like boto3, sometimes that just isn't feasible. If you need to make an existing script backwards compatible with Kopah storage, juicefs is a possible option. While juicefs can be a helpful tool, there are also many cases where you shouldn't use it because it has significant limitations. The primary limitation is that Kopah uses juicefs in what's called standalone mode, which makes it difficult to access the same data across multiple nodes. That means if your workload requires multiple nodes (not just multiple processes, since those can be run on the same node) then juicefs likely isn't the tool for you.

Installation

Juicefs isn't installed on the nodes by default, so you have to download the newest version to your current directory with

wget https://github.com/juicedata/juicefs/releases/download/v1.2.3/juicefs-1.2.3-linux-amd64.tar.gz -O - | tar xzvf -

or alternatively you can copy the Hyak binary to your current directory with cp /sw/juicefs/juicefs . (which is the currently up to date v1.2.3). Then move the binary to a directory of your choice that's on your PATH (readable with echo $PATH) and is visible from both klone and compute nodes. I would recommend /home/$USER/.local/bin since it fulfills all of those by default. You may have to make one or both directories before moving the binary with mv juicefs ~/.local/bin/juicefs. Finally, ensure everything is working by running juicefs -V, which should print the version of your executable.
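
Putting those steps together, one possible sequence using the Hyak-provided binary looks like this (assuming you pick ~/.local/bin as the destination):

cp /sw/juicefs/juicefs .        # or download with the wget command above
mkdir -p ~/.local/bin           # create the directory if it doesn't already exist
mv juicefs ~/.local/bin/juicefs
juicefs -V                      # should print the juicefs version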

Usage

It is strongly discouraged, and very inconvenient, to hard code your Kopah keys into any scripts or commands, so before continuing it is recommended to follow the s5cmd setup instructions to create environment variables for your keys so that you can easily access them. Once you have your variables set up you can create a juicefs bucket with

juicefs format --storage s3 --bucket $S3_ENDPOINT_URL/<bucket_name> --access-key $AWS_ACCESS_KEY_ID --secret-key $AWS_SECRET_KEY_ID sqlite3://<db_name>.db <db_name>

where <bucket_name> and <db_name> are the names of your s3 bucket and database file respectively, which can but don't have to be the same name (although it's easiest if they are). There are other options for this command, which you can see with juicefs format --help, but by default it will create a sqlite database file in the current directory called <db_name>.db that is required to read the data in the bucket you created.
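
For example, if you wanted both the bucket and the database file to be named mydata (a name chosen purely for illustration), the command would be:

juicefs format --storage s3 --bucket $S3_ENDPOINT_URL/mydata --access-key $AWS_ACCESS_KEY_ID --secret-key $AWS_SECRET_KEY_ID sqlite3://mydata.db mydata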

caution

If you lose the database file then the data in your bucket is essentially lost, so it is recommended to back it up somewhere off of Hyak.
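
For example, you could pull the database file down to your own computer with something like the following, run from your local terminal (the destination directory is just an example, and this assumes the standard klone login address):

mkdir -p ~/kopah-backups
scp <netid>@klone.hyak.uw.edu:/path/to/<db_name>.db ~/kopah-backups/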

Now you can mount your juicefs bucket to the current node with

juicefs mount sqlite3://<db_name>.db <mount_point> -d

where <mount_point> is the path to the directory you want to mount to, and -d tells juicefs to run its service in the background so that it doesn't stay in your terminal (although for testing it may be helpful to remove the -d and watch the status messages). Do note that if the database file isn't in your current directory you have to specify the full path with sqlite3:///path/to/<db_name>.db, with three leading slashes. Now that your bucket is mounted you can perform standard posix operations like mv or rm on any file in the directory and they will automatically be applied to the bucket for you. Once you are done working with the data you can see currently running juicefs mounts with ps ux | grep juicefs and unmount them with

juicefs umount <mount_point>

noting that it is umount and not unmount, and also being aware that simply closing your terminal won't unmount the bucket if you ran juicefs with the -d flag.
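
Putting that together, a short interactive session with a hypothetical filesystem named mydata (database file mydata.db in the current directory) mounted at ~/jfs might look like this, where results.csv is just a stand-in for your own data:

mkdir -p ~/jfs
juicefs mount sqlite3://mydata.db ~/jfs -d

cp results.csv ~/jfs/           # uploads the file to the bucket
ls ~/jfs                        # lists the contents of the bucket
rm ~/jfs/results.csv            # deletes the object from the bucket

ps ux | grep juicefs            # confirm the background mount is still running
juicefs umount ~/jfs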

Example Script

Quite possibly the best use case for juicefs is workloads where each job in an array runs on its own node and accesses its own data, in which case you can just mount the juicefs bucket at your old data directory and leave the rest of your script untouched. It should also be fairly easy to extend that to multiple jobs on a single node sharing data (but with each node still having its own data) using either job steps with srun or by only running the juicefs commands on every ith array job (the first one on each node); a rough sketch of that variant is shown after the script. Below is an example slurm batch script using the first method to write some example data to each node's unique juicefs bucket and then unmount it.

#!/bin/bash

#SBATCH --job-name=juicefs
#SBATCH --partition=ckpt
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=16G
#SBATCH --array=0-7

# Change directory to per node storage so different mounts don't conflict
cd /scr

# Remove this if you already have formatted buckets. If so, you will likely need an array of bucket names to map each task id to a bucket
juicefs format --storage s3 --bucket $S3_ENDPOINT_URL/<netid>-$SLURM_ARRAY_TASK_ID --access-key $AWS_ACCESS_KEY_ID --secret-key $AWS_SECRET_KEY_ID sqlite3://<db_name>.db <db_name>

# Mount new juicefs database in background at /scr/$USER-$SLURM_ARRAY_TASK_ID.
# If multiple tasks share the mount, wrap the format and mount commands in
# if (( SLURM_ARRAY_TASK_ID % <num-tasks> == 0 )); then ... fi, where <num-tasks> is the number of tasks per node
# Could potentially also use srun to separate your script into job steps to ensure everything completes before unmounting your juicefs bucket
mkdir $USER-$SLURM_ARRAY_TASK_ID
juicefs mount sqlite3://<db_name>.db /scr/$USER-$SLURM_ARRAY_TASK_ID -d

# Cd into mount and process data
cd /scr/$USER-$SLURM_ARRAY_TASK_ID
cp /sw/hyak101/example_data/r0-f0 .

# Cd out of and unmount the juicefs database. If multiple tasks are sharing the mount on the node, you need to ensure only the last one unmounts somehow (shared incremented variable?)
cd ..
juicefs umount /scr/$USER-$SLURM_ARRAY_TASK_ID
rmdir /scr/$USER-$SLURM_ARRAY_TASK_ID
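
If you instead want several array tasks to share one node's mount (the second approach mentioned above), a rough sketch of the modulo gating described in the comments is below. It assumes a fixed number of tasks per node and that consecutive task IDs land on the same node, which slurm does not guarantee, so treat it as a starting point rather than a drop-in solution:

TASKS_PER_NODE=4   # hypothetical value; must match how your array tasks are packed onto nodes

# Only the first task in each group of TASKS_PER_NODE formats and mounts the shared bucket
if (( SLURM_ARRAY_TASK_ID % TASKS_PER_NODE == 0 )); then
    juicefs format --storage s3 --bucket $S3_ENDPOINT_URL/<netid>-$((SLURM_ARRAY_TASK_ID / TASKS_PER_NODE)) --access-key $AWS_ACCESS_KEY_ID --secret-key $AWS_SECRET_KEY_ID sqlite3://<db_name>.db <db_name>
    mkdir -p /scr/$USER-shared
    juicefs mount sqlite3://<db_name>.db /scr/$USER-shared -d
fi

# The remaining tasks need to wait for the mount to appear before using it, and only the
# last task to finish on a node should run juicefs umount (e.g. tracked with a counter file)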