Title: Download all data from a collection
Date: 25 Oct 2021
Description:
# Install specific packages required for this notebook
!pip install flywheel-sdk tqdm pandas fw-meta backoff
# Import packages
import logging
import os
import re
from getpass import getpass
from functools import lru_cache
from pathlib import Path
import pandas as pd
import backoff
import pandas as pd
import flywheel
from tqdm.notebook import tqdm
from permission import check_user_permission
# Configure logging at INFO level with a "<timestamp> <level> <message>" line
# format, then get the logger object used by the rest of this notebook.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
log = logging.getLogger('root')
Get an API key. See the Flywheel SDK documentation for details on how to obtain one.
# Prompt for the API key; getpass reads it without echoing the typed value.
API_KEY = getpass('Enter API_KEY here: ')
Instantiate the Flywheel API client
# Use API_KEY when that name has been defined; otherwise fall back to the
# value of the FW_KEY environment variable (None if it is not set).
fw = flywheel.Client(API_KEY if 'API_KEY' in locals() else os.environ.get('FW_KEY'))
Show Flywheel login information
# Log the current user's email and the site's API URL to confirm the login.
log.info('You are now logged in as %s to %s', fw.get_current_user()['email'], fw.get_config()['site']['api_url'])
# ID of the collection to download (replace the placeholder with a real ID)
COLLECTION_ID = '<collection-id>'
# Local root path under which the data is downloaded
ROOT_DATA = Path('/tmp')
# File type to filter on: only files of this type are downloaded
FILE_TYPE = 'nifti'
# Memoised wrapper around `fw.get_project`: a repeated lookup with the same
# client and project id reuses the first result instead of calling again.
@lru_cache(maxsize=128)
def get_project(fw, project_id):
    """Return the project `project_id` fetched through client `fw`.

    Results are cached per `(fw, project_id)` pair, so both arguments must
    be hashable.
    """
    project = fw.get_project(project_id)
    return project
def is_not_500_502_504(exc):
    """Return False when `exc.status` is 500, 502 or 504, and True otherwise.

    Objects without a `status` attribute also yield True.
    """
    # 500: Internal Server Error
    # 502: Bad Gateway
    # 504: Gateway Timeout
    return getattr(exc, "status", None) not in (504, 502, 500)
@backoff.on_exception(
    backoff.expo,
    flywheel.rest.ApiException,
    max_time=60,
    giveup=is_not_500_502_504,
)
def robust_download(file, dst_path):
    """Download `file` to `dst_path`, retrying on `flywheel.rest.ApiException`.

    Retries use `backoff.expo` (exponentially increasing waits between
    attempts) for up to 60 seconds in total. `is_not_500_502_504` is the
    give-up predicate: only exceptions whose status is 500, 502 or 504 are
    retried; any other `ApiException` is re-raised immediately.
    """
    file.download(dst_path)
# Fetch the collection and download every file of type FILE_TYPE into a local
# tree: ROOT_DATA/<project>/<subject>/<session>/<acquisition>/<file name>.
collection = fw.get_collection(COLLECTION_ID)
if not collection:
    # Stop here: without a collection there is nothing to iterate over, and
    # continuing would only fail later with a less helpful error.
    log.error('Collection %s not found.', COLLECTION_ID)
    raise ValueError(f'Collection {COLLECTION_ID} not found.')

for session in tqdm(collection.sessions.iter()):
    # Cached lookup, so sessions sharing a project trigger only one call.
    project = get_project(fw, session.project)
    for acq in session.acquisitions.iter():
        for file in acq.files:
            if file.type != FILE_TYPE:
                continue
            # assuming labels are POSIX compliant
            dst_path = ROOT_DATA / project.label / session.subject.label / session.label / acq.label / file.name
            dst_path.parent.mkdir(parents=True, exist_ok=True)
            robust_download(file, str(dst_path))