Title: Download all data from a collection Date: 25 Oct 2021
Description:

  • Download all data from a collection

Install and import dependencies¶

In [ ]:
# Install specific packages required for this notebook
!pip install flywheel-sdk tqdm pandas fw-meta backoff
In [ ]:
# Import packages
import logging
import os
import re
from getpass import getpass
from functools import lru_cache
from pathlib import Path

import pandas as pd
import backoff
import pandas as pd
import flywheel
from tqdm.notebook import tqdm

from permission import check_user_permission
In [ ]:
# Configure logging (timestamp, level, message at INFO and above) and
# grab the logger used throughout this notebook
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
log = logging.getLogger('root')

Flywheel API Key and Client¶

Get an API_KEY. More on this in the Flywheel SDK doc here.

In [ ]:
# Prompt for the API key; getpass does not echo the typed value, so the key
# is not shown in the notebook output.
API_KEY = getpass('Enter API_KEY here: ')

Instantiate the Flywheel API client

In [ ]:
# Use the key entered at the prompt when it is defined and non-empty; otherwise
# (prompt cell skipped, or prompt left blank) fall back to the FW_KEY
# environment variable.
fw = flywheel.Client(locals().get('API_KEY') or os.environ.get('FW_KEY'))

Show Flywheel login information

In [ ]:
# Report which user is authenticated and against which Flywheel site
log.info(
    'You are now logged in as %s to %s',
    fw.get_current_user()['email'],
    fw.get_config()['site']['api_url'],
)

Constants¶

In [ ]:
# ID of the Flywheel collection to download (replace the placeholder)
COLLECTION_ID = '<collection-id>'
# Local root directory under which the downloaded files are written
ROOT_DATA = Path('/tmp')
# File type to filter on: only files whose `type` equals this value are downloaded
FILE_TYPE = 'nifti'

Helper functions¶

In [ ]:
# Cached wrapper around `fw.get_project`; helps to reduce repeated API calls.
@lru_cache(maxsize=128)
def get_project(fw, project_id):
    """Return the project with ID `project_id`, fetched through client `fw`.

    Results are memoized per (fw, project_id) pair, so asking again for the
    same project with the same client does not call `fw.get_project` again.
    """
    project = fw.get_project(project_id)
    return project
In [ ]:
def is_not_500_502_504(exc):
    """Return False when `exc.status` is 500, 502 or 504, True otherwise.

    Exceptions without a `status` attribute also yield True.

    Status codes:
        500: Internal Server Error
        502: Bad Gateway
        504: Gateway Timeout
    """
    retryable_statuses = (500, 502, 504)
    return getattr(exc, "status", None) not in retryable_statuses


@backoff.on_exception(
    backoff.expo,
    flywheel.rest.ApiException,
    max_time=60,
    giveup=is_not_500_502_504,
)
def robust_download(file, dst_path):
    """Download `file` to `dst_path`, retrying on transient API errors.

    On `flywheel.rest.ApiException` the call is retried for up to 60s with an
    exponentially increasing delay between attempts. Retrying stops
    immediately when `is_not_500_502_504` returns True for the exception,
    i.e. only statuses 500, 502 and 504 are retried.
    """
    file.download(dst_path)

Main script¶

Get the collection¶

In [ ]:
# Fetch the collection to download from; log an error if nothing comes back.
collection = fw.get_collection(COLLECTION_ID)
if not collection:
    # The original message interpolated an undefined name `f`, which raised
    # NameError instead of reporting the missing collection.
    log.error('Collection %s not found.', COLLECTION_ID)

Download all files in the collection matching FILE_TYPE¶

In [ ]:
# Walk every session of the collection and download the files of the requested
# type into ROOT_DATA/<project>/<subject>/<session>/<acquisition>/<file name>
for session in tqdm(collection.sessions.iter()):
    project = get_project(fw, session.project)
    for acquisition in session.acquisitions.iter():
        matching_files = (f for f in acquisition.files if f.type == FILE_TYPE)
        for fw_file in matching_files:
            # assuming labels are POSIX compliant (usable as path components)
            target = ROOT_DATA.joinpath(
                project.label,
                session.subject.label,
                session.label,
                acquisition.label,
                fw_file.name,
            )
            target.parent.mkdir(parents=True, exist_ok=True)
            robust_download(fw_file, str(target))
In [ ]: