Title: Flywheel - RC integration notebook
Date: 16-04-2020
Description:
# Install specific packages required for this notebook
!pip install flywheel-sdk
# Import packages
from getpass import getpass
import logging
import os
import flywheel
import sys
import pprint
import pandas as pd
from permission import check_user_permission
! git clone git://github.com/sburns/PyCap.git PyCap
sys.path.append('/content/PyCap')
!pip3 install PyCap/
from redcap import Project
# Set up logging for the notebook: INFO level, each record prefixed with
# its timestamp and level name, then grab the logger used by later cells.
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
log = logging.getLogger('root')
Get an API_KEY. More on this in the Flywheel SDK documentation.
# Ask for the Flywheel API key with getpass so the typed value is not shown in the cell output.
API_KEY = getpass(prompt='Enter API_KEY here: ')
Instantiate the Flywheel API client
# Use the key entered above when that cell has been run; otherwise fall back
# to the FW_KEY environment variable.
_fw_key = API_KEY if 'API_KEY' in locals() else os.environ.get('FW_KEY')
fw = flywheel.Client(_fw_key)
Show Flywheel logging information
# Report which user is logged in and which site API the client talks to.
current_user_email = fw.get_current_user()['email']
site_api_url = fw.get_config()['site']['api_url']
log.info('You are now logged in as %s to %s', current_user_email, site_api_url)
# RedCap login (replace this with your RedCap API URL)
URL = 'https://redcap.test.edu/redcap_v0.0.01/API/'
# Enter your REDCap API key. getpass() is used instead of input() so the
# secret token is not echoed into (and saved with) the notebook output,
# matching how the Flywheel API_KEY is collected.
RC_API_KEY = getpass('Please enter your RedCap API key: ')
# Connect to the REDCap project through PyCap
project = Project(URL, RC_API_KEY)
# Constants describing the RedCap forms
# Metadata: the enrollment log or intake form (the form that identifies the participant)
subj_metadata = ['enrollment_log']
# Lookup table pairing a FW session label with a RedCap form label.
# Add the forms that you have on RedCap that correspond with data you uploaded on FW.
subj_forms = [
    dict(fw_session_label='session01', redcap_form_label='d1_baseline_questionnaires'),
    dict(fw_session_label='session02', redcap_form_label='d8_post_questionnaires'),
]
# Label of the FW project that you want to use
PROJECT_LABEL = input('Enter your project label here: ')
# Look the project up by its label
fw_project = fw.projects.find_first(f'label={PROJECT_LABEL}')
# NOTE(review): find_first() is expected to return None when nothing matches
# (confirm against the Flywheel SDK finder docs). Fail here with a clear
# message instead of an AttributeError when fw_project is used later on.
if fw_project is None:
    raise ValueError(f'No Flywheel project found with label {PROJECT_LABEL!r}')
Before starting off, we want to check your permission on the Flywheel Instance in order to proceed in this notebook.
# Minimum requirement per level, handed to check_user_permission below.
min_reqs = dict(
    site="user",
    group="ro",
    project=['containers_modify_metadata'],
)
# Group whose permission will be checked
GROUP_ID = input('Please enter the Group ID that you will be working with: ')
`check_user_permission` will return True if both the group and project meet the minimum requirements; otherwise, a compatible list will be printed.
# Check the current user against min_reqs for the chosen group and project.
# Left as a bare call so the notebook shows the returned value.
check_user_permission(fw, min_reqs, group=GROUP_ID, project=PROJECT_LABEL)
# Export the RedCap project-level information and pretty-print it.
project_info = project.export_project_info(format='json')
print(pprint.pformat(project_info))
{'creation_time': '2020-03-27 10:09:59', 'custom_record_label': '', 'ddp_enabled': 0, 'display_today_now_button': 1, 'external_modules': '', 'has_repeating_instruments_or_events': 0, 'in_production': 0, 'is_longitudinal': 1, 'missing_data_codes': '', 'production_time': '', 'project_grant_number': '', 'project_id': 10379, 'project_irb_number': '', 'project_language': 'EnglishUMN', 'project_notes': 'This is the project for testing.', 'project_pi_firstname': '', 'project_pi_lastname': '', 'project_title': 'Test_3b', 'purpose': 0, 'purpose_other': '', 'randomization_enabled': 0, 'record_autonumbering_enabled': 1, 'scheduling_enabled': 1, 'secondary_unique_field': '', 'surveys_enabled': 1}
# Map every arm number to the unique event names that belong to that arm.
# The earlier version shared a single list between all arms and only appended
# on the first event of each arm, so every arm showed the same (wrong) events.
arm_event = {}
# Flat list of every unique event name, in the order RedCap reports them
event_list = []
for event in project.events:
    arm_num = event['arm_num']
    event_name = event['event_name']
    unique_event_name = event['unique_event_name']
    event_list.append(unique_event_name)
    # Each arm gets its own list, created the first time the arm is seen
    arm_event.setdefault(arm_num, []).append(unique_event_name)
    print(f'Event Name: {event_name}, Unique Event Name: {unique_event_name}, Arm Number: {arm_num}')
print('\nBelow is the arm number with respective unique event name:')
pprint.pprint(arm_event)
Event Name: Timepoint 0, Unique Event Name: timepoint_0_arm_1, Arm Number: 1 Event Name: Timepoint 1, Unique Event Name: timepoint_1_arm_1, Arm Number: 1 Event Name: Timepoint 0, Unique Event Name: timepoint_0_arm_2, Arm Number: 2 Event Name: Timepoint 1, Unique Event Name: timepoint_1_arm_2, Arm Number: 2 Below is the arm number with respective unique event name: {1: ['timepoint_0_arm_1', 'timepoint_0_arm_2'], 2: ['timepoint_0_arm_1', 'timepoint_0_arm_2']}
# Show the arm names and arm numbers defined in the RedCap project.
print(f'Arm Names: {project.arm_names}')
print(f'No of Arms: {project.arm_nums}')
Arm Names: ('ARM1', 'ARM2') No of Arms: (1, 2)
# Collect the name of every form in the RedCap project.
form_list = list(project.forms)
print(f'Forms that is in the project: \n{form_list}')
# names_labels(do_print=True) is asked to print as it goes; each item of its
# return value is printed as well.
for labels in project.names_labels(do_print=True):
    print(labels)
# Export only the project's defining field (project.def_field) for every record.
record_list = project.export_records(fields=[project.def_field])
print(pprint.pformat(record_list))
[{'participant_id': '1', 'redcap_event_name': 'timepoint_0_arm_1'}, {'participant_id': '1', 'redcap_event_name': 'timepoint_1_arm_1'}, {'participant_id': '2', 'redcap_event_name': 'timepoint_0_arm_2'}, {'participant_id': '2', 'redcap_event_name': 'timepoint_1_arm_2'}, {'participant_id': '3', 'redcap_event_name': 'timepoint_0_arm_1'}, {'participant_id': '3', 'redcap_event_name': 'timepoint_1_arm_1'}, {'participant_id': '4', 'redcap_event_name': 'timepoint_0_arm_1'}, {'participant_id': '4', 'redcap_event_name': 'timepoint_1_arm_1'}]
# Export every record, then list which form each metadata field belongs to.
all_records = project.export_records()
metaData = project.metadata
print('Below is all the field name from the RedCap Project:\n')
for field in metaData:
    print('Form: %s, Field Name: %s' % (field['form_name'], field['field_name']))
# Blank line after the listing
print()
Below is all the field name from the RedCap Project: Form: enrollment_log, Field Name: participant_id Form: enrollment_log, Field Name: subject_initials Form: enrollment_log, Field Name: subject_fw_id Form: enrollment_log, Field Name: consent_date Form: enrollment_log, Field Name: hipaa_signed_yes_no Form: enrollment_log, Field Name: randomization_code Form: medical_history, Field Name: smoke Form: medical_history, Field Name: famhx Form: medical_history, Field Name: htn Form: medical_history, Field Name: ckd Form: medical_history, Field Name: obesity Form: d1_baseline_questionnaires, Field Name: d1_date Form: d1_baseline_questionnaires, Field Name: session_label_d1 Form: d1_baseline_questionnaires, Field Name: bl_pss_1 Form: d1_baseline_questionnaires, Field Name: bl_pss_2 Form: d1_baseline_questionnaires, Field Name: bl_pss_3 Form: d1_baseline_questionnaires, Field Name: bl_pss_4 Form: d1_baseline_questionnaires, Field Name: bl_pss_5 Form: d8_post_questionnaires, Field Name: d8_date Form: d8_post_questionnaires, Field Name: post_pss_1 Form: d8_post_questionnaires, Field Name: post_pss_2 Form: d8_post_questionnaires, Field Name: post_pss_3 Form: d8_post_questionnaires, Field Name: post_pss_4 Form: d8_post_questionnaires, Field Name: post_pss_5
# Export the form-event mapping (arm number, form, unique event name) and print it.
all_forms_and_events = project.export_fem()
print(pprint.pformat(all_forms_and_events))
[{'arm_num': 1, 'form': 'enrollment_log', 'unique_event_name': 'timepoint_0_arm_1'}, {'arm_num': 1, 'form': 'd1_baseline_questionnaires', 'unique_event_name': 'timepoint_1_arm_1'}, {'arm_num': 2, 'form': 'enrollment_log', 'unique_event_name': 'timepoint_0_arm_2'}, {'arm_num': 2, 'form': 'd8_post_questionnaires', 'unique_event_name': 'timepoint_1_arm_2'}]
# Exporting by event alone covers all the forms in the entire project;
# `timepoint_0_arm_1` is the unique_event_name.
project.export_records(events=['timepoint_0_arm_1'])
# Restrict the export to the enrollment_log form, once per arm.
arm1_enrollment = project.export_records(
    events=['timepoint_0_arm_1'],
    forms=['enrollment_log'],
)
arm2_enrollment = project.export_records(
    events=['timepoint_0_arm_2'],
    forms=['enrollment_log'],
)
for enrollment in (arm1_enrollment, arm2_enrollment):
    pprint.pprint(enrollment)
# Export every record as a DataFrame, indexed by the project's first field name.
all_records_df = project.export_records(format='df', df_kwargs={'index_col': project.field_names[0]})
# Bare expression so the notebook renders the table
all_records_df
redcap_event_name | subject_initials | subject_fw_id | consent_date | hipaa_signed_yes_no | randomization_code | bl_pss_1 | bl_pss_2 | bl_pss_3 | bl_pss_4 | bl_pss_5 | post_pss_1 | post_pss_2 | post_pss_3 | post_pss_4 | post_pss_5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
participant_id | ||||||||||||||||
1 | timepoint_0_arm_1 | jdkf | 98.0 | 2020-01-06 | 0.0 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | timepoint_1_arm_1 | NaN | NaN | NaN | NaN | NaN | 0.0 | 4.0 | 3.0 | 4.0 | 3.0 | NaN | NaN | NaN | NaN | NaN |
2 | timepoint_0_arm_2 | nkjn | 92.0 | 2020-01-06 | 1.0 | 2.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | timepoint_1_arm_2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | 2.0 | 2.0 | 1.0 | 1.0 |
3 | timepoint_0_arm_1 | qwoi | 234.0 | 2020-02-11 | 1.0 | 2.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | timepoint_1_arm_1 | NaN | NaN | NaN | NaN | NaN | 1.0 | 1.0 | 2.0 | 3.0 | 3.0 | NaN | NaN | NaN | NaN | NaN |
4 | timepoint_0_arm_1 | udjf | 744.0 | 2020-01-20 | 1.0 | 2.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | timepoint_1_arm_1 | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 3.0 | 2.0 | NaN | NaN | NaN | NaN | NaN |
# Export only the participant_id field for every record and print the result.
raw_records_id = project.export_records(fields=['participant_id'])
print(pprint.pformat(raw_records_id))
[{'participant_id': '1', 'redcap_event_name': 'timepoint_0_arm_1'}, {'participant_id': '1', 'redcap_event_name': 'timepoint_1_arm_1'}, {'participant_id': '2', 'redcap_event_name': 'timepoint_0_arm_2'}, {'participant_id': '2', 'redcap_event_name': 'timepoint_1_arm_2'}, {'participant_id': '3', 'redcap_event_name': 'timepoint_0_arm_1'}, {'participant_id': '3', 'redcap_event_name': 'timepoint_1_arm_1'}, {'participant_id': '4', 'redcap_event_name': 'timepoint_0_arm_1'}, {'participant_id': '4', 'redcap_event_name': 'timepoint_1_arm_1'}]
# Save the record ids into a list without any repeats, keeping the order in
# which each id first appears.
records_id = list(dict.fromkeys(row['participant_id'] for row in raw_records_id))
# the participant ID
print(records_id)
['1', '2', '3', '4']
# Display the form-event mapping exported earlier with export_fem()
all_forms_and_events
[{'arm_num': 1, 'form': 'enrollment_log', 'unique_event_name': 'timepoint_0_arm_1'}, {'arm_num': 1, 'form': 'd1_baseline_questionnaires', 'unique_event_name': 'timepoint_1_arm_1'}, {'arm_num': 2, 'form': 'enrollment_log', 'unique_event_name': 'timepoint_0_arm_2'}, {'arm_num': 2, 'form': 'd8_post_questionnaires', 'unique_event_name': 'timepoint_1_arm_2'}]
# Display the enrollment_log records exported for event timepoint_0_arm_1
arm1_enrollment
[{'consent_date': '2020-01-06', 'enrollment_log_complete': '2', 'hipaa_signed_yes_no': '0', 'participant_id': '1', 'randomization_code': '1', 'redcap_event_name': 'timepoint_0_arm_1', 'subject_fw_id': '098', 'subject_initials': 'jdkf'}, {'consent_date': '2020-02-11', 'enrollment_log_complete': '2', 'hipaa_signed_yes_no': '1', 'participant_id': '3', 'randomization_code': '2', 'redcap_event_name': 'timepoint_0_arm_1', 'subject_fw_id': '234', 'subject_initials': 'qwoi'}, {'consent_date': '2020-01-20', 'enrollment_log_complete': '2', 'hipaa_signed_yes_no': '1', 'participant_id': '4', 'randomization_code': '2', 'redcap_event_name': 'timepoint_0_arm_1', 'subject_fw_id': '744', 'subject_initials': 'udjf'}]
# Display the list of form names collected from project.forms
form_list
['d1_baseline_questionnaires', 'd8_post_questionnaires', 'enrollment_log']
# Group the exported records by form name, walking every form/event pair
# in all_forms_and_events.
form_records = {}
for mapping in all_forms_and_events:
    form_name = mapping['form']
    exported = project.export_records(events=[mapping['unique_event_name']], forms=[form_name])
    # First time this form is seen: start its list
    if form_name not in form_records:
        form_records[form_name] = []
    form_records[form_name].extend(exported)
pprint.pprint(form_records)
View all records from RedCap in table view
# Gather the records of every form/event pair into one flat list.
forms_info = []
for event in all_forms_and_events:
    records = project.export_records(events=[event['unique_event_name']], forms=[event['form']])
    forms_info.extend(records)
# Build the table in one step from the list of record dicts.
# DataFrame.append() was removed in pandas 2.0, and calling it once per row
# copied the whole frame every time.
forms_info_df = pd.DataFrame(forms_info)
# Show every column instead of letting pandas truncate the wide table
pd.set_option('display.max_columns', None)
display(forms_info_df)
# Reload the project, then record label, id and info for each of its subjects.
fw_project = fw_project.reload()
# Subject label -> subject id
subj_info = {}
# Lookup table for FW instances to compare with RedCap
all_subj_info = []
for subject in fw_project.subjects.iter():
    subject = subject.reload()
    subj_info[subject.label] = subject.id
    all_subj_info.append({
        'subj_label': subject.label,
        'subj_id': subject.id,
        'subj_info_label': subject.info,
    })
{'fw_id': '098', 'enrollment_log': {'subject_initials': 'jdkf', 'subject_fw_id': '098', 'redcap_event_name': 'timepoint_0_arm_1', 'hipaa_signed_yes_no': '0', 'enrollment_log_complete': '2', 'participant_id': '1', 'randomization_code': '1', 'consent_date': '2020-01-06'}, 'initials': 'jdkf'} {'fw_id': '092', 'enrollment_log': {'subject_initials': 'nkjn', 'subject_fw_id': '092', 'redcap_event_name': 'timepoint_0_arm_2', 'hipaa_signed_yes_no': '1', 'enrollment_log_complete': '2', 'participant_id': '2', 'randomization_code': '2', 'consent_date': '2020-01-06'}, 'initials': 'nkjn'} {'fw_id': '234', 'enrollment_log': {'subject_initials': 'qwoi', 'subject_fw_id': '234', 'redcap_event_name': 'timepoint_0_arm_1', 'hipaa_signed_yes_no': '1', 'enrollment_log_complete': '2', 'participant_id': '3', 'randomization_code': '2', 'consent_date': '2020-02-11'}, 'initials': 'qwoi'} {'fw_id': '744', 'enrollment_log': {'subject_initials': 'udjf', 'subject_fw_id': '744', 'redcap_event_name': 'timepoint_0_arm_1', 'hipaa_signed_yes_no': '1', 'enrollment_log_complete': '2', 'participant_id': '4', 'randomization_code': '2', 'consent_date': '2020-01-20'}, 'initials': 'udjf'}
Compare subjects in RedCap and FW instance
`enrollment_log` or intake forms from RedCap
`enrollment_log`
# (end of the note above) or intake forms exist on the FW Subject info attributes.

# Labels of the subjects already present in the FW project
subj_in_fw = [subj['subj_label'] for subj in all_subj_info]
enrollment_data = form_records['enrollment_log']
# RedCap participants that have no FW subject with the same label.
# The earlier test `set(records_id) not in set(subj_in_fw)` asked whether the
# whole set was an *element* of the label set, which is never the case, so the
# else branch below could not run.
known_labels = set(subj_in_fw)
missing_id = [rec_id for rec_id in records_id if rec_id not in known_labels]
if missing_id:
    for new_id in missing_id:
        # The participant's enrollment record (if any) stored under 'enrollment_log'
        tmp_data = {'enrollment_log': data for data in enrollment_data if data['participant_id'] == new_id}
        # `project_id` was never defined in this notebook; the parent is fw_project
        new_subj = fw_project.add_subject(label=new_id, project=fw_project.id, info=tmp_data)
else:
    # Every RedCap participant exists on FW: make sure each subject carries its enrollment log
    for sub in all_subj_info:
        if 'enrollment_log' not in sub['subj_info_label']:
            tmp = {'enrollment_log': data for data in enrollment_data if data['participant_id'] == sub['subj_label']}
            update_subj = flywheel.models.Subject(info=tmp)
            fw.modify_subject(sub['subj_id'], update_subj)
You can run the cells below to create new sessions using the `subj_forms` lookup table.
# This variable holds every form (including the enrollment_log) along with its exported records
form_records
# Extend the subj_forms lookup table (fw_session_label / redcap_form_label)
# with the participant ids that have records in each RedCap form.
for form_name, form_data in form_records.items():
    # The enrollment_log is skipped (everyone should have the same enrollment log)
    if form_name == 'enrollment_log':
        continue
    for entry in subj_forms:
        if entry['redcap_form_label'] == form_name:
            entry['records_id'] = [rec['participant_id'] for rec in form_data]
# View the added information on the list
subj_forms
[{'fw_session_label': 'session01', 'records_id': ['1', '3', '4'], 'redcap_form_label': 'd1_baseline_questionnaires'}, {'fw_session_label': 'session02', 'records_id': ['2'], 'redcap_form_label': 'd8_post_questionnaires'}]
The cell below will then create a new session on FW UI
# Create, for every FW subject, the session of each subj_forms entry that
# lists the subject's label among its RedCap record ids.
for subj in fw_project.subjects.iter():
    subj = subj.reload()
    label = subj.label
    for i in subj_forms:
        # 'records_id' is only added to entries whose form had exported
        # records, so an entry without it is treated as matching no subject
        # instead of raising KeyError.
        if label in i.get('records_id', ()):
            # The label is set on creation; the follow-up update() with the
            # same label was redundant and has been dropped.
            new_sess = subj.add_session(label=i['fw_session_label'])
This will only work if you have created the corresponding sessions on the FW instance.
# Copy each RedCap form record into the info of the matching FW session.
for sess in fw_project.sessions.iter():
    sess = sess.reload()
    for entry in subj_forms:
        # Skip entries for other session labels, or forms already stored on the session
        if entry['fw_session_label'] != sess.label or entry['redcap_form_label'] in sess.info:
            continue
        rc_label = entry['redcap_form_label']
        # Keep the record of the participant whose id equals the session's
        # subject label (the last one wins if several match)
        tmp_info = {}
        for form_record in form_records[rc_label]:
            if form_record['participant_id'] == sess.subject['label']:
                tmp_info[rc_label] = form_record
        update_sess = flywheel.models.Session(info=tmp_info)
        fw.modify_session(sess.id, update_sess)