Begin by copying this file into the main directory of your workspace.
import sys
!{sys.executable} -m pip install plotly
import pandas as pd
import numpy as np
import plotly.express as px
# Load the pre-processed CSV exports: one demographics table and one
# encounters table for each of the train, validation and test splits.

# Demographics tables.
train_demo_dat = pd.read_csv('gcs/pre-processed/train/train_demographics.csv')
valid_demo_dat = pd.read_csv('gcs/pre-processed/valid/valid_demographics.csv')
test_demo_dat = pd.read_csv('gcs/pre-processed/test/test_demographics.csv')

# Encounters tables.
train_encount_dat = pd.read_csv('gcs/pre-processed/train/train_encounters.csv')
valid_encount_dat = pd.read_csv('gcs/pre-processed/valid/valid_encounters.csv')
test_encount_dat = pd.read_csv('gcs/pre-processed/test/test_encounters.csv')
We start by loading the data. Once we have the test, training, and validation splits of both the demographic and the encounter tables, we combine the splits and join the two tables on the encounter number, which gives us demographics at the patient level rather than the encounter level.
## Combine the test/train/validation splits of each table
# pd.concat replaces the chained DataFrame.append calls (append was removed
# in pandas 2.0). Like append, it stacks the frames in the given order and
# keeps each split's original row index.
encount_dat = pd.concat(
    [test_encount_dat, train_encount_dat, valid_encount_dat])
demo_dat = pd.concat([test_demo_dat, train_demo_dat, valid_demo_dat])

# Number of distinct encounter numbers in the demographics table.
# (This was previously computed from encount_dat by mistake.)
num_dem = len(demo_dat[['ENCOUNTER_NUM']].drop_duplicates())

# Encounter -> patient lookup used to attach PATIENT_DK to the demographics.
encount_dat_pt = encount_dat[['ENCOUNTER_NUM', 'PATIENT_DK']]
# NOTE(review): this is the raw encounter row count, duplicates included.
# An earlier de-duplicated count was overwritten by this assignment before
# being read, so it has been dropped -- confirm which count is wanted.
num_enc = len(encount_dat_pt['ENCOUNTER_NUM'])

# Join with patient data
# Left join: every demographics row is kept; PATIENT_DK is NaN when the
# encounter number has no match in the encounters table.
patient_dat = demo_dat.merge(
    encount_dat_pt, on='ENCOUNTER_NUM', how='left')

# One row per distinct (PATIENT_DK, sex) pair.
patient_sex = patient_dat[['PATIENT_DK', 'sex']]
patient_sex_unique = patient_sex.drop_duplicates()
Once the data is prepped, we can group on the sex field to get patient counts by sex.
# Reduce to one row per distinct (PATIENT_DK, sex) pair so that a patient
# with several encounters is not counted more than once per sex value.
patient_sex = patient_dat[['PATIENT_DK', 'sex']]
patient_sex_unique = patient_sex.drop_duplicates()

# Count the PATIENT_DK values in each sex group, then relabel for display.
# NOTE(review): the 'M'/'F' labels are assigned by position, so they rely on
# the grouped output containing exactly two rows in that order -- confirm
# against the encoding of the `sex` column.
sex = (
    patient_sex_unique
    .groupby('sex', as_index=False)
    .count()
    .assign(sex=['M', 'F'])
    .rename(columns={'sex': 'Sex', 'PATIENT_DK': 'Count'})
)
sex
Finally, we use the plotly package to plot the results.
# Bar chart of the per-sex counts: one bar per row of `sex`, coloured by
# the same column that is on the x-axis.
fig_sex = px.bar(
    sex,
    x="Sex",
    y="Count",
    color="Sex",
    labels={"Sex": "Sex", "Count": "Number of Patients"},
)
# Set the paper background colour and hide the legend in a single layout
# update.
fig_sex.update_layout(paper_bgcolor="#f9f9f9", showlegend=False)
fig_sex