Core
combine_measures(df, df_map, index_col='SUBJECT', measure_prefix='CBCL', sort_col=None, drop_duplicates=True)
Builds a new DataFrame that stacks the rows of several measures under shared field names, tagging each row with the measure it came from
Parameters:
Name | Type | Description | Default |
---|---|---|---|
df |
DataFrame |
Source DataFrame containing `index_col` and the old fields named in `df_map` |
required |
df_map |
DataFrame |
DataFrame whose index holds the new field names, whose columns are the different measures, and whose cells name the corresponding old field in `df` |
required |
Returns:
Type | Description |
---|---|
DataFrame |
pd.DataFrame: Combined DataFrame holding `index_col`, a `{measure_prefix}_MEASURE` column naming the source measure, and the new combined fields as columns |
Source code in pondtools\core.py
def combine_measures(
    df: pd.DataFrame,
    df_map: pd.DataFrame,
    index_col: Optional[str] = "SUBJECT",
    measure_prefix: Optional[str] = "CBCL",
    sort_col: Optional[str] = None,
    drop_duplicates: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Stack several measures into one DataFrame that uses shared field names.

    For every measure (column of ``df_map``) the old fields listed in that
    column are selected from ``df``, renamed to the new field names (the index
    of ``df_map``), and tagged with the measure name. The per-measure frames
    are then concatenated row-wise.

    Args:
        df (pd.DataFrame): Source data containing ``index_col`` and the old
            fields named in ``df_map``.
        df_map (pd.DataFrame): Mapping whose index holds the new field names,
            whose columns are the measures, and whose cells name the old field
            in ``df`` (NaN where a measure has no such field).
        index_col (str): Column identifying a subject; used for de-duplication.
        measure_prefix (str): Prefix of the inserted ``<prefix>_MEASURE``
            column that records which measure a row came from.
        sort_col (str, optional): New field name used to order rows. When
            given, rows are sorted by ``index_col``, then by their number of
            non-null values, then by ``sort_col``.
        drop_duplicates (bool): Keep a single row per ``index_col`` value: the
            first one when ``sort_col`` is None, otherwise the last one after
            sorting.

    Returns:
        pd.DataFrame: The combined frame. Row labels are those of ``df``. If
        ``df_map`` has no columns, an empty frame containing only ``index_col``
        and the measure column is returned.
    """
    measure_col = f"{measure_prefix}_MEASURE"
    frames = []

    # ``DataFrame.iteritems`` was removed in pandas 2.0; ``items`` is the
    # equivalent that works on both old and new versions.
    for measure, fields in df_map.items():
        fields = fields.dropna()
        # Invert the mapping: old field name (cell) -> new field name (index).
        rename_map = {old: new for new, old in fields.items()}
        # Old fields that are absent from ``df`` are skipped silently.
        present = [old for old in fields.to_list() if old in df.columns]

        temp = df[[index_col] + present].rename(columns=rename_map)
        value_cols = temp.columns.to_list()
        value_cols.remove(index_col)
        temp.insert(1, measure_col, measure)
        # Discard rows that carry no value at all for this measure.
        frames.append(temp.dropna(how="all", subset=value_cols))

    if not frames:
        # Nothing to combine; return an empty frame instead of failing later
        # on the missing ``index_col`` column.
        return pd.DataFrame(columns=[index_col, measure_col])

    # ``DataFrame.append`` was removed in pandas 2.0; a single concat is also
    # cheaper than growing the frame inside the loop.
    df_new = pd.concat(frames)

    if sort_col is None:
        if drop_duplicates:
            df_new = df_new.drop_duplicates(subset=[index_col], keep="first")
        return df_new

    # Helper column so that, per subject, the most complete row sorts last.
    df_new["Non_Null_Count"] = df_new.notnull().sum(axis=1)
    df_new = df_new.sort_values([index_col, "Non_Null_Count", sort_col])
    if drop_duplicates:
        df_new = df_new.drop_duplicates(subset=[index_col], keep="last")
    # The helper column is internal and is removed whether or not
    # duplicates were dropped.
    return df_new.drop(columns=["Non_Null_Count"])
combine_measures_single(df, new_field, old_fields, index_col='SUBJECT')
Creates a new dataframe column that combines values from multiple old fields
Parameters:
Name | Type | Description | Default |
---|---|---|---|
df |
DataFrame |
Source DataFrame containing `index_col` and the old fields to combine |
required |
new_field |
str |
Name of new field |
required |
old_fields |
List[str] |
List of old fields for combination in order of priority |
required |
Returns:
Type | Description |
---|---|
DataFrame |
pd.DataFrame: DataFrame with new combined field added as column |
Source code in pondtools\core.py
def combine_measures_single(
    df: pd.DataFrame, new_field: str, old_fields: List[str], index_col: str = "SUBJECT"
) -> pd.DataFrame:
    """
    Add a column that combines values from several old fields.

    The old fields are stacked in priority order, rows without a value (or
    without an ``index_col`` value) are discarded, and the first remaining row
    per ``index_col`` value is kept. The surviving values are joined back onto
    ``df`` by row label, so rows without any usable value get NaN.

    Args:
        df (pd.DataFrame): Source data containing ``index_col`` and the old
            fields. It is not modified.
        new_field (str): Name of new field
        old_fields (List[str]): List of old fields for combination in order of
            priority. Names that are not columns of ``df`` are skipped.
        index_col (str): Column identifying a subject; only one row per value
            of this column receives a combined value.

    Returns:
        pd.DataFrame: Copy of ``df`` with ``new_field`` added as a column.
    """
    frames = []
    for measure in old_fields:
        if measure in df.columns:
            temp = df[[index_col]].copy()
            temp[new_field] = df[measure]
            frames.append(temp)

    if not frames:
        # None of the requested fields exist: there is nothing to combine, so
        # the new field is entirely missing.
        result = df.copy()
        result[new_field] = float("nan")
        return result

    # ``DataFrame.append`` was removed in pandas 2.0; concatenate once instead.
    combined = pd.concat(frames)
    combined = combined.dropna().drop_duplicates(subset=[index_col], keep="first")

    # Merge only the new field. Bringing ``index_col`` along as well would
    # collide with the column already in ``df`` and yield ``<index_col>_x`` /
    # ``<index_col>_y`` instead of leaving the original column untouched.
    return df.merge(
        combined[[new_field]], how="left", left_index=True, right_index=True
    )
get_filepath(filename)
Get filepath of the data/resources stored in the pondtools module
Parameters:
Name | Type | Description | Default |
---|---|---|---|
filename |
str |
Name of filename |
required |
Returns:
Type | Description |
---|---|
str |
str: Path of the file |
Source code in pondtools\core.py
def get_filepath(filename: str) -> str:
    """
    Locate a data/resource file bundled in the ``pondtools.resources`` package.

    Args:
        filename (str): Name of the file inside ``pondtools.resources``.

    Returns:
        str: Path of the file.

    NOTE(review): the value is returned exactly as produced by
    ``files(...).joinpath(...)``; presumably this is a path-like object
    rather than a plain ``str`` - confirm against how ``files`` is imported.
    """
    resource_root = files("pondtools.resources")
    resource_path = resource_root.joinpath(filename)
    return resource_path