Skip to content

Core

combine_measures(df, df_map, index_col='SUBJECT', measure_prefix='CBCL', sort_col=None, drop_duplicates=True)

Stacks the columns of several measures into one long-format DataFrame with shared column names, tagging each row with the measure it came from

Parameters:

Name Type Description Default
df DataFrame

DataFrame

required
df_map DataFrame

DataFrame whose index holds the names of the new (shared) columns and whose columns each represent one measure; each cell names that measure's source column in df

required

Returns:

Type Description
DataFrame

pd.DataFrame: DataFrame with new combined field added as column

Source code in pondtools\core.py
def combine_measures(
    df: pd.DataFrame,
    df_map: pd.DataFrame,
    index_col: Optional[str] = "SUBJECT",
    measure_prefix: Optional[str] = "CBCL",
    sort_col: Optional[str] = None,
    drop_duplicates: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Stacks the columns of several measures into one set of shared columns

    For every measure (a column of ``df_map``) the matching columns of ``df``
    are selected, renamed to the shared names held in the index of ``df_map``
    and tagged with the measure name in a ``{measure_prefix}_MEASURE`` column.
    Rows without a value in any of that measure's columns are discarded and the
    per-measure frames are stacked on top of each other.

    Args:
        df (pd.DataFrame): DataFrame holding ``index_col`` and the measure columns
        df_map (pd.DataFrame): DataFrame whose index holds the new column names
            and whose columns each represent one measure; each cell names the
            source column in ``df`` (NaN where the measure has no such field)
        index_col (str): Column identifying a subject; used for de-duplication
        measure_prefix (str): Prefix of the column that records the measure name
        sort_col (str): If None, the first stacked row per ``index_col`` value is
            kept (measures are stacked in ``df_map`` column order). Otherwise
            rows are sorted by ``index_col``, their number of non-null values
            and ``sort_col``, and the last row per ``index_col`` value is kept.
        drop_duplicates (bool): Whether to keep a single row per ``index_col`` value

    Returns:
        pd.DataFrame: Stacked DataFrame with ``index_col``, the measure column
        and the renamed measure fields. If ``df_map`` has no columns, an empty
        DataFrame with only ``index_col`` and the measure column is returned.
    """

    measure_col = f"{measure_prefix}_MEASURE"
    frames = []

    # DataFrame.iteritems() and DataFrame.append() were removed in pandas 2.0;
    # iterate with items() and stack everything with a single pd.concat.
    for measure, fields in df_map.items():
        fields = fields.dropna()
        # df_map maps new name -> old column; rename() needs old -> new
        rename_map = {old: new for new, old in fields.to_dict().items()}
        cols = [index_col] + [col for col in fields.to_list() if col in df.columns]

        temp = df[cols].rename(columns=rename_map)

        value_cols = temp.columns.to_list()
        value_cols.remove(index_col)
        temp.insert(1, measure_col, measure)
        # Keep only rows that have at least one value for this measure
        frames.append(temp.dropna(how="all", subset=value_cols))

    if not frames:
        # No measures defined: nothing to stack, sort or de-duplicate
        return pd.DataFrame(columns=[index_col, measure_col])

    df_new = pd.concat(frames)

    if sort_col is None:
        if drop_duplicates:
            df_new = df_new.drop_duplicates(subset=[index_col], keep="first")
        return df_new

    # Sorting ascending and keeping the last row prefers, per subject, the row
    # with the most non-null values and then the largest sort_col value.
    df_new["Non_Null_Count"] = df_new.notnull().sum(axis=1)
    df_new = df_new.sort_values([index_col, "Non_Null_Count", sort_col])
    if drop_duplicates:
        df_new = df_new.drop_duplicates(subset=[index_col], keep="last")

    return df_new.drop(columns=["Non_Null_Count"])

combine_measures_single(df, new_field, old_fields, index_col='SUBJECT')

Creates a new dataframe column that combines values from multiple old fields

Parameters:

Name Type Description Default
df DataFrame

DataFrame

required
new_field str

Name of new field

required
old_fields List[str]

List of old fields for combination in order of priority

required

Returns:

Type Description
DataFrame

pd.DataFrame: DataFrame with new combined field added as column

Source code in pondtools\core.py
def combine_measures_single(
    df: pd.DataFrame, new_field: str, old_fields: List[str], index_col: str = "SUBJECT"
) -> pd.DataFrame:
    """
    Creates a new dataframe column that combines values from multiple old fields

    For each ``index_col`` value the first non-null value found is used,
    searching ``old_fields`` in the given order. The value is attached to the
    row (index label) of ``df`` in which it was found; all other rows get NaN.

    Args:
        df (pd.DataFrame): DataFrame
        new_field (str): Name of new field
        old_fields (List[str]): List of old fields for combination in order of priority.
            Fields that are not columns of ``df`` are ignored.
        index_col (str): Column identifying a subject; one value is kept per subject

    Returns:
        pd.DataFrame: Copy of ``df`` with new combined field added as column
        (all NaN when none of ``old_fields`` is present in ``df``)
    """

    present = [field for field in old_fields if field in df.columns]
    if not present:
        # Nothing to combine: same outcome as a left merge with no matches
        result = df.copy()
        result[new_field] = float("nan")
        return result

    # DataFrame.append() was removed in pandas 2.0; stack candidates with concat.
    # Stacking in priority order lets keep="first" pick the preferred field.
    candidates = pd.concat(
        [df[index_col].to_frame().assign(**{new_field: df[field]}) for field in present]
    )
    candidates = candidates.dropna().drop_duplicates(subset=[index_col], keep="first")

    # Merge only the new column: merging index_col as well would return it as
    # `<index_col>_x` / `<index_col>_y` instead of leaving the original column.
    return df.merge(
        candidates[[new_field]], how="left", left_index=True, right_index=True
    )

get_filepath(filename)

Get filepath of the data/resources stored in the pondtools module

Parameters:

Name Type Description Default
filename str

Name of filename

required

Returns:

Type Description
str

str: Path of the file

Source code in pondtools\core.py
def get_filepath(filename: str) -> str:
    """
    Locate a data/resource file shipped in the ``pondtools.resources`` package

    Args:
        filename (str): Name of the file inside ``pondtools.resources``

    Returns:
        str: Location of the file, obtained by joining ``filename`` onto
        ``files("pondtools.resources")``
    """
    # NOTE(review): `files` is imported outside this block. If it is
    # importlib.resources.files, the value returned is a path-like object
    # rather than a plain str, despite the annotation -- confirm.
    resource_root = files("pondtools.resources")
    return resource_root.joinpath(filename)