Core

`combine_measures(df, df_map, index_col='SUBJECT', measure_prefix='CBCL', sort_col=None, drop_duplicates=True)`

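Stacks the fields of several measures into a single DataFrame with shared column names, adding a `<measure_prefix>_MEASURE` column that records which measure each row came from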

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	DataFrame	required
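`df_map`	`DataFrame`	DataFrame whose index holds the names of the new (shared) columns and whose columns each represent one measure; each cell is the name of the field in `df` that supplies that new column for that measure	required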

Returns:

Type	Description
`DataFrame`	pd.DataFrame: Combined DataFrame containing the index column, the `<measure_prefix>_MEASURE` column and the shared field columns

Source code in pondtools\core.py

def combine_measures(
    df: pd.DataFrame,
    df_map: pd.DataFrame,
    index_col: Optional[str] = "SUBJECT",
    measure_prefix: Optional[str] = "CBCL",
    sort_col: Optional[str] = None,
    drop_duplicates: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Stack the fields of several measures into one DataFrame with shared column names.

    For every measure (column of ``df_map``) the mapped fields that exist in
    ``df`` are selected together with ``index_col``, renamed to the shared
    names given by the index of ``df_map``, tagged with the measure name and
    stacked on top of each other.

    Args:
        df (pd.DataFrame): Source data; must contain ``index_col``.
        df_map (pd.DataFrame): Mapping table. Each column is one measure, the
            index holds the shared (new) column names and each cell is the
            name of the field in ``df`` supplying that column for that
            measure. NaN cells are ignored, and mapped fields that are not
            columns of ``df`` are skipped.
        index_col (str): Column identifying a subject; used as the key when
            dropping duplicates. Defaults to "SUBJECT".
        measure_prefix (str): Prefix of the inserted ``<prefix>_MEASURE``
            column that records the measure each row came from. Defaults to
            "CBCL".
        sort_col (str): Optional column of the combined frame. When given,
            rows are sorted by ``index_col``, then by their number of
            non-null values, then by ``sort_col``, and the last row per
            ``index_col`` is the one kept when dropping duplicates. When
            None, rows stay in measure order and the first row per
            ``index_col`` is kept.
        drop_duplicates (bool): Whether to keep only one row per
            ``index_col`` value. Defaults to True.

    Returns:
        pd.DataFrame: Combined DataFrame with ``index_col``, the
        ``<measure_prefix>_MEASURE`` column and the shared field columns.
        Rows keep their index labels from ``df``.
    """

    frames = []

    # DataFrame.iteritems() was removed in pandas 2.0; items() is the
    # equivalent that exists in both old and new releases.
    for measure, fields in df_map.items():
        fields = fields.dropna()
        # Invert {new name: old field} into {old field: new name} for rename().
        rename_map = {old: new for new, old in fields.to_dict().items()}
        present = [old for old in fields.to_list() if old in df.columns]

        temp = df[[index_col] + present].rename(columns=rename_map)

        value_cols = temp.columns.to_list()
        value_cols.remove(index_col)
        temp.insert(1, f"{measure_prefix}_MEASURE", measure)
        # Rows with no value at all for this measure carry no information.
        frames.append(temp.dropna(how="all", subset=value_cols))

    # DataFrame.append() was removed in pandas 2.0; a single concat also
    # avoids re-copying the accumulated frame on every iteration. With no
    # measures at all, fall back to the empty frame the accumulator used to
    # start from.
    df_new = pd.concat(frames) if frames else pd.DataFrame()

    if sort_col is None:
        if drop_duplicates:
            df_new = df_new.drop_duplicates(subset=[index_col], keep="first")
        return df_new

    # Sorting ascending on the non-null count puts the most complete row of
    # each subject last, which is the row keep="last" retains.
    df_new["Non_Null_Count"] = df_new.notnull().sum(axis=1)
    df_new = df_new.sort_values([index_col, "Non_Null_Count", sort_col])
    if drop_duplicates:
        df_new = df_new.drop_duplicates(subset=[index_col], keep="last")

    return df_new.drop(columns=["Non_Null_Count"])

`combine_measures_single(df, new_field, old_fields, index_col='SUBJECT')`

Creates a new dataframe column that combines values from multiple old fields

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	DataFrame	required
`new_field`	`str`	Name of new field	required
`old_fields`	`List[str]`	List of old fields for combination in order of priority	required

Returns:

Type	Description
`DataFrame`	pd.DataFrame: DataFrame with new combined field added as column

Source code in pondtools\core.py

def combine_measures_single(
    df: pd.DataFrame, new_field: str, old_fields: List[str], index_col: str = "SUBJECT"
) -> pd.DataFrame:
    """
    Creates a new dataframe column that combines values from multiple old fields

    Args:
        df (pd.DataFrame): DataFrame; must contain ``index_col``
        new_field (str): Name of new field
        old_fields (List[str]): List of old fields for combination in order of priority.
            Fields that are not columns of ``df`` are skipped.
        index_col (str): Column identifying a subject. Only the first non-null
            value found per ``index_col`` value (in priority order, then row
            order) is kept. Defaults to "SUBJECT".

    Returns:
        pd.DataFrame: ``df`` left-joined on its index with the combined
        ``new_field`` column.
    """

    frames = []

    for measure in old_fields:
        if measure in df.columns:
            temp = df[index_col].to_frame()
            temp[new_field] = df[measure]
            frames.append(temp)

    # DataFrame.append() was removed in pandas 2.0; collect the pieces and
    # concatenate once instead. With no usable field, fall back to the empty
    # frame the accumulator used to start from.
    df_new = pd.concat(frames) if frames else pd.DataFrame()

    # Frames were stacked in priority order, so keep="first" picks the
    # highest-priority non-null value for each subject.
    df_new = df_new.dropna().drop_duplicates(subset=[index_col], keep="first")

    # NOTE(review): both sides of this index-based merge contain `index_col`,
    # so pandas returns it as `<index_col>_x` / `<index_col>_y`. Kept as-is
    # because callers may rely on those names; confirm whether this is intended.
    return df.merge(df_new, how="left", left_index=True, right_index=True)

`get_filepath(filename)`

Get filepath of the data/resources stored in the pondtools module

Parameters:

Name	Type	Description	Default
`filename`	`str`	Name of filename	required

Returns:

Type	Description
`str`	str: Path of the file

Source code in pondtools\core.py

def get_filepath(filename: str) -> str:
    """
    Locate a data/resource file bundled in the ``pondtools.resources`` package

    Args:
        filename (str): Name of the file inside ``pondtools.resources``

    Returns:
        str: Location of the file, as produced by joining ``filename`` onto
        the package's resource root.
        NOTE(review): the value is whatever ``joinpath`` returns; if ``files``
        is ``importlib.resources.files`` that is a path-like object rather
        than a plain ``str`` - confirm against the module's imports.
    """
    resource_root = files("pondtools.resources")
    return resource_root.joinpath(filename)