Skip to content

MESQUAL Pandas Util prepend_model_prop_levels

prepend_model_prop_levels

prepend_model_prop_levels(data: Series | DataFrame, model: DataFrame, *properties, prepend_to_top: bool = True, match_on_level: str = None) -> Series | DataFrame

Prepend model properties as new index levels to data.

Searches for an index level in data that matches the model's index, then prepends specified properties from the model as new index levels.

Parameters:

Name Type Description Default
data Series | DataFrame

The pandas object to add properties to.

required
model DataFrame

DataFrame containing properties to prepend, with matching index.

required
*properties

Column names from model to use as new index levels.

()
prepend_to_top bool

If True, add properties at the beginning of index levels. If False, add at the end.

True
match_on_level str

Optional level name to constrain matching to a specific level. Useful in case there are multiple index levels in data that match the model's index.

None

Returns:

Type Description
Series | DataFrame

Copy of data with properties prepended as new index levels.

Raises:

Type Description
ValueError

If any property is not found in model columns.

Energy Domain Context

In Energy Systems Analysis, you often have to groupby and aggregate by certain properties. This module makes it easy to include the properties as a new index level before performing the groupby - agg pipeline.

Example:

>>> # You have a generation time-series df
>>> print(gen_df)  # Original DataFrame
    generator            GenA  GenB  GenC  SolarA  WindA
    2024-01-01 00:00:00   100   200   150      50     80
    2024-01-01 01:00:00   120   180   170      60     90
    2024-01-01 02:00:00   110   190   160      55     85

>>> # You have a generator model df
>>> print(model_df)
              zone technology  is_res
    generator
    GenA        DE    nuclear   False
    GenB        DE       coal   False
    GenC        FR        gas   False
    SolarA      DE      solar    True
    WindA       NL       wind    True

>>> gen_with_props = prepend_model_prop_levels(gen_df, model_df, 'zone', 'is_res')
>>> print(gen_with_props)  # DataFrame with prepended properties
    is_res              False            True
    zone                   DE        FR     DE    NL
    generator            GenA GenB GenC SolarA WindA
    2024-01-01 00:00:00   100  200  150     50    80
    2024-01-01 01:00:00   120  180  170     60    90
    2024-01-01 02:00:00   110  190  160     55    85

>>> gen_by_zone_and_type = gen_with_props.T.groupby(level=['zone', 'is_res']).sum().T
>>> print(gen_by_zone_and_type)  # grouped and aggregated
    zone                   DE          FR    NL
    is_res              False True  False True
    2024-01-01 00:00:00   300    50   150    80
    2024-01-01 01:00:00   300    60   170    90
    2024-01-01 02:00:00   300    55   160    85
Source code in submodules/mesqual/mesqual/utils/pandas_utils/pend_props.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def prepend_model_prop_levels(
        data: pd.Series | pd.DataFrame,
        model: pd.DataFrame,
        *properties,
        prepend_to_top: bool = True,
        match_on_level: str | None = None,
) -> pd.Series | pd.DataFrame:
    """Prepend model properties as new index levels to data.

    Searches for an index level in data that matches the model's index, then
    prepends specified properties from the model as new index levels.

    Args:
        data: The pandas object to add properties to.
        model: DataFrame containing properties to prepend, with matching index.
        *properties: Column names from model to use as new index levels.
            Entries that are None or '' are ignored. Properties that already
            exist as a level name on the matched axis are skipped.
        prepend_to_top: If True, add properties at the beginning of index levels.
            If False, add at the end. Note that with True each property is
            inserted at position 0 in the order given, so the last-listed
            property ends up as the outermost level (see the example below).
        match_on_level: Optional level name to constrain matching to a specific
            level. Useful in case there are multiple index levels in data that
            match the model's index.

    Returns:
        Copy of data with properties prepended as new index levels. If no
        (non-empty) properties are given, an unmodified copy is returned.

    Raises:
        ValueError: If any property is not found in model columns.

    Energy Domain Context:
        In Energy Systems Analysis, you often have to groupby and aggregate
        by certain properties. This function makes it easy to include the
        properties as a new index level before performing the groupby - agg
        pipeline.

    Example:

        >>> # You have a generation time-series df
        >>> print(gen_df)  # Original DataFrame
            generator            GenA  GenB  GenC  SolarA  WindA
            2024-01-01 00:00:00   100   200   150      50     80
            2024-01-01 01:00:00   120   180   170      60     90
            2024-01-01 02:00:00   110   190   160      55     85

        >>> # You have a generator model df
        >>> print(model_df)
                      zone technology  is_res
            generator
            GenA        DE    nuclear   False
            GenB        DE       coal   False
            GenC        FR        gas   False
            SolarA      DE      solar    True
            WindA       NL       wind    True

        >>> gen_with_props = prepend_model_prop_levels(gen_df, model_df, 'zone', 'is_res')
        >>> print(gen_with_props)  # DataFrame with prepended properties
            is_res              False            True
            zone                   DE        FR     DE    NL
            generator            GenA GenB GenC SolarA WindA
            2024-01-01 00:00:00   100  200  150     50    80
            2024-01-01 01:00:00   120  180  170     60    90
            2024-01-01 02:00:00   110  190  160     55    85

        >>> gen_by_zone_and_type = gen_with_props.T.groupby(level=['zone', 'is_res']).sum().T
        >>> print(gen_by_zone_and_type)  # grouped and aggregated
            zone                   DE          FR    NL
            is_res              False True  False True
            2024-01-01 00:00:00   300    50   150    80
            2024-01-01 01:00:00   300    60   170    90
            2024-01-01 02:00:00   300    55   160    85
    """
    tmp = data.copy()
    # Drop placeholder entries so callers can pass optional property names.
    properties = [p for p in properties if not ((p is None) or (p == ''))]

    if not properties:
        return tmp

    # Validate up front, before touching any index; the column list is
    # loop-invariant so it is built only once.
    model_columns = model.columns.tolist()
    for prop in properties:
        if prop not in model_columns:
            raise ValueError(f'Property unavailable: {prop} was not found in your model_df.')
    axis, level = get_matching_axis_and_level(data, model.index, match_on_level)

    target_index = tmp.axes[axis]
    # Keys of the matched level, used to look up each property in the model.
    match_keys = target_index.get_level_values(level)
    # Work on a frame with one column per existing level so that new levels
    # can be inserted at an arbitrary position.
    new_index = target_index.to_frame(index=False)
    for prop in properties:
        # Skip properties that are already present as a level on this axis.
        if prop not in new_index:
            loc = 0 if prepend_to_top else len(new_index.columns)
            new_index.insert(loc, prop, model.loc[match_keys, prop].values)
    new_index = pd.MultiIndex.from_frame(new_index)
    if axis == 0:
        tmp.index = new_index
    else:
        tmp.columns = new_index

    # The same object is returned for Series and DataFrame alike; no
    # type-specific handling is needed.
    return tmp