Skip to content

Area Variable Accounting

AreaPriceCalculator

Bases: AreaVariableCalculatorBase

Calculates area-level prices from node prices using simple or weighted averaging.

This calculator aggregates node-level electricity prices to area-level (e.g., bidding zones, countries) using either simple averaging or weighted averaging based on demand, supply, or other energy quantities. It's particularly useful in energy market analysis where different regions may have multiple price nodes that need to be consolidated into representative area prices.

The class inherits from AreaVariableCalculatorBase and provides energy-aware price aggregation that handles edge cases like zero weights and missing data appropriately.

Typical use cases:

- Aggregating nodal prices to bidding zone prices
- Creating country-level price indices from multiple market nodes
- Volume-weighted price calculations for regional analysis

Parameters:

Name Type Description Default
node_model_df DataFrame

DataFrame with node-area mappings

required
area_column str

Column name containing area identifiers

required

Example:

>>> import pandas as pd
>>> import numpy as np
>>>
>>> # Node model with area mapping
>>> node_model = pd.DataFrame({
...     'bidding_zone': ['DE_LU', 'DE_LU', 'FR', 'FR']
... }, index=['DE1', 'DE2', 'FR1', 'FR2'])
>>>
>>> # Price calculator
>>> calc = AreaPriceCalculator(node_model, 'bidding_zone')
>>>
>>> # Node prices
>>> prices = pd.DataFrame({
...     'DE1': [50.0, 45.0], 'DE2': [52.0, 47.0],
...     'FR1': [55.0, 48.0], 'FR2': [53.0, 46.0]
... }, index=pd.date_range('2024-01-01', periods=2, freq='h'))
>>>
>>> # Simple average
>>> area_prices = calc.calculate(prices)
>>> print(area_prices)
bidding_zone         DE_LU    FR
2024-01-01 00:00:00   51.0  54.0
2024-01-01 01:00:00   46.0  47.0
Source code in submodules/mesqual/mesqual/energy_data_handling/area_accounting/area_variable_price_calculator.py
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
class AreaPriceCalculator(AreaVariableCalculatorBase):
    """Calculates area-level prices from node prices using simple or weighted averaging.

    This calculator aggregates node-level electricity prices to area-level (e.g., bidding zones,
    countries) using either simple averaging or weighted averaging based on demand, supply, or
    other energy quantities. It's particularly useful in energy market analysis where different
    regions may have multiple price nodes that need to be consolidated into representative area
    prices.

    The class inherits from AreaVariableCalculatorBase and provides energy-aware price aggregation
    that handles edge cases like zero weights and missing data appropriately.

    Typical use cases:
    - Aggregating nodal prices to bidding zone prices
    - Creating country-level price indices from multiple market nodes
    - Volume-weighted price calculations for regional analysis

    Args:
        node_model_df: DataFrame with node-area mappings
        area_column: Column name containing area identifiers

    Example:

        >>> import pandas as pd
        >>> import numpy as np
        >>>
        >>> # Node model with area mapping
        >>> node_model = pd.DataFrame({
        ...     'bidding_zone': ['DE_LU', 'DE_LU', 'FR', 'FR']
        ... }, index=['DE1', 'DE2', 'FR1', 'FR2'])
        >>>
        >>> # Price calculator
        >>> calc = AreaPriceCalculator(node_model, 'bidding_zone')
        >>>
        >>> # Node prices
        >>> prices = pd.DataFrame({
        ...     'DE1': [50.0, 45.0], 'DE2': [52.0, 47.0],
        ...     'FR1': [55.0, 48.0], 'FR2': [53.0, 46.0]
        ... }, index=pd.date_range('2024-01-01', periods=2, freq='h'))
        >>>
        >>> # Simple average
        >>> area_prices = calc.calculate(prices)
        >>> print(area_prices)
        bidding_zone         DE_LU    FR
        2024-01-01 00:00:00   51.0  54.0
        2024-01-01 01:00:00   46.0  47.0
    """

    def calculate(
        self,
        node_price_df: pd.DataFrame,
        weighting_factor_df: pd.DataFrame = None,
    ) -> pd.DataFrame:
        """Calculate area prices with different weighting options.

        Aggregates node-level prices to area-level using simple averaging (when no weights
        provided) or weighted averaging (when weights provided). Nodes of the node model
        that have no column in ``node_price_df`` are ignored, and areas without any
        available node are omitted from the output.

        In case you want to exclude certain nodes from the aggregation (e.g. because they
        are virtual or synthetic nodes), you can simply remove them from the node_price_df
        before passing it to this method.

        Args:
            node_price_df: Node-level price time series with datetime index and node columns.
                Values represent electricity prices in €/MWh or similar units.
            weighting_factor_df: Optional weighting factor DataFrame with same structure as
                node_price_df. Common weighting factors include:
                - node_demand_df: Demand-weighted prices
                - node_supply_df: Supply-weighted prices
                - node_capacity_df: Capacity-weighted prices
                If None, simple arithmetic average is used.

        Returns:
            DataFrame with area-level prices. Index matches input time series, columns
            represent areas with prices in same units as input. If no area has any node
            in ``node_price_df``, a DataFrame with the input index and no columns is
            returned.

        Raises:
            KeyError: If a node used from node_price_df is missing from weighting_factor_df

        Note:
            Columns that are unknown to the node model do not raise; the validation
            helper only logs a warning for them.

        Example:

            >>> # Simple average
            >>> area_prices = calc.calculate(node_prices)
            >>>
            >>> # Demand-weighted average
            >>> weighted_prices = calc.calculate(node_prices, node_demand)
        """
        self._validate_node_data(node_price_df, 'node_price_df')
        if weighting_factor_df is not None:
            # Validate once up front; doing it per area would repeat the same warning.
            self._validate_node_data(weighting_factor_df, 'weighting_factor_df')

        area_prices = {}
        for area in self.areas:
            area_nodes = [
                node for node in self.get_area_nodes(area)
                if node in node_price_df.columns
            ]
            if not area_nodes:
                continue

            prices = node_price_df[area_nodes]
            if weighting_factor_df is None:
                area_prices[area] = self._calculate_simple_average(prices)
            else:
                area_prices[area] = self._calculate_weighted_average(
                    prices, weighting_factor_df[area_nodes]
                )

        if area_prices:
            result = pd.DataFrame(area_prices)
        else:
            # Keep the time index even when nothing could be aggregated.
            result = pd.DataFrame(index=node_price_df.index)
        result.columns.name = self.area_column
        return result

    def _calculate_simple_average(self, prices: pd.DataFrame) -> pd.Series:
        """Return the row-wise arithmetic mean of the node prices (NaN values are skipped)."""
        return prices.mean(axis=1)

    def _calculate_weighted_average(
        self,
        prices: pd.DataFrame,
        weights: pd.DataFrame
    ) -> pd.Series:
        """Calculate weighted average of prices using provided weights.

        Only nodes with a valid (non-NaN) price contribute to a row: their weight is
        used in both the numerator and the denominator. Weights of nodes whose price
        is NaN are ignored, so a missing price does not drag the average towards zero.
        When all prices are NaN for a time period, the result is also NaN.

        When the effective weights of a row sum to zero, a divisor of 1 is used to
        avoid division by zero; for all-zero weights this yields a price of 0.0.

        Args:
            prices: DataFrame with price time series for nodes in an area
            weights: DataFrame with weighting factors (e.g., demand, supply) with same
                structure as prices. Must have non-negative values.

        Returns:
            Series with weighted average prices over time

        Note:
            This method assumes weights are extensive quantities (like energy volumes)
            while prices are intensive quantities (like €/MWh).
        """
        has_price = prices.notna()
        # Drop weights of nodes without a price so numerator and denominator
        # are built from the same set of nodes.
        effective_weights = weights.where(has_price)
        weighted_sum = (prices * effective_weights).sum(axis=1)
        # NOTE(review): a zero weight sum produces a price of 0.0, not NaN or the
        # simple average - confirm this is the intended fallback.
        weight_sum = effective_weights.sum(axis=1).replace(0, 1)
        weighted_price = weighted_sum / weight_sum
        # Rows without any valid price have no meaningful average.
        return weighted_price.where(has_price.any(axis=1))

calculate

calculate(node_price_df: DataFrame, weighting_factor_df: DataFrame = None) -> DataFrame

Calculate area prices with different weighting options.

Aggregates node-level prices to area-level using simple averaging (when no weights provided) or weighted averaging (when weights provided). The method handles missing nodes gracefully and ensures proper handling of zero weights and NaN values.

In case you want to exclude certain nodes from the aggregation (e.g. because they are virtual or synthetic nodes), you can simply remove them from the node_price_df before passing it to this method.

Parameters:

Name Type Description Default
node_price_df DataFrame

Node-level price time series with datetime index and node columns. Values represent electricity prices in €/MWh or similar units.

required
weighting_factor_df DataFrame

Optional weighting factor DataFrame with same structure as node_price_df. Common weighting factors include:

- node_demand_df: Demand-weighted prices
- node_supply_df: Supply-weighted prices
- node_capacity_df: Capacity-weighted prices

If None, simple arithmetic average is used.

None

Returns:

Type Description
DataFrame

DataFrame with area-level prices. Index matches input time series, columns represent areas with prices in same units as input.

Raises:

Type Description
ValueError

If node_price_df structure is invalid

KeyError

If required nodes are missing from weighting_factor_df

Example:

>>> # Simple average
>>> area_prices = calc.calculate(node_prices)
>>> 
>>> # Demand-weighted average  
>>> weighted_prices = calc.calculate(node_prices, node_demand)
Source code in submodules/mesqual/mesqual/energy_data_handling/area_accounting/area_variable_price_calculator.py
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def calculate(
    self,
    node_price_df: pd.DataFrame,
    weighting_factor_df: pd.DataFrame = None,
) -> pd.DataFrame:
    """Calculate area prices with different weighting options.

    Aggregates node-level prices to area-level using simple averaging (when no weights
    provided) or weighted averaging (when weights provided). Nodes of the node model
    that have no column in ``node_price_df`` are ignored, and areas without any
    available node are omitted from the output.

    In case you want to exclude certain nodes from the aggregation (e.g. because they
    are virtual or synthetic nodes), you can simply remove them from the node_price_df
    before passing it to this method.

    Args:
        node_price_df: Node-level price time series with datetime index and node columns.
            Values represent electricity prices in €/MWh or similar units.
        weighting_factor_df: Optional weighting factor DataFrame with same structure as
            node_price_df. Common weighting factors include:
            - node_demand_df: Demand-weighted prices
            - node_supply_df: Supply-weighted prices
            - node_capacity_df: Capacity-weighted prices
            If None, simple arithmetic average is used.

    Returns:
        DataFrame with area-level prices. Index matches input time series, columns
        represent areas with prices in same units as input. If no area has any node
        in ``node_price_df``, a DataFrame with the input index and no columns is
        returned.

    Raises:
        KeyError: If a node used from node_price_df is missing from weighting_factor_df

    Example:

        >>> # Simple average
        >>> area_prices = calc.calculate(node_prices)
        >>>
        >>> # Demand-weighted average
        >>> weighted_prices = calc.calculate(node_prices, node_demand)
    """
    self._validate_node_data(node_price_df, 'node_price_df')
    if weighting_factor_df is not None:
        # Validate once up front; doing it per area would repeat the same check.
        self._validate_node_data(weighting_factor_df, 'weighting_factor_df')

    area_prices = {}
    for area in self.areas:
        area_nodes = [
            node for node in self.get_area_nodes(area)
            if node in node_price_df.columns
        ]
        if not area_nodes:
            continue

        prices = node_price_df[area_nodes]
        if weighting_factor_df is None:
            area_prices[area] = self._calculate_simple_average(prices)
        else:
            area_prices[area] = self._calculate_weighted_average(
                prices, weighting_factor_df[area_nodes]
            )

    if area_prices:
        result = pd.DataFrame(area_prices)
    else:
        # Keep the time index even when nothing could be aggregated.
        result = pd.DataFrame(index=node_price_df.index)
    result.columns.name = self.area_column
    return result

AreaSumCalculator

Bases: AreaVariableCalculatorBase

General calculator for summing node-level extensive quantities to area level.

This calculator aggregates extensive quantities (values that scale with system size) from node-level to area-level using summation. Typical use cases include power generation, demand, energy volumes, reserves, and other additive quantities in energy systems analysis.

Unlike intensive quantities (like prices), extensive quantities should be summed when aggregating to higher geographic levels, making this calculator appropriate for many physical quantities in energy modeling.

Inherits from AreaVariableCalculatorBase and provides the MESQUAL framework's standard approach for area-level aggregation of extensive variables.

Parameters:

Name Type Description Default
node_model_df DataFrame

DataFrame mapping nodes to areas

required
area_column str

Column name containing area identifiers

required

Example:

>>> import pandas as pd
>>> import numpy as np
>>>
>>> # Node model
>>> node_model = pd.DataFrame({
...     'bidding_zone': ['DE_LU', 'DE_LU', 'FR', 'FR']
... }, index=['DE1', 'DE2', 'FR1', 'FR2'])
>>>
>>> # Sum calculator
>>> calc = AreaSumCalculator(node_model, 'bidding_zone')
>>> # Node generation data
>>> generation = pd.DataFrame({
...     'DE1': [800, 850], 'DE2': [750, 780],
...     'FR1': [900, 920], 'FR2': [850, 870]
... }, index=pd.date_range('2024-01-01', periods=2, freq='h'))
>>>
>>> # Sum to areas
>>> area_generation = calc.calculate(generation)
>>> print(area_generation)
    bidding_zone  DE_LU   FR
    2024-01-01 00:00:00  1550  1750
    2024-01-01 01:00:00  1630  1790
Source code in submodules/mesqual/mesqual/energy_data_handling/area_accounting/area_variable_sum_calculator.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
class AreaSumCalculator(AreaVariableCalculatorBase):
    """General calculator for summing node-level extensive quantities to area level.

    This calculator aggregates extensive quantities (values that scale with system size)
    from node-level to area-level using summation. Typical use cases include power
    generation, demand, energy volumes, reserves, and other additive quantities in
    energy systems analysis.

    Unlike intensive quantities (like prices), extensive quantities should be summed
    when aggregating to higher geographic levels, making this calculator appropriate
    for many physical quantities in energy modeling.

    Inherits from AreaVariableCalculatorBase and provides the MESQUAL framework's
    standard approach for area-level aggregation of extensive variables.

    Args:
        node_model_df: DataFrame mapping nodes to areas
        area_column: Column name containing area identifiers

    Example:

        >>> import pandas as pd
        >>> import numpy as np
        >>>
        >>> # Node model
        >>> node_model = pd.DataFrame({
        ...     'bidding_zone': ['DE_LU', 'DE_LU', 'FR', 'FR']
        ... }, index=['DE1', 'DE2', 'FR1', 'FR2'])
        >>>
        >>> # Sum calculator
        >>> calc = AreaSumCalculator(node_model, 'bidding_zone')
        >>> # Node generation data
        >>> generation = pd.DataFrame({
        ...     'DE1': [800, 850], 'DE2': [750, 780],
        ...     'FR1': [900, 920], 'FR2': [850, 870]
        ... }, index=pd.date_range('2024-01-01', periods=2, freq='h'))
        >>>
        >>> # Sum to areas
        >>> area_generation = calc.calculate(generation)
        >>> print(area_generation)
        bidding_zone         DE_LU    FR
        2024-01-01 00:00:00   1550  1750
        2024-01-01 01:00:00   1630  1790
    """

    def calculate(self, node_data_df: pd.DataFrame) -> pd.DataFrame:
        """Calculate area sums from node-level extensive quantity data.

        Sums node-level values within each area to create area-level aggregates.
        This method is designed for extensive quantities where summation is the
        appropriate aggregation method (e.g., generation, demand, volumes).

        Missing nodes are handled gracefully - if a node exists in the node model
        but not in the data, it's simply ignored. Areas with no available nodes
        are omitted from the output.

        In case you want to exclude certain nodes from the aggregation (e.g. because
        they are virtual or synthetic nodes), you can simply remove them from the
        node_data_df before passing it to this method.

        Args:
            node_data_df: DataFrame with node-level time series data. Index should
                be datetime, columns should be node identifiers. Values represent
                extensive quantities (MW, MWh, etc.) that should be summed.

        Returns:
            DataFrame with area-level aggregated data. Index matches input time series,
            columns represent areas. Units are preserved from input data. If no area
            has any node in ``node_data_df``, a DataFrame with the input index and no
            columns is returned.

        Note:
            Columns that are unknown to the node model do not raise; the validation
            helper only logs a warning for them.

        Example:

            >>> # Sum generation across nodes
            >>> area_generation = calc.calculate(node_generation_df)
            >>>
            >>> # Sum demand across nodes
            >>> area_demand = calc.calculate(node_demand_df)
        """
        self._validate_node_data(node_data_df, 'node_data_df')

        area_sums = {}
        for area in self.areas:
            # Only nodes that actually have a data column can contribute.
            area_nodes = [
                node for node in self.get_area_nodes(area)
                if node in node_data_df.columns
            ]
            if area_nodes:
                area_sums[area] = node_data_df[area_nodes].sum(axis=1)

        if area_sums:
            result = pd.DataFrame(area_sums)
        else:
            # Keep the time index even when nothing could be aggregated.
            result = pd.DataFrame(index=node_data_df.index)
        result.columns.name = self.area_column
        return result

calculate

calculate(node_data_df: DataFrame) -> DataFrame

Calculate area sums from node-level extensive quantity data.

Sums node-level values within each area to create area-level aggregates. This method is designed for extensive quantities where summation is the appropriate aggregation method (e.g., generation, demand, volumes).

Missing nodes are handled gracefully - if a node exists in the node model but not in the data, it's simply ignored. Areas with no available nodes are omitted from the output.

In case you want to exclude certain nodes from the aggregation (e.g. because they are virtual or synthetic nodes), you can simply remove them from the node_data_df before passing it to this method.

Parameters:

Name Type Description Default
node_data_df DataFrame

DataFrame with node-level time series data. Index should be datetime, columns should be node identifiers. Values represent extensive quantities (MW, MWh, etc.) that should be summed.

required

Returns:

Type Description
DataFrame

DataFrame with area-level aggregated data. Index matches input time series, columns represent areas. Units are preserved from input data.

Raises:

Type Description
ValueError

If node_data_df structure is invalid

Example:

>>> # Sum generation across nodes
>>> area_generation = calc.calculate(node_generation_df)
>>>
>>> # Sum demand across nodes  
>>> area_demand = calc.calculate(node_demand_df)
Source code in submodules/mesqual/mesqual/energy_data_handling/area_accounting/area_variable_sum_calculator.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def calculate(self, node_data_df: pd.DataFrame) -> pd.DataFrame:
    """Calculate area sums from node-level extensive quantity data.

    Sums node-level values within each area to create area-level aggregates.
    This method is designed for extensive quantities where summation is the
    appropriate aggregation method (e.g., generation, demand, volumes).

    Missing nodes are handled gracefully - if a node exists in the node model
    but not in the data, it's simply ignored. Areas with no available nodes
    are omitted from the output.

    In case you want to exclude certain nodes from the aggregation (e.g. because
    they are virtual or synthetic nodes), you can simply remove them from the
    node_data_df before passing it to this method.

    Args:
        node_data_df: DataFrame with node-level time series data. Index should
            be datetime, columns should be node identifiers. Values represent
            extensive quantities (MW, MWh, etc.) that should be summed.

    Returns:
        DataFrame with area-level aggregated data. Index matches input time series,
        columns represent areas. Units are preserved from input data. If no area
        has any node in ``node_data_df``, a DataFrame with the input index and no
        columns is returned.

    Example:

        >>> # Sum generation across nodes
        >>> area_generation = calc.calculate(node_generation_df)
        >>>
        >>> # Sum demand across nodes
        >>> area_demand = calc.calculate(node_demand_df)
    """
    self._validate_node_data(node_data_df, 'node_data_df')

    area_sums = {}
    for area in self.areas:
        # Only nodes that actually have a data column can contribute.
        area_nodes = [
            node for node in self.get_area_nodes(area)
            if node in node_data_df.columns
        ]
        if area_nodes:
            area_sums[area] = node_data_df[area_nodes].sum(axis=1)

    if area_sums:
        result = pd.DataFrame(area_sums)
    else:
        # Keep the time index even when nothing could be aggregated.
        result = pd.DataFrame(index=node_data_df.index)
    result.columns.name = self.area_column
    return result

AreaVariableCalculatorBase

Bases: ABC

Abstract base class for calculating energy variables aggregated at area level.

This base class provides common functionality for aggregating node-level energy data (such as generation, demand, prices) to higher-level areas (countries, bidding zones, market areas). It handles the mapping between nodes and areas and provides validation and utility methods for area-based calculations.

The class is designed to be subclassed for specific variable types, with each subclass implementing its own calculation logic while leveraging the common area mapping and validation functionality provided here.

Energy market context: In electricity markets, many variables are naturally defined at the nodal level (generators, loads, prices) but need to be aggregated to market or geographical areas for analysis, reporting, and trading. This aggregation must handle missing data, different node counts per area, and preserve energy-specific semantics.

Parameters:

Name Type Description Default
node_model_df DataFrame

DataFrame containing node information with area assignments. Index should be node identifiers, must contain the specified area_column.

required
area_column str

Name of the column in node_model_df that contains area assignments. Each node should be assigned to exactly one area (NaN values are allowed).

required

Attributes:

Name Type Description
node_model_df

The input node model DataFrame

area_column

Name of the area assignment column

node_to_area_map

Dictionary mapping node IDs to area names

areas

Sorted list of unique area names (excluding NaN)

Raises:

Type Description
ValueError

If area_column is not found in node_model_df

Example:

>>> import pandas as pd
>>> # Node model with area assignments
>>> node_model = pd.DataFrame({
...     'country': ['DE', 'DE', 'FR', 'FR', 'BE'],
...     'voltage': [380, 220, 380, 220, 380]
... }, index=['DE1', 'DE2', 'FR1', 'FR2', 'BE1'])
>>> 
>>> # Subclass implementation
>>> class MyAreaCalculator(AreaVariableCalculatorBase):
...     def calculate(self, **kwargs):
...         return pd.DataFrame()  # Implementation here
>>> 
>>> calculator = MyAreaCalculator(node_model, 'country')
>>> print(calculator.areas)  # ['BE', 'DE', 'FR']
>>> print(calculator.get_area_nodes('DE'))  # ['DE1', 'DE2']
Source code in submodules/mesqual/mesqual/energy_data_handling/area_accounting/area_variable_base.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
class AreaVariableCalculatorBase(ABC):
    """Abstract base class for calculating energy variables aggregated at area level.

    This base class provides common functionality for aggregating node-level energy data
    (such as generation, demand, prices) to higher-level areas (countries, bidding zones,
    market areas). It handles the mapping between nodes and areas and provides validation
    and utility methods for area-based calculations.

    The class is designed to be subclassed for specific variable types, with each subclass
    implementing its own calculation logic while leveraging the common area mapping and
    validation functionality provided here.

    Energy market context:
    In electricity markets, many variables are naturally defined at the nodal level
    (generators, loads, prices) but need to be aggregated to market or geographical
    areas for analysis, reporting, and trading. This aggregation must handle missing
    data, different node counts per area, and preserve energy-specific semantics.

    Args:
        node_model_df: DataFrame containing node information with area assignments.
            Index should be node identifiers, must contain the specified area_column.
        area_column: Name of the column in node_model_df that contains area assignments.
            Each node should be assigned to exactly one area (NaN values are allowed).

    Attributes:
        node_model_df: The input node model DataFrame
        area_column: Name of the area assignment column
        node_to_area_map: Dictionary mapping node IDs to area names
        areas: Sorted list of unique area names (excluding NaN)

    Raises:
        ValueError: If area_column is not found in node_model_df

    Example:

        >>> import pandas as pd
        >>> # Node model with area assignments
        >>> node_model = pd.DataFrame({
        ...     'country': ['DE', 'DE', 'FR', 'FR', 'BE'],
        ...     'voltage': [380, 220, 380, 220, 380]
        ... }, index=['DE1', 'DE2', 'FR1', 'FR2', 'BE1'])
        >>>
        >>> # Subclass implementation
        >>> class MyAreaCalculator(AreaVariableCalculatorBase):
        ...     def calculate(self, **kwargs):
        ...         return pd.DataFrame()  # Implementation here
        >>>
        >>> calculator = MyAreaCalculator(node_model, 'country')
        >>> print(calculator.areas)  # ['BE', 'DE', 'FR']
        >>> print(calculator.get_area_nodes('DE'))  # ['DE1', 'DE2']
    """

    def __init__(self, node_model_df: pd.DataFrame, area_column: str):
        """Initialize the area variable calculator.

        Args:
            node_model_df: DataFrame with node-to-area mapping
            area_column: Column name containing area assignments

        Raises:
            ValueError: If area_column not found in node_model_df
        """
        self.node_model_df = node_model_df
        self.area_column = area_column
        # Validate before touching the column: the derived attributes below index
        # node_model_df[area_column] and would otherwise fail with a KeyError
        # instead of the documented ValueError.
        self._validate_inputs()
        self.node_to_area_map = self._create_node_to_area_map()
        self.areas = sorted(self.node_model_df[area_column].dropna().unique())

    def _validate_inputs(self):
        """Validate input parameters during initialization.

        Raises:
            ValueError: If area_column is not found in node_model_df
        """
        if self.area_column not in self.node_model_df.columns:
            raise ValueError(f"Column '{self.area_column}' not found in node_model_df")

    def _create_node_to_area_map(self) -> dict[str, str]:
        """Return a dict mapping each index label of node_model_df to its area value."""
        return self.node_model_df[self.area_column].to_dict()

    def get_area_nodes(self, area: str) -> list[str]:
        """Get all nodes belonging to a specific area.

        Args:
            area: Area name to get nodes for

        Returns:
            List of node IDs that belong to the specified area

        Example:

            >>> calculator = MyAreaCalculator(node_model, 'country')
            >>> german_nodes = calculator.get_area_nodes('DE')
            >>> print(german_nodes)  # ['DE1', 'DE2']
        """
        return self.node_model_df[self.node_model_df[self.area_column] == area].index.tolist()

    @abstractmethod
    def calculate(self, **kwargs) -> pd.DataFrame:
        """Calculate the area variable. Must be implemented by subclasses.

        This method should contain the specific logic for aggregating node-level
        data to area level for the particular variable type. The implementation
        will vary depending on whether the variable is extensive (additive like
        energy volumes) or intensive (averaged like prices).

        Because the method is abstract, a subclass that does not override it
        cannot be instantiated.

        Args:
            **kwargs: Variable-specific parameters for the calculation

        Returns:
            DataFrame with area-level aggregated data. Index should be datetime
            for time series data, columns should be area identifiers.
        """
        pass

    def _validate_node_data(self, node_df: pd.DataFrame, data_name: str):
        """Warn about data columns that are unknown to the node model.

        Logs a warning if node_df has columns that are not in the index of
        node_model_df. Nothing is raised and node_df is not modified.

        Args:
            node_df: DataFrame containing node-level data to validate
            data_name: Descriptive name of the data being validated (for logging)

        Example:

            >>> # Log warning if generation_data has nodes not in node_model_df
            >>> calculator._validate_node_data(generation_data, "generation")
        """
        missing_nodes = set(node_df.columns) - set(self.node_model_df.index)
        if missing_nodes:
            logger.warning(f"{len(missing_nodes)} nodes missing in node_model_df from {data_name}")

__init__

__init__(node_model_df: DataFrame, area_column: str)

Initialize the area variable calculator.

Parameters:

Name Type Description Default
node_model_df DataFrame

DataFrame with node-to-area mapping

required
area_column str

Column name containing area assignments

required

Raises:

Type Description
ValueError

If area_column not found in node_model_df

Source code in submodules/mesqual/mesqual/energy_data_handling/area_accounting/area_variable_base.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def __init__(self, node_model_df: pd.DataFrame, area_column: str):
    """Initialize the area variable calculator.

    Stores the inputs, then derives the node-to-area mapping and the sorted
    list of distinct, non-null values found in ``area_column``.

    Args:
        node_model_df: DataFrame with node-to-area mapping
        area_column: Column name containing area assignments

    Raises:
        ValueError: If area_column not found in node_model_df
    """
    # Fail fast with the documented ValueError. Without this guard the column
    # lookup used to build ``areas`` below raises a KeyError for a missing
    # column before ``_validate_inputs()`` ever gets a chance to run.
    if area_column not in node_model_df.columns:
        raise ValueError(f"Column '{area_column}' not found in node_model_df")

    self.node_model_df = node_model_df
    self.area_column = area_column
    self.node_to_area_map = self._create_node_to_area_map()
    # Null area assignments are dropped so that sorting only sees real labels.
    self.areas = sorted(self.node_model_df[area_column].dropna().unique())
    self._validate_inputs()

get_area_nodes

get_area_nodes(area: str) -> list[str]

Get all nodes belonging to a specific area.

Parameters:

Name Type Description Default
area str

Area name to get nodes for

required

Returns:

Type Description
list[str]

List of node IDs that belong to the specified area

Example:

>>> calculator = MyAreaCalculator(node_model, 'country')
>>> german_nodes = calculator.get_area_nodes('DE')
>>> print(german_nodes)  # ['DE1', 'DE2']
Source code in submodules/mesqual/mesqual/energy_data_handling/area_accounting/area_variable_base.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def get_area_nodes(self, area: str) -> list[str]:
    """Return the node IDs assigned to the given area.

    Rows of ``self.node_model_df`` whose ``self.area_column`` value equals
    ``area`` are selected, and their index labels are returned in the order
    they appear in the node model.

    Args:
        area: Area name to look up

    Returns:
        List of node IDs (index labels of the node model) belonging to the
        area; empty if no node is assigned to it

    Example:

        >>> calculator = MyAreaCalculator(node_model, 'country')
        >>> german_nodes = calculator.get_area_nodes('DE')
        >>> print(german_nodes)  # ['DE1', 'DE2']
    """
    model = self.node_model_df
    in_area = model[self.area_column] == area
    matching_rows = model.loc[in_area]
    return matching_rows.index.tolist()

calculate abstractmethod

calculate(**kwargs) -> DataFrame

Calculate the area variable. Must be implemented by subclasses.

This method should contain the specific logic for aggregating node-level data to area level for the particular variable type. The implementation will vary depending on whether the variable is extensive (additive like energy volumes) or intensive (averaged like prices).

Parameters:

Name Type Description Default
**kwargs

Variable-specific parameters for the calculation

{}

Returns:

Type Description
DataFrame

DataFrame with area-level aggregated data. Index should be datetime for time series data, columns should be area identifiers.

Raises:

Type Description
NotImplementedError

This is an abstract method

Source code in submodules/mesqual/mesqual/energy_data_handling/area_accounting/area_variable_base.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
@abstractmethod
def calculate(self, **kwargs) -> pd.DataFrame:
    """Aggregate node-level data to area level; subclasses supply the logic.

    Concrete calculators override this method with the aggregation that
    suits their variable type: extensive quantities (additive, such as
    energy volumes) versus intensive quantities (averaged, such as prices).

    Args:
        **kwargs: Variable-specific parameters for the calculation

    Returns:
        DataFrame with area-level aggregated data. Index should be datetime
        for time series data, columns should be area identifiers.

    Note:
        The base body is intentionally empty: it performs no work and does
        not raise by itself. The method is only marked as abstract.
    """
    ...

ExampleSumCalculator

Bases: AreaVariableCalculatorBase

Example implementation that sums node-level data to area level.

Source code in submodules/mesqual/mesqual/energy_data_handling/area_accounting/area_variable_base.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
class ExampleSumCalculator(AreaVariableCalculatorBase):
    """Example implementation that sums node-level data to area level."""

    def calculate(self, node_data: pd.DataFrame) -> pd.DataFrame:
        """Sum node data for each area (extensive variable aggregation).

        Args:
            node_data: Node-level data with one column per node

        Returns:
            DataFrame indexed like ``node_data`` with one column per area in
            ``self.areas``. Areas without any node present in ``node_data``
            get an all-NaN column. The columns axis is named after
            ``self.area_column``.
        """
        self._validate_node_data(node_data, "example_data")

        result_dict = {}
        for area in self.areas:
            # Keep only the area's nodes that actually have a data column.
            available_nodes = [
                node for node in self.get_area_nodes(area) if node in node_data.columns
            ]
            if available_nodes:
                result_dict[area] = node_data[available_nodes].sum(axis=1)
            else:
                # No data for this area: all-NaN column on the same index.
                result_dict[area] = pd.Series(index=node_data.index, dtype=float)

        # Passing the index explicitly keeps the input's index even when there
        # are no areas at all (an empty dict would otherwise yield a frame
        # with an empty index).
        result_df = pd.DataFrame(result_dict, index=node_data.index)
        result_df.columns.name = self.area_column
        return result_df

calculate

calculate(node_data: DataFrame) -> DataFrame

Sum node data for each area (extensive variable aggregation).

Source code in submodules/mesqual/mesqual/energy_data_handling/area_accounting/area_variable_base.py
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def calculate(self, node_data: pd.DataFrame) -> pd.DataFrame:
    """Sum node data for each area (extensive variable aggregation).

    Args:
        node_data: Node-level data with one column per node

    Returns:
        DataFrame indexed like ``node_data`` with one column per area in
        ``self.areas``. Areas without any node present in ``node_data`` get
        an all-NaN column. The columns axis is named after
        ``self.area_column``.
    """
    self._validate_node_data(node_data, "example_data")

    result_dict = {}
    for area in self.areas:
        # Keep only the area's nodes that actually have a data column.
        available_nodes = [
            node for node in self.get_area_nodes(area) if node in node_data.columns
        ]
        if available_nodes:
            result_dict[area] = node_data[available_nodes].sum(axis=1)
        else:
            # No data for this area: all-NaN column on the same index.
            result_dict[area] = pd.Series(index=node_data.index, dtype=float)

    # Passing the index explicitly keeps the input's index even when there are
    # no areas at all (an empty dict would otherwise yield a frame with an
    # empty index).
    result_df = pd.DataFrame(result_dict, index=node_data.index)
    result_df.columns.name = self.area_column
    return result_df