Skip to content

Validation

This module provides functions for validating the integrity and physical consistency of generated power flow data.

validate_generated_data

Run all validation tests on the generated data.

Parameters:

Name Type Description Default
file_paths Dict[str, str]

Dictionary containing paths to data files (bus_data, branch_data, gen_data, y_bus_data).

required
mode str

Operating mode ("opf" or "pf").

required
n_partitions int

Number of partitions to sample for validation (0 for all partitions).

0
sn_mva float

Base MVA used to scale power quantities.

required

Returns:

Type Description
bool

True if all validations pass.

Raises:

Type Description
AssertionError

If any validation fails.

Source code in gridfm_datakit/validation.py
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
def _run_validation_step(label: str, validator, *args) -> None:
    """Run a single validator and surface any failure as an ``AssertionError``.

    Args:
        label: Human-readable name of the check, used as the error prefix.
        validator: Callable performing the check; it signals failure by raising.
        *args: Positional arguments forwarded to ``validator``.

    Raises:
        AssertionError: If ``validator`` raises any ``Exception``. The original
            exception is attached as ``__cause__``.
    """
    try:
        validator(*args)
    except Exception as e:
        # Chain explicitly so the traceback reports the underlying failure as
        # the direct cause rather than as an incidental nested exception.
        raise AssertionError(f"{label} validation failed: {e}") from e


def validate_generated_data(
    file_paths: Dict[str, str],
    mode: str,
    sn_mva: float,
    n_partitions: int = 0,
) -> bool:
    """Run all validation tests on the generated data.

    Args:
        file_paths: Dictionary containing paths to data files (bus_data,
            branch_data, gen_data, y_bus_data). An optional "runtime_data"
            entry is loaded as well when present.
        mode: Operating mode ("opf" or "pf").
        sn_mva: Base MVA used to scale power quantities.
        n_partitions: Number of partitions to sample for validation
            (0 for all partitions).

    Returns:
        True if all validations pass.

    Raises:
        AssertionError: If any validation fails.
    """
    # Get total scenarios from metadata
    data_dir = os.path.dirname(file_paths["bus_data"])
    total_scenarios = get_num_scenarios(data_dir)

    # Step 1: Validate partition structure on ALL partitions
    # print("Step 1: Validating partition structure on all partitions...")
    # _validate_partition_structure(file_paths, total_scenarios)

    # Ceiling division: the last partition may hold fewer scenarios.
    num_partitions = (
        total_scenarios + n_scenario_per_partition - 1
    ) // n_scenario_per_partition

    # Sample partitions
    if n_partitions > 0:
        sampled_partitions = sorted(
            np.random.choice(
                num_partitions,
                size=min(n_partitions, num_partitions),
                replace=False,
            ),
        )
        max_scenarios_to_validate = len(sampled_partitions) * n_scenario_per_partition
        print(
            f"Step 2: Running core validations on {len(sampled_partitions)} sampled partitions (up to {max_scenarios_to_validate} scenarios) out of {num_partitions} total",
        )
    else:
        sampled_partitions = list(range(num_partitions))
        print(
            f"Step 2: Running core validations on all {num_partitions} partitions ({total_scenarios} total scenarios)",
        )

    bus_data = read_partitions(file_paths["bus_data"], sampled_partitions)
    branch_data = read_partitions(file_paths["branch_data"], sampled_partitions)
    gen_data = read_partitions(file_paths["gen_data"], sampled_partitions)
    y_bus_data = read_partitions(file_paths["y_bus_data"], sampled_partitions)
    runtime_data = (
        read_partitions(file_paths["runtime_data"], sampled_partitions)
        if "runtime_data" in file_paths
        else None
    )
    if runtime_data is None:
        print("No runtime data found, skipping runtime data validation")
    else:
        print(f"Runtime data found: {runtime_data.shape}")

    generated_data = {
        "bus_data": bus_data,
        "branch_data": branch_data,
        "gen_data": gen_data,
        "y_bus_data": y_bus_data,
        "runtime_data": runtime_data,
        "mode": mode,
        "file_paths": file_paths,
    }

    # Run core validations on sampled partitions
    _run_validation_step(
        "Scenario indexing consistency",
        validate_scenario_indexing_consistency,
        generated_data,
    )
    _run_validation_step(
        "Bus indexing consistency",
        validate_bus_indexing_consistency,
        generated_data,
    )
    _run_validation_step(
        "Data completeness",
        validate_data_completeness,
        generated_data,
    )
    _run_validation_step(
        "DC columns consistency",
        validate_dc_columns_consistency,
        generated_data,
    )

    # Check voltage angles are within [-180, 180]
    _run_validation_step(
        "Voltage angles",
        validate_voltage_angles_within_bounds,
        generated_data,
    )

    # Run Y-Bus Consistency Tests
    _run_validation_step(
        "Y-bus diagonal consistency",
        validate_ybus_diagonal_consistency,
        generated_data,
    )

    # Run Branch Constraint Tests
    _run_validation_step(
        "Deactivated lines zero admittance",
        validate_deactivated_lines_zero_admittance,
        generated_data,
    )
    _run_validation_step(
        "Admittance calculations",
        validate_admittance_calculations,
        generated_data,
    )
    _run_validation_step(
        "Computed vs stored power flows",
        validate_computed_vs_stored_power_flows,
        generated_data,
        sn_mva,
    )
    _run_validation_step(
        "Tap not zero",
        validate_tap_not_zero,
        generated_data,
    )

    # Run branch loading validation for both OPF and PF modes
    # In OPF mode: asserts loading <= 1.01
    # In PF mode: computes statistics without asserting
    try:
        validate_branch_loading_opf_mode(generated_data)
    except Exception as e:
        if mode == "opf":
            raise AssertionError(
                f"Branch loading OPF mode validation failed: {e}",
            ) from e
        # Outside OPF mode the loading check is informational only, so a
        # failure is reported but does not abort the remaining validations.
        print(f"    Branch loading computation encountered errors: {e}")

    # Run Generator Constraint Tests
    _run_validation_step(
        "Deactivated generators zero output",
        validate_deactivated_generators_zero_output,
        generated_data,
    )
    _run_validation_step(
        "Generator limits",
        validate_generator_limits,
        generated_data,
    )

    # Run OPF mode Constraints
    if mode == "opf":
        _run_validation_step(
            "Voltage magnitude limits OPF mode",
            validate_voltage_magnitude_limits_opf_mode,
            generated_data,
        )
        _run_validation_step(
            "Branch angle difference limits OPF mode",
            validate_branch_angle_difference_opf_mode,
            generated_data,
        )

    # Run Power Balance Tests
    _run_validation_step(
        "Bus generation consistency",
        validate_bus_generation_consistency,
        generated_data,
    )

    # DC bus generation consistency (if DC fields present)
    _run_validation_step(
        "Bus generation DC consistency",
        validate_bus_generation_consistency_dc,
        generated_data,
    )

    # PF mode only: Pg consistency check performed by
    # validate_non_slack_pg_consistency.
    if mode == "pf":
        _run_validation_step(
            "Slack Pg consistency",
            validate_non_slack_pg_consistency,
            generated_data,
        )

    _run_validation_step(
        "Power balance equations",
        validate_power_balance_equations,
        generated_data,
        sn_mva,
    )

    # Run Generator Cost Perturbation Tests
    _run_validation_step(
        "Constant cost generators unchanged",
        validate_constant_cost_generators_unchanged,
        generated_data,
    )

    # Run Bus Type and Generator Consistency Tests
    _run_validation_step(
        "Bus type-generator consistency",
        validate_bus_type_generator_consistency,
        generated_data,
    )

    return True

Functions

validate_ybus_diagonal_consistency

Test Y-bus diagonal consistency with bus and branch data (vectorized).

Source code in gridfm_datakit/validation.py
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
def validate_ybus_diagonal_consistency(generated_data: Dict[str, pd.DataFrame]) -> None:
    """Test Y-bus diagonal consistency with bus and branch data (vectorized).

    For every (scenario, bus) row of ``bus_data`` the expected diagonal entry is
    ``GS + sum(Yff_r) + sum(Ytt_r)`` (real part) and
    ``BS + sum(Yff_i) + sum(Ytt_i)`` (imaginary part), where the sums run over
    branches leaving from / arriving at that bus. These are compared with the
    ``G`` / ``B`` values of the rows of ``y_bus_data`` where
    ``index1 == index2``.

    A bus with no diagonal row in ``y_bus_data`` is compared as ``G = B = 0``,
    so a missing entry fails whenever the expected value is non-zero.

    Args:
        generated_data: Dictionary providing the "bus_data", "branch_data" and
            "y_bus_data" DataFrames.

    Raises:
        AssertionError: If any expected diagonal value differs from the stored
            one by 1e-6 or more.
    """
    bus_data = generated_data["bus_data"]
    branch_data = generated_data["branch_data"]
    y_bus_data = generated_data["y_bus_data"]

    scenarios = bus_data["scenario"].unique()
    total_buses = len(bus_data)
    print(
        f"    Y-bus diagonal consistency: validating {total_buses} bus entries across {len(scenarios)} scenarios",
    )

    # Aggregate Yff contributions by (scenario, from_bus)
    yff_sum = (
        branch_data.groupby(["scenario", "from_bus"], as_index=False)
        .agg({"Yff_r": "sum", "Yff_i": "sum"})
        .rename(columns={"from_bus": "bus", "Yff_r": "yff_sum_g", "Yff_i": "yff_sum_b"})
    )

    # Aggregate Ytt contributions by (scenario, to_bus)
    ytt_sum = (
        branch_data.groupby(["scenario", "to_bus"], as_index=False)
        .agg({"Ytt_r": "sum", "Ytt_i": "sum"})
        .rename(columns={"to_bus": "bus", "Ytt_r": "ytt_sum_g", "Ytt_i": "ytt_sum_b"})
    )

    # Prepare bus data with (scenario, bus) as key
    bus_keyed = bus_data[["scenario", "bus", "GS", "BS"]].copy()
    bus_keyed["scenario"] = bus_keyed["scenario"].astype(int)
    bus_keyed["bus"] = bus_keyed["bus"].astype(int)

    # Merge all contributions; buses without incident branches contribute 0
    expected = (
        bus_keyed.merge(yff_sum, on=["scenario", "bus"], how="left")
        .merge(ytt_sum, on=["scenario", "bus"], how="left")
        .fillna(
            {"yff_sum_g": 0.0, "yff_sum_b": 0.0, "ytt_sum_g": 0.0, "ytt_sum_b": 0.0},
        )
    )

    # Compute expected G and B
    expected["expected_g"] = (
        expected["GS"] + expected["yff_sum_g"] + expected["ytt_sum_g"]
    )
    expected["expected_b"] = (
        expected["BS"] + expected["yff_sum_b"] + expected["ytt_sum_b"]
    )

    # Get actual G and B from y_bus_data (diagonal entries only)
    ybus_diagonal = y_bus_data[(y_bus_data["index1"] == y_bus_data["index2"])][
        ["scenario", "index1", "G", "B"]
    ].rename(columns={"index1": "bus"})
    ybus_diagonal["scenario"] = ybus_diagonal["scenario"].astype(int)
    ybus_diagonal["bus"] = ybus_diagonal["bus"].astype(int)

    # Merge expected with actual. The left merge leaves G/B as NaN for buses
    # that have no stored diagonal entry; NaN never satisfies ">= tolerance",
    # so without the fill such buses would pass silently. Compare them as 0.
    comparison = expected.merge(
        ybus_diagonal,
        on=["scenario", "bus"],
        how="left",
        suffixes=("", "_actual"),
    ).fillna({"G": 0.0, "B": 0.0})

    # Vectorized comparison
    g_diff = np.abs(comparison["expected_g"] - comparison["G"])
    b_diff = np.abs(comparison["expected_b"] - comparison["B"])

    tolerance = 1e-6
    g_mismatches = comparison[g_diff >= tolerance]
    b_mismatches = comparison[b_diff >= tolerance]

    if len(g_mismatches) > 0:
        raise AssertionError(f"G mismatches: {g_mismatches}")
    if len(b_mismatches) > 0:
        raise AssertionError(f"B mismatches: {b_mismatches}")

    print("    Y-bus diagonal consistency: OK")

validate_deactivated_lines_zero_admittance

Test that deactivated lines have zero power flows and admittances.

Source code in gridfm_datakit/validation.py
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
def validate_deactivated_lines_zero_admittance(
    generated_data: Dict[str, pd.DataFrame],
) -> None:
    """Test that deactivated lines have zero power flows and admittances.

    Branches with ``br_status == 0`` must have exactly zero values in the
    power-flow columns (pf, qf, pt, qt) and in all branch admittance columns
    (Yff, Yft, Ytf, Ytt; real ``_r`` and imaginary ``_i`` parts).

    Args:
        generated_data: Dictionary providing the "branch_data" DataFrame.

    Raises:
        AssertionError: If any deactivated branch has a non-zero (or NaN) value
            in one of the checked columns. The first offending column, in the
            order listed below, is named in the message.
    """
    branch_data = generated_data["branch_data"]
    deactivated_branches = branch_data[branch_data["br_status"] == 0]

    print(
        f"    Deactivated lines zero admittance: validating {len(deactivated_branches)} deactivated branches",
    )

    must_be_zero = (
        "pf",
        "qf",
        "pt",
        "qt",
        "Yff_r",
        "Yff_i",
        "Yft_r",
        "Yft_i",
        "Ytf_r",
        "Ytf_i",
        "Ytt_r",
        "Ytt_i",
    )
    if not deactivated_branches.empty:
        for column in must_be_zero:
            # Raise explicitly instead of using `assert`, which is stripped
            # when Python runs with -O and would disable the validation.
            if not (deactivated_branches[column] == 0).all():
                raise AssertionError(
                    f"Deactivated branches should have zero {column}",
                )

    print("    Deactivated lines zero admittance: OK")

validate_computed_vs_stored_power_flows

Test that computed power flows match stored power flows.

Source code in gridfm_datakit/validation.py
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
def validate_computed_vs_stored_power_flows(
    generated_data: Dict[str, pd.DataFrame],
    sn_mva: float,
) -> None:
    """Check that recomputed branch power flows agree with the stored ones.

    The flows (pf, qf, pt, qt) are recomputed with
    ``compute_branch_powers_vectorized`` (``dc=False``) from the branch and bus
    tables and compared element-wise against the values stored in
    ``branch_data`` using ``np.isclose`` with ``atol=1e-2`` and ``rtol=1e-3``.

    Args:
        generated_data: Dictionary providing the "branch_data" and "bus_data"
            DataFrames.
        sn_mva: Base MVA forwarded to the flow computation.

    Raises:
        AssertionError: If any recomputed value is not close to the stored one.
    """
    branches = generated_data["branch_data"]
    n_scenarios = len(branches["scenario"].unique())

    print(
        f"    Validate computed vs stored power flows: validating {len(branches)} branches across {n_scenarios} scenarios",
    )

    pf, qf, pt, qt = compute_branch_powers_vectorized(
        branches,
        generated_data["bus_data"],
        dc=False,
        sn_mva=sn_mva,
    )

    # Both frames share the same column order so the element-wise comparison
    # lines up; the key columns are copied verbatim and always compare equal
    # unless they contain NaN.
    compared_columns = ["pf", "qf", "pt", "qt", "scenario", "from_bus", "to_bus"]
    recomputed = pd.DataFrame(
        {
            "pf": pf,
            "qf": qf,
            "pt": pt,
            "qt": qt,
            "scenario": branches["scenario"],
            "from_bus": branches["from_bus"],
            "to_bus": branches["to_bus"],
        },
        index=branches.index,
    )
    stored = branches[compared_columns]

    # TODO investigate why atol has to be so large, especially for pf delta
    differs = ~np.isclose(recomputed, stored, atol=1e-2, rtol=1e-3)
    if differs.any():
        raise AssertionError(
            f"Computed power flows do not match stored power flows, stored: \n{stored[differs]}, computed: \n{recomputed[differs]}",
        )

    print("    Computed vs stored power flows: OK")

validate_branch_loading_opf_mode

Test branch loading limits in OPF mode; compute loading statistics in PF mode.

Source code in gridfm_datakit/validation.py
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
def validate_branch_loading_opf_mode(generated_data: Dict[str, pd.DataFrame]) -> None:
    """Test branch loading limits in OPF mode; compute loading statistics in PF mode.

    Loading is ``max(|S_from|, |S_to|) / rate_a`` with ``|S| = sqrt(P^2 + Q^2)``,
    evaluated on branches that are in service (``br_status == 1``) and have a
    positive rating (``rate_a > 0``).

    Args:
        generated_data: Dictionary providing the "bus_data" and "branch_data"
            DataFrames and the "mode" string.

    Raises:
        AssertionError: In OPF mode, if any branch loading exceeds 1.01. The
            message reports the first overloaded branch.
    """
    # Thresholds are named once so the masks and the printed messages agree.
    binding_threshold = 0.99
    overload_threshold = 1.01

    bus_data = generated_data["bus_data"]
    branch_data = generated_data["branch_data"]
    is_opf = generated_data["mode"] == "opf"

    scenarios = bus_data["scenario"].unique()

    # Filter to active, rated branches
    rated_branches = branch_data[
        (branch_data["br_status"] == 1) & (branch_data["rate_a"] > 0)
    ]

    mode_label = "opf" if is_opf else "pf"
    print(
        f"    Branch loading limits ({mode_label} mode): validating {len(rated_branches)} rated branches across {len(scenarios)} scenarios",
    )

    # Vectorized computation of loading
    # Compute apparent power: S = sqrt(P^2 + Q^2)
    s_from = np.sqrt(
        rated_branches["pf"].to_numpy() ** 2 + rated_branches["qf"].to_numpy() ** 2,
    )
    s_to = np.sqrt(
        rated_branches["pt"].to_numpy() ** 2 + rated_branches["qt"].to_numpy() ** 2,
    )
    rate_a = rated_branches["rate_a"].to_numpy()

    # Loading = max(S_from, S_to) / rate_a
    loading = np.maximum(s_from, s_to) / rate_a

    # Identify binding and overloaded branches
    binding_mask = loading >= binding_threshold
    overload_mask = loading > overload_threshold

    n_binding = int(binding_mask.sum())
    n_overloads = int(overload_mask.sum())

    # In OPF mode, assert no overloads
    if is_opf and n_overloads > 0:
        first_overload = np.where(overload_mask)[0][0]
        overload_info = rated_branches.iloc[first_overload]
        raise AssertionError(
            f"Scenario {int(overload_info['scenario'])}, "
            f"Branch {int(overload_info['from_bus'])}->{int(overload_info['to_bus'])}: "
            f"Loading {loading[first_overload]:.3f} exceeds {overload_threshold} in OPF mode",
        )

    print(
        f"    Binding loading constraints (>= {binding_threshold}): {n_binding} branches",
    )
    if generated_data["mode"] == "pf":
        # Report the threshold actually used by overload_mask (previously the
        # message said "> 1.0" while counting branches above 1.01).
        print(
            f"    Overloaded branches (> {overload_threshold}): {n_overloads} branches",
        )
        print("    Branch loading limits (PF mode): statistics computed")
    else:
        print("    Branch loading limits (OPF mode): OK")

validate_deactivated_generators_zero_output

Test that deactivated generators have zero output.

Source code in gridfm_datakit/validation.py
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
def validate_deactivated_generators_zero_output(
    generated_data: Dict[str, pd.DataFrame],
) -> None:
    """Test that deactivated generators have zero output.

    Generators with ``in_service == 0`` must have ``p_mw == 0`` and
    ``q_mvar == 0``. When a ``p_mw_dc`` column is present it must be zero or
    NaN for those generators.

    Args:
        generated_data: Dictionary providing the "gen_data" DataFrame.

    Raises:
        AssertionError: If any deactivated generator reports non-zero output.
    """
    gen_data = generated_data["gen_data"]
    deactivated_gens = gen_data[gen_data["in_service"] == 0]

    print(
        f"    Deactivated generators zero output: validating {len(deactivated_gens)} deactivated generators",
    )
    if not deactivated_gens.empty:
        # Raise explicitly instead of using `assert`, which is stripped when
        # Python runs with -O and would disable the validation.
        for column in ("p_mw", "q_mvar"):
            if not (deactivated_gens[column] == 0).all():
                raise AssertionError(
                    f"Deactivated generators should have zero {column}",
                )

        if "p_mw_dc" in deactivated_gens.columns:
            # zero or nan (if no solution was found)
            p_mw_dc = deactivated_gens["p_mw_dc"]
            if not ((p_mw_dc == 0) | p_mw_dc.isna()).all():
                raise AssertionError(
                    "Deactivated generators should have zero p_mw_dc or be NaN",
                )

    print("    Deactivated generators zero output: OK")

validate_generator_limits

Test that generator outputs respect their limits.

Source code in gridfm_datakit/validation.py
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
def _limit_compliance(values, lower, upper, tol=1e-2):
    """Return (within-limits mask, count at lower limit, count at upper limit).

    A value is within limits when ``lower - tol <= value <= upper + tol`` and
    counts as binding at a limit when it lies within ``tol`` of that limit.
    """
    above_lower = values >= lower - tol
    below_upper = values <= upper + tol
    at_lower = above_lower & (values <= lower + tol)
    at_upper = below_upper & (values >= upper - tol)
    return above_lower & below_upper, at_lower.sum(), at_upper.sum()


def validate_generator_limits(generated_data: Dict[str, pd.DataFrame]) -> None:
    """Test that generator outputs respect their limits.

    Only in-service generators with both ``min_p_mw`` and ``max_p_mw`` defined
    are checked. In PF mode generators flagged ``is_slack_gen`` are excluded.
    Reactive power limits are checked in OPF mode only, for generators that
    also have both ``min_q_mvar`` and ``max_q_mvar`` defined. All comparisons
    use an absolute tolerance of 1e-2.

    Args:
        generated_data: Dictionary providing the "gen_data" DataFrame and the
            "mode" string.

    Raises:
        AssertionError: If any checked generator violates its active power
            limits, or (OPF mode) its reactive power limits. The message lists
            the offending rows with their value, minimum and maximum.
    """
    mode = generated_data["mode"]
    gen_data = generated_data["gen_data"]
    gen_data = gen_data[gen_data["in_service"] == 1]
    # keep only the ones with limits for p_mw
    filtered_gens = gen_data[
        gen_data["max_p_mw"].notna() & gen_data["min_p_mw"].notna()
    ]

    if mode == "pf":
        filtered_gens = filtered_gens[filtered_gens["is_slack_gen"] == 0]

    print(
        f"    Generator limits: validating {len(filtered_gens)} active generators (mode: {generated_data['mode']})",
    )

    # Active power: compliance mask plus binding-limit counts
    p_within_limits, binding_p_min, binding_p_max = _limit_compliance(
        filtered_gens["p_mw"],
        filtered_gens["min_p_mw"],
        filtered_gens["max_p_mw"],
    )
    if not p_within_limits.all():
        # Show value, minimum and maximum side by side (the message previously
        # omitted the minimum, hiding lower-limit violations).
        p_violations = filtered_gens.loc[
            ~p_within_limits,
            ["bus", "p_mw", "min_p_mw", "max_p_mw"],
        ]
        raise AssertionError(
            f"Generator active power should be within limits, violations: \n{p_violations}",
        )

    # Reactive power limits are only enforced in OPF mode
    binding_q_min = 0
    binding_q_max = 0
    if mode == "opf":
        filtered_gens_q = filtered_gens[
            filtered_gens["max_q_mvar"].notna() & filtered_gens["min_q_mvar"].notna()
        ]
        q_within_limits, binding_q_min, binding_q_max = _limit_compliance(
            filtered_gens_q["q_mvar"],
            filtered_gens_q["min_q_mvar"],
            filtered_gens_q["max_q_mvar"],
        )
        if not q_within_limits.all():
            # Previously q_mvar was printed twice ("expected" and "actual")
            # and the minimum was never shown.
            q_violations = filtered_gens_q.loc[
                ~q_within_limits,
                ["bus", "q_mvar", "min_q_mvar", "max_q_mvar"],
            ]
            raise AssertionError(
                f"Generator reactive power should be within limits, violations: \n{q_violations}",
            )

    print(
        f"    Binding P limits: {binding_p_min} at minimum, {binding_p_max} at maximum",
    )
    if mode == "opf":
        print(
            f"    Binding Q limits: {binding_q_min} at minimum, {binding_q_max} at maximum",
        )
    print("    Generator limits: OK")

validate_voltage_magnitude_limits_opf_mode

Test voltage magnitude limits in OPF mode.

Source code in gridfm_datakit/validation.py
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
def validate_voltage_magnitude_limits_opf_mode(
    generated_data: Dict[str, pd.DataFrame],
) -> None:
    """Check that bus voltage magnitudes stay inside their bounds (OPF mode).

    Every ``Vm`` in ``bus_data`` must lie in
    ``[min_vm_pu - 1e-6, max_vm_pu + 1e-6]``. The check is skipped, with a
    printed notice, when the mode is not "opf".

    Args:
        generated_data: Dictionary providing the "bus_data" DataFrame and the
            "mode" string.

    Raises:
        AssertionError: If any voltage magnitude falls outside its bounds.
    """
    if generated_data["mode"] != "opf":
        print("    Voltage magnitude limits: skipped (not in OPF mode)")
        return

    buses = generated_data["bus_data"]
    print(
        f"    Voltage magnitude limits (OPF mode): validating {len(buses)} bus voltage entries",
    )

    slack = 1e-6
    lower_bound = buses["min_vm_pu"] - slack
    upper_bound = buses["max_vm_pu"] + slack
    # Series.between is inclusive on both ends, matching ">=" and "<=".
    in_range = buses["Vm"].between(lower_bound, upper_bound)
    assert in_range.all(), "Voltage magnitudes should be within limits"

    print("    Voltage magnitude limits (OPF mode): OK")

validate_bus_generation_consistency

Test that Pg in bus data equals sum of generators at each bus (vectorized).

Source code in gridfm_datakit/validation.py
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
def validate_bus_generation_consistency(
    generated_data: Dict[str, pd.DataFrame],
) -> None:
    """Check that bus-level Pg/Qg equal the summed generator outputs per bus.

    Generator ``p_mw`` and ``q_mvar`` are summed per (scenario, bus) and
    compared with the ``Pg`` and ``Qg`` columns of ``bus_data``. Buses without
    any generator row are compared against zero.

    Args:
        generated_data: Dictionary providing the "bus_data" and "gen_data"
            DataFrames.

    Raises:
        AssertionError: If ``Pg`` or ``Qg`` differs from the generator sum by
            1e-6 or more at any bus.
    """
    tolerance = 1e-6
    key = ["scenario", "bus"]

    bus_data = generated_data["bus_data"]
    gen_data = generated_data["gen_data"]

    n_scenarios = len(bus_data["scenario"].unique())
    print(
        f"    Bus generation consistency: validating {len(bus_data)} bus entries across {n_scenarios} scenarios",
    )

    # Total generator output per (scenario, bus)
    per_bus_totals = gen_data.groupby(key, as_index=False).agg(
        {"p_mw": "sum", "q_mvar": "sum"},
    )
    per_bus_totals = per_bus_totals.rename(
        columns={"p_mw": "pg_gen_sum", "q_mvar": "qg_gen_sum"},
    )

    # Bus-side values, with integer keys for the merge
    bus_side = bus_data[["scenario", "bus", "Pg", "Qg"]].astype(
        {"scenario": int, "bus": int},
    )

    # Left merge keeps every bus; a bus with no generators gets a zero sum
    merged = bus_side.merge(per_bus_totals, on=key, how="left")
    merged = merged.fillna({"pg_gen_sum": 0.0, "qg_gen_sum": 0.0})

    pg_off = merged[(merged["Pg"] - merged["pg_gen_sum"]).abs() >= tolerance]
    qg_off = merged[(merged["Qg"] - merged["qg_gen_sum"]).abs() >= tolerance]

    if not pg_off.empty:
        raise AssertionError(f"Pg mismatches: {pg_off}")
    if not qg_off.empty:
        raise AssertionError(f"Qg mismatches: {qg_off}")

    print("    Bus generation consistency: OK")

validate_power_balance_equations

Test power balance equations (Kirchhoff's Current Law).

Source code in gridfm_datakit/validation.py
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
def validate_power_balance_equations(
    generated_data: Dict[str, pd.DataFrame],
    sn_mva: float,
) -> None:
    """Check that the active power balance residual is zero at every bus.

    The residuals are obtained from ``compute_bus_balance`` using the stored
    branch flows (pf, qf, pt, qt); the ``P_mis_ac`` column of the result must
    be close to zero (``np.isclose`` with ``atol=1e-3``).

    Args:
        generated_data: Dictionary providing the "bus_data" and "branch_data"
            DataFrames.
        sn_mva: Base MVA forwarded to the balance computation.

    Raises:
        AssertionError: If any ``P_mis_ac`` entry is not close to zero.
    """
    buses = generated_data["bus_data"]
    branches = generated_data["branch_data"]

    n_scenarios = len(buses["scenario"].unique())
    print(
        f"    Power balance equations (Kirchhoff's Law): validating {len(buses)} bus entries across {n_scenarios} scenarios",
    )

    stored_flows = branches[["pf", "qf", "pt", "qt"]]
    balance = compute_bus_balance(
        buses,
        branches,
        stored_flows,
        False,
        sn_mva=sn_mva,
    )

    # TODO investigate why atol has to be so large
    residual_nonzero = ~np.isclose(0.0, balance["P_mis_ac"], atol=1e-3)
    if residual_nonzero.any():
        raise AssertionError(
            f"Power balance equations (Kirchhoff's Law) do not hold, mismatches: {balance[residual_nonzero]}",
        )

    print("    Power balance equations (Kirchhoff's Law): OK")

validate_constant_cost_generators_unchanged

Test that generators with constant-only costs remain unchanged across scenarios (vectorized).

Generators with constant costs (only c0 != 0, with c1 == 0 and c2 == 0) should not be perturbed or permuted, so their cost coefficients should remain identical across all scenarios. This validation checks that constraint.

Parameters:

Name Type Description Default
generated_data Dict[str, DataFrame]

Dictionary containing gen_data DataFrame.

required

Raises:

Type Description
AssertionError

If any constant-cost generator has varying costs across scenarios.

Source code in gridfm_datakit/validation.py
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
def validate_constant_cost_generators_unchanged(
    generated_data: Dict[str, pd.DataFrame],
) -> None:
    """Check that constant-cost generators keep identical costs in every scenario.

    A generator counts as constant-cost when, in the first scenario found in
    ``gen_data``, both ``cp1_eur_per_mw`` and ``cp2_eur_per_mw2`` are exactly zero.
    For each such generator (identified by ``idx``), the three cost coefficients
    of every scenario are compared with the first-scenario values; an absolute
    deviation of 1e-9 or more in any coefficient is reported as a failure.

    Args:
        generated_data: Mapping holding the generator table under ``"gen_data"``.

    Raises:
        AssertionError: If a constant-cost generator's coefficients deviate from
            its first-scenario values in some scenario. Only the first
            offending row is described in the message.
    """
    cost_cols = ["cp0_eur", "cp1_eur_per_mw", "cp2_eur_per_mw2"]
    gens = generated_data["gen_data"]

    if not len(gens):
        print("    Constant cost generators unchanged: no generators to validate")
        return

    scenario_ids = gens["scenario"].unique()
    baseline_scenario = scenario_ids[0]

    # The first scenario decides which generators are constant-cost:
    # linear and quadratic coefficients must both be zero there.
    baseline_rows = gens.loc[gens["scenario"] == baseline_scenario]
    is_constant = (
        baseline_rows[["cp1_eur_per_mw", "cp2_eur_per_mw2"]].eq(0).all(axis=1)
    )

    # "idx" (not "bus") identifies a generator, since several can share a bus.
    constant_ids = baseline_rows.loc[is_constant, "idx"].to_numpy()
    n_constant = len(constant_ids)

    if n_constant == 0:
        print(
            "    Constant cost generators unchanged: no constant-cost generators found",
        )
        return

    print(
        f"    Constant cost generators unchanged: validating {n_constant} constant-cost generators across {len(scenario_ids)} scenarios",
    )

    # Rows of the constant-cost generators, all scenarios.
    tracked = gens.loc[
        gens["idx"].isin(constant_ids),
        ["scenario", "idx", "bus", *cost_cols],
    ]

    # First-scenario coefficients, keyed by generator idx.
    baseline = tracked.loc[tracked["scenario"] == baseline_scenario].set_index("idx")[
        cost_cols
    ]

    # Attach the baseline coefficients to every row as "<column>_ref".
    merged = pd.merge(
        tracked,
        baseline,
        left_on="idx",
        right_index=True,
        suffixes=("", "_ref"),
    )

    tolerance = 1e-9
    c0_off, c1_off, c2_off = (
        (merged[col] - merged[f"{col}_ref"]).abs() >= tolerance for col in cost_cols
    )
    violated = c0_off | c1_off | c2_off

    if not violated.any():
        print("    Constant cost generators unchanged: OK")
        return

    # Report the first offending row (positional lookup).
    bad = merged.iloc[np.flatnonzero(violated)[0]]
    raise AssertionError(
        f"Generator idx={int(bad['idx'])} at bus {int(bad['bus'])} (constant-cost) has varying costs across scenarios. "
        f"Scenario {int(bad['scenario'])}: "
        f"c0={bad['cp0_eur']:.6f}, c1={bad['cp1_eur_per_mw']:.6f}, c2={bad['cp2_eur_per_mw2']:.6f} "
        f"vs reference: c0={bad['cp0_eur_ref']:.6f}, c1={bad['cp1_eur_per_mw_ref']:.6f}, c2={bad['cp2_eur_per_mw2_ref']:.6f}",
    )

validate_bus_type_generator_consistency

Test that bus types are consistent with generator presence (vectorized).

Validates fundamental power system constraints:

- PV buses (voltage-controlled) must have at least one active generator
- PQ buses (load buses) must have NO active generators
- REF buses (slack/reference) must have at least one active generator

Parameters:

Name Type Description Default
generated_data Dict[str, DataFrame]

Dictionary containing bus_data and gen_data DataFrames.

required

Raises:

Type Description
AssertionError

If any bus type constraint is violated.

Source code in gridfm_datakit/validation.py
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
def validate_bus_type_generator_consistency(
    generated_data: Dict[str, pd.DataFrame],
) -> None:
    """Check that each bus type agrees with the number of in-service generators.

    Per (scenario, bus) the generators with ``in_service == 1`` are counted and
    the count is joined onto the bus table. The checks run in this order and
    stop at the first failing category:

    - rows flagged ``PV == 1`` need at least one active generator
    - rows flagged ``PQ == 1`` must have no active generator
    - rows flagged ``REF == 1`` need at least one active generator

    Args:
        generated_data: Mapping holding the ``"bus_data"`` and ``"gen_data"`` tables.

    Raises:
        AssertionError: If any of the three rules is broken. The message names
            the first offending row and the number of violations of that rule.
    """
    buses = generated_data["bus_data"]
    gens = generated_data["gen_data"]

    n_scenarios = len(buses["scenario"].unique())
    print(
        f"    Bus type-generator consistency: validating {len(buses)} bus entries across {n_scenarios} scenarios",
    )

    # Number of in-service generators for every (scenario, bus) that has any.
    in_service = gens.loc[gens["in_service"] == 1]
    gen_counts = in_service.groupby(["scenario", "bus"], as_index=False).size()
    gen_counts = gen_counts.set_axis(["scenario", "bus", "n_active_gens"], axis=1)

    # Left join keeps every bus row; buses without a match get a count of 0.
    merged = pd.merge(buses, gen_counts, how="left", on=["scenario", "bus"])
    merged["n_active_gens"] = merged["n_active_gens"].fillna(0).astype(int)

    # PV: voltage-controlled buses need a generator.
    pv_rows = merged.loc[merged["PV"] == 1]
    pv_missing = pv_rows.loc[pv_rows["n_active_gens"] == 0]
    if len(pv_missing) > 0:
        offender = pv_missing.iloc[0]
        raise AssertionError(
            f"PV bus {int(offender['bus'])} in scenario {int(offender['scenario'])} "
            f"has no active generators. PV buses must have at least one active generator to control voltage. "
            f"Found {len(pv_missing)} total violations.",
        )

    # PQ: load buses must not host an active generator.
    pq_rows = merged.loc[merged["PQ"] == 1]
    pq_extra = pq_rows.loc[pq_rows["n_active_gens"] > 0]
    if len(pq_extra) > 0:
        offender = pq_extra.iloc[0]
        raise AssertionError(
            f"PQ bus {int(offender['bus'])} in scenario {int(offender['scenario'])} "
            f"has {int(offender['n_active_gens'])} active generator(s). PQ buses (load buses) "
            f"must have no active generators. Found {len(pq_extra)} total violations.",
        )

    # REF: the slack bus needs a generator.
    ref_rows = merged.loc[merged["REF"] == 1]
    ref_missing = ref_rows.loc[ref_rows["n_active_gens"] == 0]
    if len(ref_missing) > 0:
        offender = ref_missing.iloc[0]
        raise AssertionError(
            f"REF/Slack bus {int(offender['bus'])} in scenario {int(offender['scenario'])} "
            f"has no active generators. REF buses must have at least one active generator to balance the system. "
            f"Found {len(ref_missing)} total violations.",
        )

    print(
        f"    Bus type-generator consistency: validated {len(pv_rows)} PV, {len(pq_rows)} PQ, {len(ref_rows)} REF bus entries",
    )
    print("    Bus type-generator consistency: OK")

validate_scenario_indexing_consistency

Test that scenario indices are consistent across all data files.

Source code in gridfm_datakit/validation.py
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
def validate_scenario_indexing_consistency(
    generated_data: Dict[str, pd.DataFrame],
) -> None:
    """Test that scenario indices are consistent across all data files.

    Collects the distinct values of the ``scenario`` column in the bus, branch,
    generator and Y-bus tables and requires the four sets to be equal.

    Args:
        generated_data: Mapping holding the ``"bus_data"``, ``"branch_data"``,
            ``"gen_data"`` and ``"y_bus_data"`` tables.

    Raises:
        AssertionError: If the tables do not all contain the same set of
            scenario indices.
    """
    bus_data = generated_data["bus_data"]
    branch_data = generated_data["branch_data"]
    gen_data = generated_data["gen_data"]
    y_bus_data = generated_data["y_bus_data"]

    bus_scenarios = set(bus_data["scenario"].unique())
    branch_scenarios = set(branch_data["scenario"].unique())
    gen_scenarios = set(gen_data["scenario"].unique())
    ybus_scenarios = set(y_bus_data["scenario"].unique())

    print(
        f"    Scenario indexing consistency: validating {len(bus_scenarios)} scenarios across 4 data files",
    )

    # Raise explicitly instead of using `assert`, so the check still runs when
    # Python is started with -O (which strips assert statements).
    if not (bus_scenarios == branch_scenarios == gen_scenarios == ybus_scenarios):
        raise AssertionError(
            "All data files should contain the same set of scenario indices",
        )

    print("    Scenario indexing consistency: OK")

validate_bus_indexing_consistency

Test that bus indices are consistent across data files.

Source code in gridfm_datakit/validation.py
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
def validate_bus_indexing_consistency(generated_data: Dict[str, pd.DataFrame]) -> None:
    """Test that bus indices are consistent across data files.

    Every bus referenced by a generator (``gen_data["bus"]``) or by a branch
    endpoint (``branch_data["from_bus"]`` / ``branch_data["to_bus"]``) must
    appear in ``bus_data["bus"]``. The generator check runs first.

    Args:
        generated_data: Mapping holding the ``"bus_data"``, ``"branch_data"``
            and ``"gen_data"`` tables.

    Raises:
        AssertionError: If a generator or a branch endpoint references a bus
            that is absent from the bus table.
    """
    bus_data = generated_data["bus_data"]
    branch_data = generated_data["branch_data"]
    gen_data = generated_data["gen_data"]

    bus_indices = set(bus_data["bus"].unique())
    branch_bus_indices = set(branch_data["from_bus"].unique()) | set(
        branch_data["to_bus"].unique(),
    )
    gen_bus_indices = set(gen_data["bus"].unique())

    print(
        f"    Bus indexing consistency: validating {len(bus_indices)} buses across 3 data files",
    )

    # Raise explicitly instead of using `assert`, so the checks still run when
    # Python is started with -O (which strips assert statements).
    if not gen_bus_indices.issubset(bus_indices):
        raise AssertionError("All generator buses should exist in bus data")
    if not branch_bus_indices.issubset(bus_indices):
        raise AssertionError("All branch endpoint buses should exist in bus data")

    print("    Bus indexing consistency: OK")

validate_data_completeness

Test that all required columns are present and contain no NaN values.

Source code in gridfm_datakit/validation.py
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
def validate_data_completeness(generated_data: Dict[str, pd.DataFrame]) -> None:
    """Test that all required columns are present and contain no NaN values.

    Steps, in order:

    1. Every table (bus, branch, generator, Y-bus, and runtime data when
       provided) must have a ``scenario`` column.
    2. Required columns are checked via ``_require_columns`` and NaN-freeness
       via ``_check_no_nan``. When the bus table has a ``Va_dc`` column, the
       DC column lists are added to the required columns of the bus, branch
       and generator tables.
    3. The bus, branch, generator and Y-bus tables must be non-empty.

    Args:
        generated_data: Mapping holding the ``"bus_data"``, ``"branch_data"``,
            ``"gen_data"`` and ``"y_bus_data"`` tables. ``"runtime_data"`` is
            optional; a missing key is treated like ``None`` and skipped.

    Raises:
        AssertionError: If a table lacks the ``scenario`` column or is empty.
            NOTE(review): ``_require_columns`` / ``_check_no_nan`` are defined
            elsewhere in the module; presumably they raise on missing columns
            and NaN values respectively - confirm against their definitions.
    """
    bus_data = generated_data["bus_data"]
    branch_data = generated_data["branch_data"]
    gen_data = generated_data["gen_data"]
    y_bus_data = generated_data["y_bus_data"]
    # Runtime data is optional (handled via `is not None` below), so a missing
    # key must not fail with KeyError.
    runtime_data = generated_data.get("runtime_data")

    core_frames = [
        ("Bus data", bus_data),
        ("Branch data", branch_data),
        ("Generator data", gen_data),
        ("Y-bus data", y_bus_data),
    ]

    total_entries = sum(len(df) for _, df in core_frames)
    print(
        f"    Data completeness: validating {total_entries} total entries across 4 data files",
    )

    # 1) Ensure 'scenario' column exists everywhere.
    # Explicit raises (not `assert`) so the checks survive `python -O`.
    for name, df in core_frames:
        if "scenario" not in df.columns:
            raise AssertionError(f"{name} should have scenario column")

    if runtime_data is not None and "scenario" not in runtime_data.columns:
        raise AssertionError("Runtime data should have scenario column")

    # Presence of "Va_dc" in the bus table switches on the DC column requirements.
    dc = "Va_dc" in bus_data.columns

    # 2) Check required columns exist and contain no NaN values
    _require_columns(
        bus_data,
        "Bus data",
        (BUS_COLUMNS + DC_BUS_COLUMNS) if dc else BUS_COLUMNS,
    )
    _require_columns(
        branch_data,
        "Branch data",
        (BRANCH_COLUMNS + DC_BRANCH_COLUMNS) if dc else BRANCH_COLUMNS,
    )
    _require_columns(
        gen_data,
        "Generator data",
        (GEN_COLUMNS + DC_GEN_COLUMNS) if dc else GEN_COLUMNS,
    )
    _require_columns(y_bus_data, "Y-bus data", YBUS_COLUMNS)

    # Only the base column lists are NaN-checked; the DC columns are not.
    _check_no_nan(bus_data, "Bus data", BUS_COLUMNS)
    _check_no_nan(branch_data, "Branch data", BRANCH_COLUMNS)
    _check_no_nan(gen_data, "Generator data", GEN_COLUMNS)
    _check_no_nan(y_bus_data, "Y-bus data", YBUS_COLUMNS)
    if runtime_data is not None:
        _check_no_nan(runtime_data, "Runtime data", RUNTIME_COLUMNS)

    # 3) Non-emptiness
    for name, df in core_frames:
        if len(df) == 0:
            raise AssertionError(f"{name} should not be empty")

    print("    Data completeness: OK (all required columns present and NaN-free)")