#################################################################################
# WaterTAP Copyright (c) 2020-2025, The Regents of the University of California,
# through Lawrence Berkeley National Laboratory, Oak Ridge National Laboratory,
# National Renewable Energy Laboratory, and National Energy Technology
# Laboratory (subject to receipt of any required approvals from the U.S. Dept.
# of Energy). All rights reserved.
#
# Please see the files COPYRIGHT.md and LICENSE.md for full copyright and license
# information, respectively. These files are also available online at the URL
# "https://github.com/watertap-org/watertap/"
#################################################################################
import os
import pandas as pd
# Import IDAES libraries
from idaes.core.surrogate.plotting.sm_plotter import (
surrogate_parity,
)
from idaes.core.surrogate.pysmo_surrogate import (
PysmoPolyTrainer,
PysmoRBFTrainer,
PysmoKrigingTrainer,
PysmoSurrogate,
)
from idaes.core.surrogate.alamopy import AlamoTrainer, AlamoSurrogate
__author__ = "Marcus Holly"
local_path = os.path.dirname(os.path.abspath(__file__))
def get_data(
    input_data_file=os.path.join(local_path, "input_data.csv"),
    output_data_file=os.path.join(local_path, "output_data.csv"),
):
    """
    Read the surrogate training inputs and outputs from two CSV files.

    Args:
        input_data_file: Path to the CSV file holding the input variables.
            Defaults to ``input_data.csv`` next to this module.
        output_data_file: Path to the CSV file holding the output variables.
            Defaults to ``output_data.csv`` next to this module.

    Returns:
        tuple: ``(feed_data, input_data, output_data)`` where

        - ``feed_data`` is the inputs and outputs concatenated column-wise,
        - ``input_data`` is the input CSV exactly as read,
        - ``output_data`` is the output CSV with its first column removed.
    """
    inputs = pd.read_csv(input_data_file, header=0)

    # The first column of the output file is discarded before use.
    raw_outputs = pd.read_csv(output_data_file, header=0)
    outputs = raw_outputs.iloc[:, 1:]

    combined = pd.concat([inputs, outputs], axis=1)
    return combined, inputs, outputs
def outputs_selections(output_data):
    """
    Filter and reorder output data columns to the standard ADM1 component set.

    Column labels are normalised first (surrounding whitespace stripped and
    embedded spaces removed), so a header such as ``" Volumetric Flowrate"``
    matches ``"VolumetricFlowrate"``. The normalisation is applied to a new
    frame; the DataFrame passed in by the caller is left untouched.

    The selected outputs are (in order):
    ``S_su``, ``S_aa``, ``S_fa``, ``S_va``, ``S_bu``, ``S_pro``, ``S_ac``,
    ``S_h2``, ``S_ch4``, ``S_IC``, ``S_IN``, ``S_I``, ``X_c``, ``X_ch``,
    ``X_pr``, ``X_li``, ``X_su``, ``X_aa``, ``X_fa``, ``X_c4``, ``X_pro``,
    ``X_ac``, ``X_h2``, ``X_I``, ``VolumetricFlowrate``.

    Args:
        output_data (pd.DataFrame): Raw output DataFrame as loaded from CSV.
            Column labels must be strings.

    Returns:
        pd.DataFrame: New DataFrame restricted to the standard ADM1
        component columns in the order listed above.

    Raises:
        KeyError: If any of the required columns is absent after the labels
            have been normalised.
    """
    outputs_list = [
        "S_su",
        "S_aa",
        "S_fa",
        "S_va",
        "S_bu",
        "S_pro",
        "S_ac",
        "S_h2",
        "S_ch4",
        "S_IC",
        "S_IN",
        "S_I",
        "X_c",
        "X_ch",
        "X_pr",
        "X_li",
        "X_su",
        "X_aa",
        "X_fa",
        "X_c4",
        "X_pro",
        "X_ac",
        "X_h2",
        "X_I",
        "VolumetricFlowrate",
    ]

    # Build the cleaned labels and attach them to a *new* frame: assigning to
    # ``output_data.columns`` would silently rename the caller's DataFrame.
    cleaned_labels = output_data.columns.str.strip().str.replace(
        " ", "", regex=False
    )
    cleaned = output_data.set_axis(cleaned_labels, axis=1)

    # Diagnostic output retained from the original implementation.
    print(cleaned.columns.tolist())

    # Fail with an explicit list of what is missing rather than relying on
    # the generic pandas indexing error.
    missing = [name for name in outputs_list if name not in cleaned.columns]
    if missing:
        raise KeyError(f"Output data is missing required column(s): {missing}")

    return cleaned[outputs_list]
def gen_surrogate_model(
    tool="idaes", method="poly", feed_data=None, input_data=None, output_data=None
):
    """
    Train an IDAES surrogate model and save it to disk alongside a parity plot.

    Method-specific configuration:

    - ``"poly"`` : PySMO polynomial with maximum order 6, multinomials
      enabled, training split 0.8, 3 cross-validations.
    - ``"kri"`` : PySMO Kriging with numerical gradients and regularization.
    - ``"rbf"`` : PySMO RBF with cubic basis function.
    - ``"alamo"`` : ALAMO with constant, linear, exponential, log, sin, cos
      basis functions, monomial powers ``[2, 3, 4]`` and ``multi2power`` /
      ``multi3power`` set to ``[1, 2, 3]``.

    Args:
        tool (str, optional): Surrogate toolbox identifier. Currently not
            used by the function; accepted for interface compatibility.
        method (str, optional): Surrogate modelling method; one of
            ``"poly"``, ``"kri"``, ``"rbf"`` or ``"alamo"``.
        feed_data (pd.DataFrame, optional): Combined input and output
            DataFrame used for training. When omitted it is built by
            concatenating ``input_data`` and ``output_data`` column-wise.
        input_data (pd.DataFrame): Input variables DataFrame. Used to derive
            the input labels and the per-input (min, max) bounds. Required
            despite the ``None`` default.
        output_data (pd.DataFrame): Output variables DataFrame. Used to
            derive the output labels. Required despite the ``None`` default.

    Returns:
        None. Writes the following files to the module directory:

        - ``<method>_surrogate.json`` : serialised surrogate model.
        - ``<method>_parity.pdf`` : parity plot for all output variables.

    Raises:
        ValueError: If ``method`` is not supported, or if ``input_data`` or
            ``output_data`` is not provided.
    """
    if method not in ("poly", "kri", "rbf", "alamo"):
        raise ValueError(
            f"Unsupported method: {method}. Choose from 'poly', 'kri', 'rbf', or 'alamo'."
        )
    # Labels and bounds are always derived from these two frames, so fail
    # early with a clear message instead of an AttributeError on ``None``.
    if input_data is None or output_data is None:
        raise ValueError(
            "Both input_data and output_data must be provided to derive the "
            "surrogate labels and input bounds."
        )

    if feed_data is None:
        feed_data = pd.concat([input_data, output_data], axis=1)

    input_labels = list(input_data.columns)
    output_labels = list(output_data.columns)
    lower = input_data.min().tolist()
    upper = input_data.max().tolist()
    input_bounds = dict(zip(input_labels, zip(lower, upper)))

    if method == "alamo":
        trainer = AlamoTrainer(
            input_labels=input_labels,
            output_labels=output_labels,
            training_dataframe=feed_data,
        )
        # Set ALAMO basis-function options
        trainer.config.constant = 1
        trainer.config.linfcns = 1
        trainer.config.expfcns = 1
        trainer.config.logfcns = 1
        trainer.config.sinfcns = 1
        trainer.config.cosfcns = 1
        trainer.config.monomialpower = [2, 3, 4]
        trainer.config.multi2power = [1, 2, 3]
        trainer.config.multi3power = [1, 2, 3]

        # NOTE(review): the success flag and message returned here are not
        # inspected; consider surfacing a failure explicitly once the exact
        # semantics of the flag are confirmed against the IDAES API.
        _success, _trained, _msg = trainer.train_surrogate()

        # Rebuild the surrogate from the trained expressions so that the
        # data-derived input bounds are attached to it.
        surrogate = AlamoSurrogate(
            trainer._results["Model"], input_labels, output_labels, input_bounds
        )
    else:
        if method == "poly":
            trainer = PysmoPolyTrainer(
                input_labels=input_labels,
                output_labels=output_labels,
                training_dataframe=feed_data,
            )
            trainer.config.maximum_polynomial_order = 6
            trainer.config.multinomials = True
            trainer.config.training_split = 0.8
            trainer.config.number_of_crossvalidations = 3
        elif method == "kri":
            trainer = PysmoKrigingTrainer(
                input_labels=input_labels,
                output_labels=output_labels,
                training_dataframe=feed_data,
            )
            trainer.config.numerical_gradients = True
            trainer.config.regularization = True
        else:  # method == "rbf"
            trainer = PysmoRBFTrainer(
                input_labels=input_labels,
                output_labels=output_labels,
                training_dataframe=feed_data,
            )
            trainer.config.basis_function = "cubic"

        # Train surrogate (calls PySMO through IDAES Python wrapper)
        trained = trainer.train_surrogate()
        surrogate = PysmoSurrogate(
            trained, input_labels, output_labels, input_bounds
        )

    # Persisting the model and drawing the parity plot is identical for every
    # method, so it is done once here.
    surrogate.save_to_file(
        os.path.join(local_path, f"{method}_surrogate.json"), overwrite=True
    )
    surrogate_parity(
        surrogate,
        feed_data,
        filename=os.path.join(local_path, f"{method}_parity.pdf"),
    )
if __name__ == "__main__":
    # Load the CSV data. The combined frame returned by get_data() is not
    # used directly: it is rebuilt below from the filtered outputs.
    _, input_data, raw_output_data = get_data()

    # Keep only the standard ADM1 output columns, in canonical order.
    output_data = outputs_selections(raw_output_data)

    gen_surrogate_model(
        tool="idaes",
        method="poly",  # kri, poly,rbf,alamo
        feed_data=pd.concat([input_data, output_data], axis=1),
        input_data=input_data,
        output_data=output_data,
    )