Source code for deepmd.utils.finetune

# SPDX-License-Identifier: LGPL-3.0-or-later
import logging
from typing import (
    TYPE_CHECKING,
    List,
)

import numpy as np

from deepmd.infer.deep_eval import (
    DeepEval,
)
from deepmd.utils.data_system import (
    DeepmdDataSystem,
)

if TYPE_CHECKING:
    pass

log = logging.getLogger(__name__)


def change_energy_bias_lower(
    data: DeepmdDataSystem,
    dp: DeepEval,
    origin_type_map: List[str],
    full_type_map: List[str],
    bias_atom_e: np.ndarray,
    bias_adjust_mode="change-by-statistic",
    ntest=10,
):
    """Change the energy bias according to the input data and the pretrained model.

    For every system in ``data`` up to ``ntest`` test frames are read, the
    number of atoms of each type in ``origin_type_map`` is counted per frame,
    and a least-squares fit of per-type energies against the frame energies is
    solved. Depending on ``bias_adjust_mode`` the fit is done on the labelled
    energies themselves or on the residual between the labels and the
    predictions of ``dp``.

    Parameters
    ----------
    data : DeepmdDataSystem
        The training data.
    dp : DeepEval
        The DeepEval object. It is only queried when ``bias_adjust_mode`` is
        'change-by-statistic'.
    origin_type_map : list
        The original type_map in dataset, they are targets to change the
        energy bias. Every entry must also appear in ``full_type_map``.
    full_type_map : list
        The full type_map in pretrained model.
    bias_atom_e : np.ndarray
        The old energy bias in the pretrained model. The entries that belong
        to ``origin_type_map`` are updated in place.
    bias_adjust_mode : str
        The mode for changing energy bias : ['change-by-statistic', 'set-by-statistic']
        'change-by-statistic' : perform predictions on energies of target dataset,
                and do least square on the errors to obtain the target shift as bias.
        'set-by-statistic' : directly use the statistic energy bias in the target dataset.
    ntest : int
        The number of test samples in a system to change the energy bias.

    Returns
    -------
    np.ndarray
        The same ``bias_atom_e`` array object, after the in-place update.

    Raises
    ------
    RuntimeError
        If ``bias_adjust_mode`` is not one of the two supported modes.
    ValueError
        If an entry of ``origin_type_map`` is missing from ``full_type_map``,
        or if ``data`` contains no data systems.
    AssertionError
        If a system contains an atom type that is not in ``origin_type_map``.
    """
    # Fail fast: reject an unsupported mode before any data is loaded.
    if bias_adjust_mode not in ("change-by-statistic", "set-by-statistic"):
        raise RuntimeError(f"Unknown bias_adjust_mode mode: {bias_adjust_mode}")
    use_prediction = bias_adjust_mode == "change-by-statistic"

    # Index of every dataset type inside the pretrained model's type map.
    # An explicit lookup is used so that a missing type is reported instead
    # of being silently mapped onto a neighbouring entry.
    full_types = list(full_type_map)
    missing_types = [name for name in origin_type_map if name not in full_types]
    if missing_types:
        raise ValueError(
            f"Types {missing_types} of the dataset are not present in the "
            f"type_map of the pretrained model: {full_types}."
        )
    idx_type_map = np.array(
        [full_types.index(name) for name in origin_type_map], dtype=np.intp
    )

    systems = list(data.data_systems)
    if not systems:
        raise ValueError(
            "No data systems found in 'data'; cannot compute the energy bias."
        )

    mixed_type = data.mixed_type
    numb_type = len(full_types)

    # Whether the model expects frame/atomic parameters does not depend on
    # the system, so query it once instead of once per system.
    if use_prediction:
        need_fparam = dp.get_dim_fparam() > 0
        need_aparam = dp.get_dim_aparam() > 0

    type_numbs = []
    energy_ground_truth = []
    energy_predict = []
    for system in systems:
        test_data = system.get_test()
        nframes = test_data["box"].shape[0]
        numb_test = min(nframes, ntest)
        if mixed_type:
            # One row of atom types per frame.
            atype = test_data["type"][:numb_test].reshape([numb_test, -1])
        else:
            # The first frame's types are used for all frames of the system.
            atype = test_data["type"][0]

        # Raised explicitly (rather than via ``assert``) so that the check is
        # still performed when Python runs with optimisations enabled.
        if not np.isin(atype, idx_type_map).all():
            raise AssertionError("Some types are not in 'type_map'!")

        energy_ground_truth.append(
            test_data["energy"][:numb_test].reshape([numb_test, 1])
        )

        # Per-frame atom counts, one column per entry of ``origin_type_map``.
        if mixed_type:
            type_numbs.append(
                np.array(
                    [(atype == i).sum(axis=-1) for i in idx_type_map],
                    dtype=np.int32,
                ).T
            )
        else:
            type_numbs.append(
                np.tile(
                    np.bincount(atype, minlength=numb_type)[idx_type_map],
                    (numb_test, 1),
                )
            )

        if use_prediction:
            coord = test_data["coord"][:numb_test].reshape([numb_test, -1])
            box = test_data["box"][:numb_test] if system.pbc else None
            fparam = test_data["fparam"][:numb_test] if need_fparam else None
            aparam = test_data["aparam"][:numb_test] if need_aparam else None
            ret = dp.eval(
                coord,
                box,
                atype,
                mixed_type=mixed_type,
                fparam=fparam,
                aparam=aparam,
            )
            # The first element of the result is used as the predicted energy.
            energy_predict.append(ret[0].reshape([numb_test, 1]))

    type_numbs = np.concatenate(type_numbs)
    energy_ground_truth = np.concatenate(energy_ground_truth)
    # Fancy indexing copies, so this keeps the values from before the update.
    old_bias = bias_atom_e[idx_type_map]

    if use_prediction:
        energy_predict = np.concatenate(energy_predict)
        bias_diff = energy_ground_truth - energy_predict
        delta_bias = np.linalg.lstsq(type_numbs, bias_diff, rcond=None)[0]
        unbias_e = energy_predict + type_numbs @ delta_bias
        atom_numbs = type_numbs.sum(-1)
        rmse_ae = np.sqrt(
            np.mean(
                np.square(
                    (unbias_e.ravel() - energy_ground_truth.ravel()) / atom_numbs
                )
            )
        )
        bias_atom_e[idx_type_map] += delta_bias.reshape(-1)
        log.info(
            "RMSE of atomic energy after linear regression is: %s eV/atom.",
            rmse_ae,
        )
    else:
        statistic_bias = np.linalg.lstsq(
            type_numbs, energy_ground_truth, rcond=None
        )[0]
        bias_atom_e[idx_type_map] = statistic_bias.reshape(-1)

    log.info(
        "Change energy bias of %s from %s to %s.",
        origin_type_map,
        old_bias,
        bias_atom_e[idx_type_map],
    )
    return bias_atom_e