Source code for FastMBAR.fastmbar

from typing import Union
import torch
import numpy as np
import scipy.optimize as optimize
import math

__version__ = "1.4.5"

## A small diagonal matrix with __EPS__ as its diagonal elements is added 
## to the Hessian matrix to avoid the case where the Hessian matrix is singular 
## due to underflow.
__EPS__ = 1e-16
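
## For reference, the MBAR equation solved below (in reduced units) is the
## self-consistent condition
##     F_i = -log( sum_{n=1}^{N} exp(-u_i(x_n)) / sum_{k=1}^{M} N_k * exp(F_k - u_k(x_n)) )
## where u_i(x_n) = energy[i, n] and N_k = num_conf[k]. The solver treats this
## as minimizing the negative log-likelihood defined in _compute_logp_of_F below.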

class FastMBAR:
    """
    The FastMBAR class is initialized with an energy matrix and an array of the
    numbers of conformations. The corresponding MBAR equation is solved in the
    constructor. Therefore, the relative free energies of the states used in the
    energy matrix are calculated in the constructor. The method
    **calculate_free_energies_of_perturbed_states** can be used to calculate the
    relative free energies of perturbed states.
    """

    def __init__(
        self,
        energy: Union[np.ndarray, torch.Tensor],
        num_conf: Union[np.ndarray, torch.Tensor],
        cuda: bool = False,
        cuda_batch_mode: Union[bool, None] = None,
        bootstrap: bool = False,
        bootstrap_block_size: int = 3,
        bootstrap_num_rep: int = 100,
        verbose: bool = False,
        method: str = "Newton",
    ) -> None:
        """Initializer for the class FastMBAR

        Parameters
        ----------
        energy : 2D ndarray or 2D tensor
            It has a size of (M x N), where M is the number of states and N is the
            total number of conformations. The entry energy[i,j] is the reduced
            (unitless) energy of conformation j in state i. If bootstrapping is used
            to calculate the uncertainty, the order of conformations matters.
            Conformations sampled from one state need to occupy a contiguous chunk
            of columns. Conformations sampled from state k need to occupy columns to
            the left of conformations sampled from state l if k < l. If bootstrapping
            is not used, the order of conformations does not matter.
        num_conf : 1D int ndarray or 1D tensor
            It should have a size of M, where num_conf[i] is the number of
            conformations sampled from state i. Therefore, np.sum(num_conf) has to
            be equal to N. All entries in num_conf have to be strictly greater than 0.
        cuda : bool, optional (default=False)
            If set to True, the calculation will be run on a graphics processing
            unit (GPU) using CUDA.
        cuda_batch_mode : bool, optional (default=None)
            The batch mode is turned on when the size of the energy matrix is too
            large for the memory of a GPU. When cuda_batch_mode is True, the energy
            matrix will be split into multiple batches which are used sequentially.
            If cuda_batch_mode = None, it will be set automatically based on the
            size of energy and the memory of the available GPU device.
        bootstrap : bool, optional (default=False)
            If bootstrap is True, the uncertainty of the calculated free energies
            will be estimated using block bootstrapping.
        bootstrap_block_size : int, optional (default=3)
            Block size used in block bootstrapping.
        bootstrap_num_rep : int, optional (default=100)
            Number of repeats in block bootstrapping.
        verbose : bool, optional (default=False)
            If verbose is True, detailed information about solving the MBAR
            equations is printed.
        method : str, optional (default="Newton")
            The method used to solve the MBAR equation. The default is Newton's method.
        """

        #### check the parameters: energy and num_conf

        ## Note that both self.energy and self.num_conf will be on the CPU no matter
        ## whether cuda is True or False.

        ## energy needs to be a 2-D ndarray or a 2-D tensor.
        ## convert it into double precision if not
        if isinstance(energy, np.ndarray):
            energy = energy.astype(np.float64)
            self.energy = torch.from_numpy(energy)
        elif isinstance(energy, torch.Tensor):
            self.energy = energy.double().cpu()
        else:
            raise TypeError("energy has to be a 2-D ndarray or a 2-D tensor.")

        ## num_conf needs to be a 1-D ndarray or a 1-D tensor.
        if isinstance(num_conf, np.ndarray):
            num_conf = num_conf.astype(np.float64)
            self.num_conf = torch.from_numpy(num_conf)
        elif isinstance(num_conf, torch.Tensor):
            self.num_conf = num_conf.double().cpu()
        else:
            raise TypeError("num_conf has to be a 1-D ndarray or a 1-D tensor.")

        ## check the shape of energy and num_conf
        if energy.ndim != 2:
            raise ValueError("energy has to be a 2-D ndarray or a 2-D tensor.")
        if num_conf.ndim != 1:
            raise ValueError("num_conf has to be a 1-D ndarray or a 1-D tensor.")
        if energy.shape[0] != num_conf.shape[0]:
            raise ValueError(
                "the number of rows in energy has to be equal to the length of num_conf."
            )

        ## check if the number of conformations sampled from each state is greater than 0
        if torch.sum(self.num_conf <= 0) > 0:
            raise ValueError(
                "all entries in num_conf have to be strictly greater than 0."
            )

        ## check if the total number of conformations is equal to the number of columns in energy
        if torch.sum(self.num_conf) != self.energy.shape[1]:
            raise ValueError(
                "the sum of num_conf has to be equal to the number of columns in energy."
            )

        self.M = self.energy.shape[0]
        self.N = self.energy.shape[1]

        self.cuda = cuda
        if self.cuda is True:
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        if self.cuda:
            ## check if the GPU device has enough memory. If not, cuda_batch_mode is
            ## turned on; automatically determine if cuda_batch_mode is on or off
            self._total_GPU_memory = torch.cuda.get_device_properties(0).total_memory
            if (
                self.energy.numel() * self.energy.element_size()
                < self._total_GPU_memory / 10
            ):
                self.cuda_batch_mode = False
            else:
                self.cuda_batch_mode = True

            ## cuda_batch_mode can be enforced by passing the argument cuda_batch_mode
            if cuda_batch_mode is not None:
                self.cuda_batch_mode = cuda_batch_mode

        ## when cuda_batch_mode is used, we need to decide on the batch_size based on
        ## both the memory of the GPU device and the size of the energy matrix
        self._batch_size = None
        if self.cuda and self.cuda_batch_mode:
            ## batch size for separating conformations
            self._batch_size = int(
                self._total_GPU_memory
                / 20
                / self.energy.shape[0]
                / self.energy.element_size()
            )
            self._batch_size = min(1024, self._batch_size)

        ## whether to use bootstrap to estimate the uncertainty of the calculated free energies
        ## bootstrap needs to be a boolean
        if not isinstance(bootstrap, bool):
            raise TypeError("bootstrap has to be a boolean.")
        self.bootstrap = bootstrap

        ## block size used in block bootstrapping
        if not isinstance(bootstrap_block_size, int):
            raise TypeError("bootstrap_block_size has to be an integer.")
        self.bootstrap_block_size = bootstrap_block_size

        ## number of repeats in block bootstrapping
        if not isinstance(bootstrap_num_rep, int):
            raise TypeError("bootstrap_num_rep has to be an integer.")
        self.bootstrap_num_rep = bootstrap_num_rep

        ## whether to print the detailed information of solving MBAR equations
        ## verbose needs to be a boolean
        if not isinstance(verbose, bool):
            raise TypeError("verbose has to be a boolean.")
        self.verbose = verbose

        ## method used to solve the MBAR equation
        if not isinstance(method, str):
            raise TypeError("method has to be a string.")
        if method not in ["Newton", "L-BFGS-B"]:
            raise ValueError("method has to be Newton or L-BFGS-B.")
        self.method = method

        ## solve the MBAR equation
        if self.bootstrap is False:
            dF_init = self._initialize_dF(self.energy, self.num_conf)
            dF = _solve_mbar(
                dF_init.to(self.device),
                self.energy.to(self.device),
                self.num_conf.to(self.device),
                self.method,
                self._batch_size,
                verbose=self.verbose,
            ).cpu()

            ## shift self._F such that \sum_i F[i]*num_conf[i] = 0
            F = torch.cat([dF.new_zeros(1), dF])
            pi = self.num_conf / self.N
            self._F = F - torch.sum(pi * F)

            b = -self._F - torch.log(self.num_conf)
            self._log_prob_mix = torch.logsumexp(-(self.energy + b[:, None]), dim=0)
            self._log_mu = -self._log_prob_mix

            ## Compute self._F_cov under the constraint that \sum_i F[i]*num_conf[i] = 0.
            ## There are two ways to compute the covariance matrix of self._F.
            ## See equations 4.2 and 6.4 of the following paper for details:
            ## Kong, A., et al. "A theory of statistical models for Monte Carlo
            ## integration." Journal of the Royal Statistical Society Series B:
            ## Statistical Methodology 65.3 (2003): 585-604.
            ## https://doi.org/10.1111/1467-9868.00404

            ## The first way, used here, is based on equation 4.2. It is more general
            ## than the second way, meaning that it can also be used to compute the
            ## covariance matrix for perturbed states.
            self.P = torch.exp(
                -(self.energy - self._F[:, None] + self._log_prob_mix)
            ).t()
            W = torch.diag(self.num_conf)
            Q, R = torch.linalg.qr(self.P)
            A = torch.eye(self.M, device=W.device) - R @ W @ R.T
            self._F_cov = R.T @ torch.linalg.pinv(A, hermitian=True, rtol=1e-7) @ R

            # ## The second way uses equation 6.4
            # if self._batch_size is None:
            #     H = _compute_hessian_log_likelihood_of_F(
            #         self._F, self.energy, self.num_conf
            #     )
            # else:
            #     H = _compute_hessian_log_likelihood_of_F_in_batch(
            #         self._F, self.energy, self.num_conf, self._batch_size
            #     )
            # Hp = H.new_zeros((self.M + 1, self.M + 1)).cpu()
            # Hp[0 : self.M, 0 : self.M] = H
            # Hp[self.M, 0 : self.M] = -self.num_conf
            # Hp[0 : self.M, self.M] = -self.num_conf
            # Hp[self.M, self.M] = 0
            # self._F_cov = torch.linalg.inv(-Hp)[0 : self.M, 0 : self.M]
            # self._F_cov = self._F_cov - torch.diag(1 / self.num_conf) + 1 / self.N

            self._F_std = self._F_cov.diagonal().sqrt()

            self._DeltaF = self._F[None, :] - self._F[:, None]
            self._DeltaF_cov = (
                self._F_cov.diag()[:, None]
                + self._F_cov.diag()[None, :]
                - 2 * self._F_cov
            )
            self._DeltaF_std = self._DeltaF_cov.sqrt()

        elif self.bootstrap is True:
            dFs = []
            log_prob_mix = []
            dF_init = self._initialize_dF(self.energy, self.num_conf)
            self._conf_idx = []
            for _ in range(self.bootstrap_num_rep):
                conf_idx = _bootstrap_conf_idx(
                    self.num_conf.to(torch.int64), self.bootstrap_block_size
                )
                dF = _solve_mbar(
                    dF_init.to(self.device),
                    self.energy[:, conf_idx].to(self.device),
                    self.num_conf.to(self.device),
                    self.method,
                    self._batch_size,
                    verbose=self.verbose,
                ).cpu()
                dF_init = dF
                dFs.append(dF.clone())

                F = torch.cat([dF.new_zeros(1), dF])
                ## shift F such that \sum_i F[i]*num_conf[i] = 0
                pi = self.num_conf / self.N
                F = F - torch.sum(pi * F)
                b = -F - torch.log(self.num_conf / self.N)
                log_prob_mix.append(
                    torch.logsumexp(-(self.energy[:, conf_idx] + b[:, None]), dim=0)
                )
                self._conf_idx.append(conf_idx)

            dF = torch.stack(dFs, dim=1)
            F = torch.cat([dF.new_zeros(1, dF.shape[1]), dF], dim=0)

            ## shift F such that \sum_i F[i]*num_conf[i] = 0
            pi = self.num_conf / self.N
            self._F_bootstrap = F - torch.sum(pi[:, None] * F, dim=0)

            self._F = torch.mean(self._F_bootstrap, dim=1)
            self._F_std = torch.std(self._F_bootstrap, dim=1)
            self._F_cov = torch.cov(self._F_bootstrap)

            self._log_prob_mix = torch.stack(log_prob_mix, dim=0)

            DeltaF = self._F_bootstrap[None, :, :] - self._F_bootstrap[:, None, :]
            self._DeltaF = torch.mean(DeltaF, dim=2)
            self._DeltaF_std = torch.std(DeltaF, dim=2)

    @property
    def F(self) -> np.ndarray:
        r"""Free energies of the states under the constraint
        :math:`\sum_{k=1}^{M} N_k * F_k = 0`, where :math:`N_k` is the number of
        conformations sampled from state k.
        """
        return self._F.cpu().numpy()

    @property
    def F_std(self) -> np.ndarray:
        r"""Standard deviation of the free energies of the states under the constraint
        :math:`\sum_{k=1}^{M} N_k * F_k = 0`, where :math:`N_k` is the number of
        conformations sampled from state k.
        """
        return self._F_std.cpu().numpy()

    @property
    def F_cov(self) -> np.ndarray:
        r"""Covariance matrix of the free energies of the states under the constraint
        :math:`\sum_{k=1}^{M} N_k * F_k = 0`, where :math:`N_k` is the number of
        conformations sampled from state k.
        """
        return self._F_cov.cpu().numpy()

    @property
    def DeltaF(self) -> np.ndarray:
        r"""Free energy differences between states.
        :math:`\mathrm{DeltaF}[i,j]` is the free energy difference between state j
        and state i, i.e., :math:`\mathrm{DeltaF}[i,j] = F[j] - F[i]`.
        """
        return self._DeltaF.cpu().numpy()

    @property
    def DeltaF_std(self) -> np.ndarray:
        r"""Standard deviation of the free energy differences between states.
        :math:`\mathrm{DeltaF_std}[i,j]` is the standard deviation of the free
        energy difference :math:`\mathrm{DeltaF}[i,j]`.
        """
        return self._DeltaF_std.cpu().numpy()

    @property
    def log_prob_mix(self) -> np.ndarray:
        """The log probability density of conformations in the mixture distribution."""
        return self._log_prob_mix.cpu().numpy()

    def _initialize_dF(self, energy, num_conf):
        ## Initialize dF by self-consistent iteration.
        ## This is only required by Newton's method, because when the initial guess
        ## is far away from the true solution, the Hessian matrix can be very close
        ## to singular due to underflow. This can be avoided by using a good initial
        ## guess.
        dF_init = energy.new_zeros(self.M - 1)
        for _ in range(10):
            F_init = torch.cat([dF_init.new_zeros(1), dF_init])
            b = -F_init - torch.log(num_conf)
            log_prob_mix = torch.logsumexp(-(energy + b[:, None]), dim=0)
            du = energy + log_prob_mix
            F = -torch.logsumexp(-du, dim=1)
            dF_init = F[1:] - F[0]
        return dF_init

    def calculate_free_energies_of_perturbed_states(self, energy_perturbed):
        r"""Calculate free energies for perturbed states.

        Parameters
        ----------
        energy_perturbed : 2-D float ndarray with size of (L, N)
            Each row of the energy_perturbed matrix represents a state, and the
            value energy_perturbed[l,n] represents the reduced energy of the n'th
            conformation in the l'th perturbed state.

        Returns
        -------
        results : dict
            A dictionary containing the following keys:

            **F** - the free energies of the perturbed states.

            **F_std** - the standard deviation of the free energies of the perturbed states.

            **F_cov** - the covariance between the free energies of the perturbed states.

            **DeltaF** - :math:`\mathrm{DeltaF}[k,l]` is the free energy difference
            between state :math:`k` and state :math:`l`, i.e.,
            :math:`\mathrm{DeltaF}[k,l] = F[l] - F[k]`.

            **DeltaF_std** - the standard deviation of the free energy difference.
        """
        if isinstance(energy_perturbed, np.ndarray):
            energy_perturbed = energy_perturbed.astype(np.float64)
            energy_perturbed = torch.from_numpy(energy_perturbed)
        elif isinstance(energy_perturbed, torch.Tensor):
            energy_perturbed = energy_perturbed.double()
        else:
            raise TypeError(
                "energy_perturbed has to be a 2-D ndarray or a 2-D tensor."
            )

        if energy_perturbed.ndim != 2:
            raise ValueError(
                "energy_perturbed has to be a 2-D ndarray or a 2-D tensor."
            )
        if energy_perturbed.shape[1] != self.energy.shape[1]:
            raise ValueError(
                "the number of columns in energy_perturbed has to be equal to the number of columns in energy."
            )

        L = energy_perturbed.shape[0]

        F = None
        F_cov = None
        F_std = None

        if not self.bootstrap:
            du = energy_perturbed + self._log_prob_mix
            F = -torch.logsumexp(-du, dim=1)

            F_ext = torch.cat([self._F, F])
            U = torch.cat([self.energy, energy_perturbed], dim=0).t()
            self._P = torch.exp(-(U - F_ext + self._log_prob_mix[:, None]))
            W = torch.diag(torch.cat([self.num_conf, self.num_conf.new_zeros(L)]))
            Q, R = torch.linalg.qr(self._P)
            A = torch.eye(self.M + L, device=W.device) - R @ W @ R.T
            F_cov = R.T @ torch.linalg.pinv(A, hermitian=True, rtol=1e-8) @ R
            F_cov = F_cov[self.M :, self.M :]
            F_std = F_cov.diagonal().sqrt()

            DeltaF = F[None, :] - F[:, None]
            DeltaF_cov = F_cov.diag()[:, None] + F_cov.diag()[None, :] - 2 * F_cov
            DeltaF_std = DeltaF_cov.sqrt()
        else:
            F_list = []
            for k in range(self.bootstrap_num_rep):
                du = energy_perturbed[:, self._conf_idx[k]] + self._log_prob_mix[k]
                F = -torch.logsumexp(-du, dim=1)
                F_list.append(F)
            F_list = torch.stack(F_list, dim=1)
            F = torch.mean(F_list, dim=1)
            F_std = torch.std(F_list, dim=1)
            F_cov = torch.cov(F_list)

            DeltaF = F[None, :] - F[:, None]
            DeltaF_std = torch.std(F_list[:, :, None] - F_list[:, None, :], dim=1)

        results = {
            "F": F.numpy(),
            "F_std": F_std.numpy(),
            "F_cov": F_cov.numpy(),
            "DeltaF": DeltaF.numpy(),
            "DeltaF_std": DeltaF_std.numpy(),
        }
        return results
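
## Example usage of the class above (a minimal sketch; the energies, num_conf,
## and perturbed energies are made-up placeholder values, not part of this module):
##
##     import numpy as np
##     from FastMBAR.fastmbar import FastMBAR
##
##     energy = np.random.rand(3, 300)        # (M states) x (N conformations), reduced energies
##     num_conf = np.array([100, 100, 100])   # conformations sampled from each of the M states
##     fastmbar = FastMBAR(energy, num_conf, cuda=False, bootstrap=False)
##     print(fastmbar.F)                      # relative free energies under sum_k N_k * F_k = 0
##
##     energy_perturbed = np.random.rand(2, 300)   # 2 perturbed states over the same N conformations
##     results = fastmbar.calculate_free_energies_of_perturbed_states(energy_perturbed)
##     print(results["F"], results["DeltaF"])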


def _compute_logp_of_F(F, energy, num_conf):
    logp = (
        torch.sum(num_conf * F)
        - torch.logsumexp(-(energy.T - torch.log(num_conf) - F), dim=1).sum()
    )
    return logp


def _compute_loss_and_grad_of_dF(dF, energy, num_conf):
    """Compute the loss function and its gradient with respect to dF given energy and num_conf.

    Parameters
    ----------
    dF : 1-D float tensor with size (M-1)
        The free energy differences between state 0 and the other states. The entry
        dF[i] is the free energy difference between state 0 and state i+1.
    energy : 2-D float tensor with size (M x N)
        A 2-D tensor with size of M x N, where M is the number of states and N is
        the total number of conformations. The entry energy[i,j] is the reduced
        energy of conformation j in state i.
    num_conf : 1-D int tensor with size (M)
        A 1-D tensor with size of M, where num_conf[i] is the number of
        conformations sampled from state i. Therefore, torch.sum(num_conf) has to
        be equal to N. All entries in num_conf have to be strictly greater than 0.
    """
    # compute the loss function
    F = torch.cat([dF.new_zeros(1), dF])
    logp = _compute_logp_of_F(F, energy, num_conf)
    loss = -logp / num_conf.sum()

    u = energy - F[:, None] - torch.log(num_conf)[:, None]

    # compute the gradient of the loss function
    p = torch.softmax(-u, dim=0)
    grad = torch.mean(p, dim=1) - num_conf / num_conf.sum()
    grad = grad[1:]

    return loss, grad


def _compute_loss_and_grad_of_dF_in_batch(dF, energy, num_conf, batch_size: int):
    """This is the batch version of _compute_loss_and_grad_of_dF.

    The batch version is used when the size of energy is too large to fit into the
    memory of a GPU. The energy matrix is split into multiple batches and the
    calculation is done sequentially. The parameters are the same as in
    _compute_loss_and_grad_of_dF.
    """
    n = num_conf
    N = torch.sum(n)
    n_over_N = n / N

    if energy.shape[1] % batch_size == 0:
        num_batch = energy.shape[1] // batch_size
    else:
        num_batch = energy.shape[1] // batch_size + 1

    F = torch.cat([dF.new_zeros(1), dF])

    loss = -torch.sum(num_conf * F)
    grad = -num_conf
    for i in range(num_batch):
        energy_batch = energy[:, i * batch_size : (i + 1) * batch_size]
        u = energy_batch.cuda() - torch.log(num_conf)[:, None] - F[:, None]
        loss += torch.logsumexp(-u, dim=0).sum()
        p = torch.softmax(-u, dim=0)
        grad += torch.sum(p, dim=1)

    loss = loss / N
    grad = grad / N
    grad = grad[1:]

    return loss, grad


def _compute_hessian_log_likelihood_of_F(F, energy, num_conf):
    u = energy - F[:, None] - torch.log(num_conf)[:, None]
    p = torch.softmax(-u, dim=0)
    pp = torch.matmul(p, p.T)
    H = pp - torch.diag(torch.sum(p, 1))
    return H


def _compute_hessian_log_likelihood_of_F_in_batch(F, energy, num_conf, batch_size: int):
    if energy.shape[1] % batch_size == 0:
        num_batch = energy.shape[1] // batch_size
    else:
        num_batch = energy.shape[1] // batch_size + 1

    H = F.new_zeros((F.shape[0], F.shape[0])).cuda()
    for i in range(num_batch):
        energy_batch = energy[:, i * batch_size : (i + 1) * batch_size]
        u = energy_batch.cuda() - F[:, None] - torch.log(num_conf)[:, None]
        p = torch.softmax(-u, dim=0)
        pp = torch.matmul(p, p.T)
        H += pp - torch.diag(torch.sum(p, 1))
    return H


def _compute_hessian_loss_of_dF(dF, energy, num_conf):
    N = torch.sum(num_conf)
    F = torch.cat([dF.new_zeros(1), dF])
    H = _compute_hessian_log_likelihood_of_F(F, energy, num_conf)
    H = -H[1:, 1:] / N
    return H + __EPS__ * torch.eye(H.shape[0], device=H.device)


def _compute_hessian_loss_of_dF_in_batch(dF, energy, num_conf, batch_size: int):
    N = torch.sum(num_conf)
    F = torch.cat([dF.new_zeros(1), dF])
    H = _compute_hessian_log_likelihood_of_F_in_batch(F, energy, num_conf, batch_size)
    H = -H[1:, 1:] / N
    return H + __EPS__ * torch.eye(H.shape[0], device=H.device)


def _solve_mbar(dF_init, energy, num_conf, method, batch_size=None, verbose=False):
    if batch_size is not None:
        if method == "Newton":
            results = fmin_newton(
                f=_compute_loss_and_grad_of_dF_in_batch,
                hess=_compute_hessian_loss_of_dF_in_batch,
                x_init=dF_init,
                args=(energy, num_conf, batch_size),
                verbose=verbose,
            )
            return results["x"]
        elif method == "L-BFGS-B":
            options = {"disp": verbose, "gtol": 1e-8}
            results = optimize.minimize(
                lambda x: [
                    r.cpu().numpy()
                    for r in _compute_loss_and_grad_of_dF_in_batch(
                        energy.new_tensor(x), energy, num_conf, batch_size
                    )
                ],
                dF_init.cpu().numpy(),
                jac=True,
                method="L-BFGS-B",
                tol=1e-12,
                options=options,
            )
            return torch.from_numpy(results["x"])
    else:
        if method == "Newton":
            results = fmin_newton(
                f=_compute_loss_and_grad_of_dF,
                hess=_compute_hessian_loss_of_dF,
                x_init=dF_init,
                args=(energy, num_conf),
                verbose=verbose,
            )
            return results["x"]
        elif method == "L-BFGS-B":
            options = {"disp": verbose, "gtol": 1e-8}
            results = optimize.minimize(
                lambda x: [
                    r.cpu().numpy()
                    for r in _compute_loss_and_grad_of_dF(
                        energy.new_tensor(x), energy, num_conf
                    )
                ],
                dF_init.cpu().numpy(),
                jac=True,
                method="L-BFGS-B",
                tol=1e-12,
                options=options,
            )
            return torch.from_numpy(results["x"])


def _bootstrap_conf_idx(num_conf, bootstrap_block_size):
    num_conf_cumsum = torch.cumsum(num_conf, dim=0).tolist()
    num_conf_cumsum.pop(-1)
    num_conf_cumsum = [0] + num_conf_cumsum
    conf_idx = []
    for i in range(len(num_conf)):
        len_seq = int(num_conf[i])
        ## using circular block bootstrap
        num_sample_block = int(np.ceil(len_seq / bootstrap_block_size))
        idxs = torch.randint(0, len_seq, (num_sample_block,))
        sample_idx = torch.cat(
            [torch.arange(idx, idx + bootstrap_block_size) for idx in idxs]
        )
        sample_idx = torch.remainder(sample_idx, len_seq)
        sample_idx = sample_idx[0:len_seq]
        sample_idx = sample_idx + num_conf_cumsum[i]
        conf_idx.append(sample_idx)
    conf_idx = torch.cat(conf_idx)
    return conf_idx


def fmin_newton(f, hess, x_init, args=(), verbose=True, eps=1e-12, max_iter=300):
    """Minimize a function with Newton's method.

    For details of Newton's method, see Chapter 9.5 of Prof. Boyd's book
    `Convex Optimization <https://web.stanford.edu/~boyd/cvxbook/>`_.

    Args:
        f (callable): the objective function to be minimized.
            f(x: Tensor, *args) -> (y: Tensor, grad: Tensor), where y is the
            function value and grad is the gradient.
        hess (callable): the function to compute the Hessian matrix.
            hess(x: Tensor, *args) -> a two dimensional tensor.
        x_init (Tensor): the initial value.
        args (tuple): extra parameters for f and hess.
        verbose (bool): whether to print minimization log information.
        eps (float): tolerance for stopping.
        max_iter (int): maximum number of iterations.
""" ## constants used in backtracking line search alpha, beta = 0.01, 0.2 ## Newton's method for minimizing the function indx_iter = 0 N_func = 0 if verbose: print("==================================================================") print(" RUNNING THE NEWTON'S METHOD \n") print(" * * * \n") print(f" Tolerance EPS = {eps:.5E} \n") ## check if x_init is a pytorch tensor if isinstance(x_init, torch.Tensor): x = x_init.clone().detach().requires_grad_(False) else: x = x_init while indx_iter < max_iter: loss, grad = f(x, *args) N_func += 1 if isinstance(x_init, torch.Tensor): with torch.no_grad(): H = hess(x, *args) else: H = hess(x, *args) if isinstance(grad, torch.Tensor): newton_direction = torch.linalg.solve(H, -grad) newton_decrement_square = torch.sum(-grad * newton_direction) else: newton_direction = np.linalg.solve(H, -grad) newton_decrement_square = np.sum(-grad * newton_direction) if verbose: print( f"At iterate {indx_iter:4d}; f= {loss.item():.5E};", f"|1/2*Newton_decrement^2|: {newton_decrement_square.item()/2:.5E}\n", ) if newton_decrement_square / 2.0 <= eps: break ## backtracking line search max_ls_iter = 100 step_size = 1.0 indx_ls_iter = 0 while indx_ls_iter < max_ls_iter: target_loss, _ = f(x + step_size * newton_direction, *args) N_func += 1 approximate_loss = loss + step_size * alpha * (-newton_decrement_square) if target_loss < approximate_loss: break else: step_size = step_size * beta indx_ls_iter += 1 x = x + step_size * newton_direction indx_iter += 1 if verbose: print("N_iter = total number of iterations") print("N_func = total number of function and gradient evaluations") print("F = final function value \n") print(" * * * \n") print("N_iter N_func F") print(f"{indx_iter+1:6d} {N_func:6d} {loss.item():.6E}") print(f" F = {loss.item():.12f} \n") if newton_decrement_square / 2.0 <= eps and indx_iter < max_iter: print("CONVERGENCE: 1/2*Newton_decrement^2 < EPS") else: print("CONVERGENCE: NUM_OF_ITERATION REACH MAX_ITERATION") return { "x": x, "N_iter": indx_iter + 1, "N_func": N_func, "F": loss.item(), "half_newton_decrement_square": newton_decrement_square / 2.0, }