Source code for lmflow.optim.adadelta

#!/usr/bin/env python

import torch
from torch.optim.optimizer import Optimizer


class Adadelta(Optimizer):
    def __init__(self, params, lr=1.0, rho=0.95, eps=1e-6):
        defaults = dict(lr=lr, rho=rho, eps=eps)
        super().__init__(params, defaults)

    def step(self, closure=None):
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]

                # Lazy state initialization on the first step for this parameter.
                if len(state) == 0:
                    state["step"] = 0
                    state["square_avg"] = torch.zeros_like(p.data)
                    state["acc_delta"] = torch.zeros_like(p.data)

                square_avg, acc_delta = state["square_avg"], state["acc_delta"]
                rho, eps = group["rho"], group["eps"]
                state["step"] += 1

                # Running average of squared gradients:
                # E[g^2]_t = rho * E[g^2]_{t-1} + (1 - rho) * g_t^2
                square_avg.mul_(rho).addcmul_(grad, grad, value=1 - rho)

                # Update is the ratio of the RMS of accumulated deltas to the
                # RMS of accumulated gradients, scaled by the current gradient.
                std = square_avg.add(eps).sqrt_()
                delta = acc_delta.add(eps).sqrt_().div_(std).mul_(grad)
                p.data.add_(-delta)

                # Running average of squared parameter updates:
                # E[dx^2]_t = rho * E[dx^2]_{t-1} + (1 - rho) * dx_t^2
                acc_delta.mul_(rho).addcmul_(delta, delta, value=1 - rho)
        return loss
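
The optimizer above follows the standard torch.optim.Optimizer interface, so it can be dropped into an ordinary training loop. Below is a minimal usage sketch; the model, data, and iteration count are hypothetical placeholders, not part of the module.

# Hypothetical usage sketch: fit a small linear model with the Adadelta
# optimizer defined above. All data and hyperparameters are illustrative.
import torch

model = torch.nn.Linear(10, 1)
optimizer = Adadelta(model.parameters(), rho=0.95, eps=1e-6)
loss_fn = torch.nn.MSELoss()

x = torch.randn(32, 10)  # dummy inputs
y = torch.randn(32, 1)   # dummy targets

for _ in range(100):
    optimizer.zero_grad()        # clear gradients from the previous step
    loss = loss_fn(model(x), y)  # forward pass and loss
    loss.backward()              # compute gradients
    optimizer.step()             # apply the Adadelta update

Note that rho controls the effective window of the running averages: values closer to 1.0 average over more past steps, while smaller values make the accumulators track recent gradients more closely.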