optimizer.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from paddle import optimizer as optim
class Momentum(object):
    """
    Simple Momentum optimizer with velocity state.

    Args:
        learning_rate (float|Variable): The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float): Momentum factor.
        weight_decay (WeightDecayRegularizer, optional): The regularization (weight decay) strategy.
        grad_clip (GradientClipBase, optional): The gradient clipping strategy.
    """

    def __init__(self,
                 learning_rate,
                 momentum,
                 weight_decay=None,
                 grad_clip=None,
                 **args):
        super(Momentum, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip

    def __call__(self, parameters):
        # Build the underlying paddle.optimizer.Momentum bound to the given parameters.
        opt = optim.Momentum(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            parameters=parameters)
        return opt
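
# A minimal usage sketch (an illustrative assumption, not part of this module):
# the class above is a builder, so it is first configured and then called with
# the model's parameters to obtain the underlying paddle.optimizer.Momentum
# instance, which is then used in the usual paddle 2.x training loop.
#
#     import paddle
#     model = paddle.nn.Linear(10, 1)
#     optimizer = Momentum(learning_rate=0.01, momentum=0.9)(model.parameters())
#     loss = model(paddle.randn([4, 10])).mean()
#     loss.backward()
#     optimizer.step()
#     optimizer.clear_grad()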
class Adam(object):
    """
    Adam optimizer with adaptive estimates of first and second moments of the gradients.
    """

    def __init__(self,
                 learning_rate=0.001,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-08,
                 parameter_list=None,
                 weight_decay=None,
                 grad_clip=None,
                 name=None,
                 lazy_mode=False,
                 **kwargs):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.parameter_list = parameter_list
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip
        self.name = name
        self.lazy_mode = lazy_mode

    def __call__(self, parameters):
        # Build the underlying paddle.optimizer.Adam bound to the given parameters.
        opt = optim.Adam(
            learning_rate=self.learning_rate,
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            name=self.name,
            lazy_mode=self.lazy_mode,
            parameters=parameters)
        return opt
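
# A hedged example of wiring the Adam builder to a gradient clipping strategy
# (paddle.nn.ClipGradByGlobalNorm is assumed to be available, as in paddle 2.x;
# `model` is any paddle.nn.Layer):
#
#     clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
#     optimizer = Adam(learning_rate=1e-3, weight_decay=1e-4,
#                      grad_clip=clip)(model.parameters())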
class RMSProp(object):
    """
    Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.

    Args:
        learning_rate (float|Variable): The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float): Momentum factor.
        rho (float): rho value in the update equation.
        epsilon (float): Small value added to avoid division by zero, default is 1e-6.
        weight_decay (WeightDecayRegularizer, optional): The regularization (weight decay) strategy.
        grad_clip (GradientClipBase, optional): The gradient clipping strategy.
    """

    def __init__(self,
                 learning_rate,
                 momentum=0.0,
                 rho=0.95,
                 epsilon=1e-6,
                 weight_decay=None,
                 grad_clip=None,
                 **args):
        super(RMSProp, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.rho = rho
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip

    def __call__(self, parameters):
        # Build the underlying paddle.optimizer.RMSProp bound to the given parameters.
        opt = optim.RMSProp(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            rho=self.rho,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            parameters=parameters)
        return opt
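
# A sketch of how a config-driven trainer might select one of these builders by
# name; the `config` dict and the `build_optimizer` helper are illustrative
# assumptions, not part of this module:
#
#     def build_optimizer(config, parameters):
#         opt_map = {'Momentum': Momentum, 'Adam': Adam, 'RMSProp': RMSProp}
#         opt_name = config.pop('name')
#         return opt_map[opt_name](**config)(parameters)
#
#     config = {'name': 'RMSProp', 'learning_rate': 0.001, 'momentum': 0.9}
#     optimizer = build_optimizer(config, model.parameters())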