% Bibliography entries: activation functions, optimizers, normalization, datasets.
% Text outside of entries is ignored by BibTeX, so these lines act as comments.

% Venue names are defined once so that every entry spells them identically and
% the wording (full name vs. abbreviation) can be switched in a single place.
@string{ICML = {Proceedings of the International Conference on Machine Learning ({ICML})}}
@string{ICLR = {Proceedings of the International Conference on Learning Representations ({ICLR})}}

% Only `volume` is kept here: standard styles warn when a conference paper sets
% both `volume` and `number`, and the issue number is not used for proceedings.
% NOTE(review): this paper is often cited as an ICML 2013 workshop contribution
% (Deep Learning for Audio, Speech and Language Processing); confirm the venue
% and whether volume 30 is meaningful.
@inproceedings{maas2013rectifier,
  author    = {Maas, Andrew L. and Hannun, Awni Y. and Ng, Andrew Y.},
  title     = {Rectifier nonlinearities improve neural network acoustic models},
  booktitle = ICML,
  volume    = {30},
  year      = {2013},
}

@inproceedings{nair2010rectified,
  author    = {Nair, Vinod and Hinton, Geoffrey E.},
  title     = {Rectified linear units improve restricted {Boltzmann} machines},
  booktitle = ICML,
  pages     = {807--814},
  year      = {2010},
}

% arXiv preprints keep the `journal` form used throughout this file so they
% still render under classic styles; each one also carries the abstract URL.
@article{clevert2015fast,
  author  = {Clevert, Djork-Arn{\'e} and Unterthiner, Thomas and Hochreiter, Sepp},
  title   = {Fast and accurate deep network learning by exponential linear units ({ELU}s)},
  journal = {arXiv preprint arXiv:1511.07289},
  year    = {2015},
  url     = {https://arxiv.org/abs/1511.07289},
}

@article{klambauer2017self,
  author  = {Klambauer, G{\"u}nter and Unterthiner, Thomas and Mayr, Andreas and Hochreiter, Sepp},
  title   = {Self-Normalizing Neural Networks},
  journal = {arXiv preprint arXiv:1706.02515},
  year    = {2017},
  url     = {https://arxiv.org/abs/1706.02515},
}

@article{cohen2017emnist,
  author  = {Cohen, Gregory and Afshar, Saeed and Tapson, Jonathan and van Schaik, Andr{\'e}},
  title   = {{EMNIST}: an extension of {MNIST} to handwritten letters},
  journal = {arXiv preprint arXiv:1702.05373},
  year    = {2017},
  url     = {https://arxiv.org/abs/1702.05373},
}

% Adam was presented at ICLR 2015, not ICML; the venue is corrected accordingly.
@inproceedings{kingma2015adam,
  author    = {Kingma, Diederik P. and Ba, Jimmy},
  title     = {{Adam}: A Method for Stochastic Optimization},
  booktitle = ICLR,
  year      = {2015},
  url       = {https://arxiv.org/abs/1412.6980},
}

% Lecture slides rather than a journal article, so this is a misc entry with the
% course named in `howpublished` instead of a journal/volume/number triple.
@misc{tieleman2012rmsprop,
  author       = {Tieleman, Tijmen and Hinton, Geoffrey E.},
  title        = {Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude},
  howpublished = {COURSERA: Neural Networks for Machine Learning},
  year         = {2012},
  url          = {https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf},
}

@inproceedings{ioffe2015batch,
  author    = {Ioffe, Sergey and Szegedy, Christian},
  title     = {Batch normalization: Accelerating deep network training by reducing internal covariate shift},
  booktitle = ICML,
  pages     = {448--456},
  year      = {2015},
  url       = {http://proceedings.mlr.press/v37/ioffe15.html},
}