% Bibliography entries on neural-network regularization.
% Conventions: one field per line; authors in "Last, First" form; titles
% stored in Title Case, with braces around tokens whose capitalisation
% must survive styles that sentence-case titles.

@inproceedings{goodfellow2013maxout,
  author    = {Goodfellow, Ian and Warde-Farley, David and Mirza, Mehdi and Courville, Aaron and Bengio, Yoshua},
  title     = {Maxout Networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1319--1327},
  publisher = {PMLR},
  year      = {2013},
}

@article{srivastava2014dropout,
  author    = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title     = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
  journal   = {Journal of Machine Learning Research},
  volume    = {15},
  number    = {1},
  pages     = {1929--1958},
  publisher = {JMLR.org},
  year      = {2014},
}

% The citation key below is kept as-is so that existing \cite commands
% keep resolving. The URL stays in `note` (wrapped in \url) because
% classic styles that ignore the `url` field would otherwise drop it;
% the citing document must load a package that defines \url.
@book{Goodfellow-et-al-2016,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  title     = {Deep Learning},
  publisher = {MIT Press},
  year      = {2016},
  note      = {\url{http://www.deeplearningbook.org}},
}

% {L1} and {L2} are braced so sentence-casing styles do not print "l1"/"l2".
@inproceedings{ng2004feature,
  author    = {Ng, Andrew Y.},
  title     = {Feature Selection, {L1} vs. {L2} Regularization, and Rotational Invariance},
  booktitle = {Proceedings of the Twenty-First International Conference on Machine Learning},
  pages     = {78},
  year      = {2004},
}

@inproceedings{loshchilov2019decoupled,
  author    = {Loshchilov, Ilya and Hutter, Frank},
  title     = {Decoupled Weight Decay Regularization},
  booktitle = {International Conference on Learning Representations ({ICLR})},
  year      = {2019},
}