2024-10-14 10:56:47 +02:00
|
|
|
|
|
|
|
% Conference paper: "Maxout networks" (Goodfellow et al., 2013).
% PMLR is recorded as the publisher; `organization` is meant for a sponsor.
@inproceedings{goodfellow2013maxout,
  author    = {Goodfellow, Ian and Warde-Farley, David and Mirza, Mehdi and Courville, Aaron and Bengio, Yoshua},
  title     = {Maxout networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1319--1327},
  year      = {2013},
  publisher = {PMLR},
}
|
|
|
|
|
|
|
|
% Journal article: "Dropout" (Srivastava et al., 2014), volume 15, number 1.
@article{srivastava2014dropout,
  author    = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title     = {Dropout: a simple way to prevent neural networks from overfitting},
  journal   = {Journal of Machine Learning Research},
  volume    = {15},
  number    = {1},
  pages     = {1929--1958},
  year      = {2014},
  publisher = {JMLR.org},
}
|
|
|
|
|
|
|
|
% Book: "Deep Learning" (Goodfellow, Bengio, Courville; MIT Press, 2016).
% The web address is stored raw in `url`; the style adds any \url wrapping.
@book{Goodfellow-et-al-2016,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  title     = {Deep Learning},
  publisher = {MIT Press},
  year      = {2016},
  url       = {http://www.deeplearningbook.org},
}
|
|
|
|
|
|
|
|
% Conference paper by Andrew Y. Ng (2004) on L1 vs. L2 regularization.
% L1 and L2 are brace-protected so sentence-casing styles do not lowercase them.
@inproceedings{ng2004feature,
  author    = {Ng, Andrew Y.},
  title     = {Feature selection, {L1} vs. {L2} regularization, and rotational invariance},
  booktitle = {Proceedings of the Twenty-First International Conference on Machine Learning},
  pages     = {78},
  year      = {2004},
}
|
|
|
|
|
2024-11-11 12:34:32 +01:00
|
|
|
% arXiv preprint 1409.1556 (Simonyan and Zisserman, 2014).
% The arXiv identifier lives in the eprint fields, with the CoRR journal/volume
% convention, instead of being embedded in the journal name.
@article{simonyan2014very,
  author     = {Simonyan, Karen and Zisserman, Andrew},
  title      = {Very deep convolutional networks for large-scale image recognition},
  journal    = {CoRR},
  volume     = {abs/1409.1556},
  year       = {2014},
  eprinttype = {arXiv},
  eprint     = {1409.1556},
}
|
|
|
|
|
|
|
|
% Conference paper: "Deep residual learning for image recognition" (He et al., 2016).
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {770--778},
  year      = {2016},
}
|
|
|
|
|
|
|
|
% Conference paper (Glorot and Bengio, 2010) on training deep feedforward networks.
% "JMLR Workshop and Conference Proceedings" names the proceedings series, so it
% is stored in `series` rather than `organization`.
@inproceedings{glorot2010understanding,
  author    = {Glorot, Xavier and Bengio, Yoshua},
  title     = {Understanding the difficulty of training deep feedforward neural networks},
  booktitle = {Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics},
  series    = {JMLR Workshop and Conference Proceedings},
  pages     = {249--256},
  year      = {2010},
}
|
|
|
|
|
|
|
|
% Conference paper (Bengio, Frasconi, Simard, 1993) on long-term dependencies
% in recurrent networks.
@inproceedings{bengio1993problem,
  author    = {Bengio, Yoshua and Frasconi, Paolo and Simard, Patrice},
  title     = {The problem of learning long-term dependencies in recurrent networks},
  booktitle = {{IEEE} International Conference on Neural Networks},
  pages     = {1183--1188},
  year      = {1993},
  publisher = {IEEE},
}
|
|
|
|
|
|
|
|
% Conference paper (Ide and Kurita, IJCNN 2017) on sparse regularization.
% CNN and ReLU are brace-protected so sentence-casing styles keep their case.
@inproceedings{ide2017improvement,
  author    = {Ide, Hidenori and Kurita, Takio},
  title     = {Improvement of learning for {CNN} with {ReLU} activation by sparse regularization},
  booktitle = {2017 International Joint Conference on Neural Networks (IJCNN)},
  pages     = {2684--2691},
  year      = {2017},
  publisher = {IEEE},
}
|
|
|
|
|
|
|
|
% Conference paper: "Batch normalization" (Ioffe and Szegedy, 2015).
@inproceedings{ioffe2015batch,
  author    = {Ioffe, Sergey and Szegedy, Christian},
  title     = {Batch normalization: Accelerating deep network training by reducing internal covariate shift},
  booktitle = {International Conference on Machine Learning},
  pages     = {448--456},
  year      = {2015},
  publisher = {PMLR},
}
|
|
|
|
|
|
|
|
% Conference paper: "Densely connected convolutional networks" (Huang et al., 2017).
% The lowercase "van der" lets BibTeX parse it as the name particle (von part).
@inproceedings{huang2017densely,
  author    = {Huang, Gao and Liu, Zhuang and van der Maaten, Laurens and Weinberger, Kilian Q.},
  title     = {Densely connected convolutional networks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4700--4708},
  year      = {2017},
}
|
|
|
|
|
|
|
|
% Journal article (Rumelhart, Hinton, Williams, 1986), Nature volume 323, number 6088.
@article{rumelhart1986learning,
  author    = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title     = {Learning representations by back-propagating errors},
  journal   = {Nature},
  volume    = {323},
  number    = {6088},
  pages     = {533--536},
  year      = {1986},
  publisher = {Nature Publishing Group},
}
|
|
|
|
|
|
|
|
% Conference paper (Du et al., 2019) on gradient descent reaching global minima.
@inproceedings{du2019gradient,
  author    = {Du, Simon and Lee, Jason and Li, Haochuan and Wang, Liwei and Zhai, Xiyu},
  title     = {Gradient descent finds global minima of deep neural networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1675--1685},
  year      = {2019},
  publisher = {PMLR},
}
|
|
|
|
|
|
|
|
% Conference paper (Pascanu, Mikolov, Bengio, 2013) on training recurrent networks.
@inproceedings{pascanu2013difficulty,
  author    = {Pascanu, Razvan and Mikolov, Tomas and Bengio, Yoshua},
  title     = {On the difficulty of training recurrent neural networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1310--1318},
  year      = {2013},
  publisher = {PMLR},
}
|
|
|
|
|
|
|
|
% arXiv preprint 1712.09913 (Li et al., 2017).
% The arXiv identifier lives in the eprint fields, with the CoRR journal/volume
% convention, instead of being embedded in the journal name.
@article{li2017visualizing,
  author     = {Li, Hao and Xu, Zheng and Taylor, Gavin and Studer, Christoph and Goldstein, Tom},
  title      = {Visualizing the loss landscape of neural nets},
  journal    = {CoRR},
  volume     = {abs/1712.09913},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1712.09913},
}
|
|
|
|
|
|
|
|
% Conference paper (Santurkar et al., 2018) on how batch normalization helps optimization.
% {\k{a}} is a BibTeX special character (ogonek) and must stay in this braced form.
@inproceedings{santurkar2018does,
  author    = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew and M{\k{a}}dry, Aleksander},
  title     = {How does batch normalization help optimization?},
  booktitle = {Proceedings of the 32nd International Conference on Neural Information Processing Systems},
  pages     = {2488--2498},
  year      = {2018},
}
|
|
|
|
|
|
|
|
% Report: "Learning multiple layers of features from tiny images" (2009).
% Retyped from an article with an empty journal field, which BibTeX warns about.
% NOTE(review): the institution is not in the original entry; it comes from the
% report's commonly cited provenance. Confirm it, and confirm the author list.
@techreport{krizhevsky2009learning,
  author      = {Krizhevsky, Alex and Hinton, Geoffrey and others},
  title       = {Learning multiple layers of features from tiny images},
  institution = {University of Toronto},
  year        = {2009},
}
|
|
|
|
|
|
|
|
% Book chapter: "Efficient backprop" (LeCun et al.), in a 2012 Springer collection.
@incollection{lecun2012efficient,
  author    = {LeCun, Yann A. and Bottou, L{\'e}on and Orr, Genevieve B. and M{\"u}ller, Klaus-Robert},
  title     = {Efficient backprop},
  booktitle = {Neural Networks: Tricks of the Trade},
  pages     = {9--48},
  year      = {2012},
  publisher = {Springer},
}
|
|
|
|
|
|
|
|
% Book: "Neural networks for pattern recognition" (Bishop, 1995).
% NOTE(review): the original author field ended in "and others"; it is dropped
% here on the understanding that this is a single-author book. Confirm.
@book{bishop1995neural,
  author    = {Bishop, Christopher M.},
  title     = {Neural networks for pattern recognition},
  publisher = {Oxford University Press},
  year      = {1995},
}
|
2024-11-22 10:26:24 +01:00
|
|
|
|
|
|
|
% arXiv preprint 1706.03762: "Attention Is All You Need" (Vaswani et al., 2017).
% The timestamp, biburl and bibsource fields are kept from the dblp export.
@article{vaswani2017attention,
  author     = {Vaswani, Ashish and
                Shazeer, Noam and
                Parmar, Niki and
                Uszkoreit, Jakob and
                Jones, Llion and
                Gomez, Aidan N. and
                Kaiser, Lukasz and
                Polosukhin, Illia},
  title      = {Attention Is All You Need},
  journal    = {CoRR},
  volume     = {abs/1706.03762},
  year       = {2017},
  url        = {http://arxiv.org/abs/1706.03762},
  eprinttype = {arXiv},
  eprint     = {1706.03762},
  timestamp  = {Sat, 23 Jan 2021 01:20:40 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/VaswaniSPUJGKP17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
|