mlpractical/report/refs.bib


% Goodfellow et al. (2013): maxout networks, conference paper.
% PMLR is recorded as the publisher of the proceedings.
@inproceedings{goodfellow2013maxout,
  author    = {Goodfellow, Ian and Warde-Farley, David and Mirza, Mehdi and Courville, Aaron and Bengio, Yoshua},
  title     = {Maxout networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1319--1327},
  year      = {2013},
  publisher = {PMLR}
}

% Srivastava et al. (2014): dropout for preventing overfitting, journal article.
@article{srivastava2014dropout,
  author    = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title     = {Dropout: a simple way to prevent neural networks from overfitting},
  journal   = {Journal of Machine Learning Research},
  volume    = {15},
  number    = {1},
  pages     = {1929--1958},
  year      = {2014},
  publisher = {JMLR.org}
}

% Goodfellow, Bengio and Courville (2016): the Deep Learning book.
% The link is kept in the note field so that it is printed by every style.
@book{Goodfellow-et-al-2016,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  title     = {Deep Learning},
  publisher = {MIT Press},
  year      = {2016},
  note      = {\url{http://www.deeplearningbook.org}}
}

% Ng (2004): L1 versus L2 regularization for feature selection, conference paper.
% L1 and L2 are braced so that sentence-casing styles do not lower-case them.
@inproceedings{ng2004feature,
  author    = {Ng, Andrew Y.},
  title     = {Feature selection, {L1} vs. {L2} regularization, and rotational invariance},
  booktitle = {Proceedings of the Twenty-First International Conference on Machine Learning},
  pages     = {78},
  year      = {2004}
}

% Simonyan and Zisserman (2014): very deep convolutional networks, arXiv preprint.
% The journal text is kept for classic styles; the eprint fields carry the
% same identifier in machine-readable form.
@article{simonyan2014very,
  author     = {Simonyan, Karen and Zisserman, Andrew},
  title      = {Very deep convolutional networks for large-scale image recognition},
  journal    = {arXiv preprint arXiv:1409.1556},
  year       = {2014},
  eprinttype = {arXiv},
  eprint     = {1409.1556},
  url        = {https://arxiv.org/abs/1409.1556}
}

% He et al. (2016): deep residual learning for image recognition, conference paper.
@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

% Glorot and Bengio (2010): difficulty of training deep feedforward networks,
% conference paper. The proceedings series is recorded as the publisher.
@inproceedings{glorot2010understanding,
  author    = {Glorot, Xavier and Bengio, Yoshua},
  title     = {Understanding the difficulty of training deep feedforward neural networks},
  booktitle = {Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics},
  pages     = {249--256},
  year      = {2010},
  publisher = {JMLR Workshop and Conference Proceedings}
}

% Bengio, Frasconi and Simard (1993): learning long-term dependencies in
% recurrent networks, conference paper.
@inproceedings{bengio1993problem,
  author       = {Bengio, Yoshua and Frasconi, Paolo and Simard, Patrice},
  title        = {The problem of learning long-term dependencies in recurrent networks},
  booktitle    = {IEEE international conference on neural networks},
  organization = {IEEE},
  year         = {1993},
  pages        = {1183--1188}
}

% Ide and Kurita (2017): sparse regularization for CNNs with ReLU activation,
% conference paper. The acronyms are braced so that sentence-casing styles
% keep their capitalisation.
@inproceedings{ide2017improvement,
  author       = {Ide, Hidenori and Kurita, Takio},
  title        = {Improvement of learning for {CNN} with {ReLU} activation by sparse regularization},
  booktitle    = {2017 International Joint Conference on Neural Networks (IJCNN)},
  pages        = {2684--2691},
  year         = {2017},
  organization = {IEEE}
}

% Ioffe and Szegedy (2015): batch normalization, conference paper.
% PMLR is recorded as the publisher of the proceedings.
@inproceedings{ioffe2015batch,
  author    = {Ioffe, Sergey and Szegedy, Christian},
  title     = {Batch normalization: Accelerating deep network training by reducing internal covariate shift},
  booktitle = {International Conference on Machine Learning},
  pages     = {448--456},
  year      = {2015},
  publisher = {PMLR}
}

% Huang et al. (2017): densely connected convolutional networks, conference paper.
% The particle "van der" is lower-case so that BibTeX parses it as the von
% part of the name rather than as part of the surname.
@inproceedings{huang2017densely,
  author    = {Huang, Gao and Liu, Zhuang and van der Maaten, Laurens and Weinberger, Kilian Q.},
  title     = {Densely connected convolutional networks},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {4700--4708},
  year      = {2017}
}

% Rumelhart, Hinton and Williams (1986): learning representations by
% back-propagating errors, journal article.
@article{rumelhart1986learning,
  author    = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title     = {Learning representations by back-propagating errors},
  journal   = {Nature},
  volume    = {323},
  number    = {6088},
  pages     = {533--536},
  year      = {1986},
  publisher = {Nature Publishing Group}
}

% Du et al. (2019): gradient descent and global minima of deep networks,
% conference paper. PMLR is recorded as the publisher of the proceedings.
@inproceedings{du2019gradient,
  author    = {Du, Simon and Lee, Jason and Li, Haochuan and Wang, Liwei and Zhai, Xiyu},
  title     = {Gradient descent finds global minima of deep neural networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1675--1685},
  year      = {2019},
  publisher = {PMLR}
}

% Pascanu, Mikolov and Bengio (2013): difficulty of training recurrent neural
% networks, conference paper. PMLR is recorded as the publisher.
@inproceedings{pascanu2013difficulty,
  author    = {Pascanu, Razvan and Mikolov, Tomas and Bengio, Yoshua},
  title     = {On the difficulty of training recurrent neural networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1310--1318},
  year      = {2013},
  publisher = {PMLR}
}

% Li et al. (2017): visualizing the loss landscape of neural nets, arXiv preprint.
% The journal text is kept for classic styles; the eprint fields carry the
% same identifier in machine-readable form.
@article{li2017visualizing,
  author     = {Li, Hao and Xu, Zheng and Taylor, Gavin and Studer, Christoph and Goldstein, Tom},
  title      = {Visualizing the loss landscape of neural nets},
  journal    = {arXiv preprint arXiv:1712.09913},
  year       = {2017},
  eprinttype = {arXiv},
  eprint     = {1712.09913},
  url        = {https://arxiv.org/abs/1712.09913}
}

% Santurkar et al. (2018): how batch normalization helps optimization,
% conference paper.
@inproceedings{santurkar2018does,
  author    = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew and M{\k{a}}dry, Aleksander},
  title     = {How does batch normalization help optimization?},
  booktitle = {Proceedings of the 32nd international conference on neural information processing systems},
  year      = {2018},
  pages     = {2488--2498}
}

% Krizhevsky et al. (2009): learning multiple layers of features from tiny images.
% Stored as a technical report because the work has no journal.
% NOTE(review): institution and author list should be confirmed against the
% report itself.
@techreport{krizhevsky2009learning,
  author      = {Krizhevsky, Alex and Hinton, Geoffrey and others},
  title       = {Learning multiple layers of features from tiny images},
  institution = {University of Toronto},
  year        = {2009}
}

% LeCun et al. (2012): efficient backprop, chapter in an edited collection.
@incollection{lecun2012efficient,
  author    = {LeCun, Yann A. and Bottou, L{\'e}on and Orr, Genevieve B. and M{\"u}ller, Klaus-Robert},
  title     = {Efficient backprop},
  booktitle = {Neural networks: Tricks of the trade},
  pages     = {9--48},
  year      = {2012},
  publisher = {Springer}
}

% Bishop (1995): Neural networks for pattern recognition, book.
@book{bishop1995neural,
  author    = {Bishop, Christopher M.},
  title     = {Neural networks for pattern recognition},
  publisher = {Oxford University Press},
  year      = {1995}
}

% Vaswani et al. (2017): Attention Is All You Need, arXiv preprint.
% The timestamp, biburl and bibsource fields are export metadata that standard
% styles ignore.
@article{vaswani2017attention,
  author       = {Vaswani, Ashish and
                  Shazeer, Noam and
                  Parmar, Niki and
                  Uszkoreit, Jakob and
                  Jones, Llion and
                  Gomez, Aidan N. and
                  Kaiser, {\L}ukasz and
                  Polosukhin, Illia},
  title        = {Attention Is All You Need},
  journal      = {CoRR},
  volume       = {abs/1706.03762},
  year         = {2017},
  url          = {http://arxiv.org/abs/1706.03762},
  eprinttype   = {arXiv},
  eprint       = {1706.03762},
  timestamp    = {Sat, 23 Jan 2021 01:20:40 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/VaswaniSPUJGKP17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% The entries above are the complete bibliography database.
% Do not paste rendered or back-quoted copies of entries into this file:
% BibTeX tries to parse every at-sign it finds, even inside surrounding text,
% which produces malformed and repeated entries.