% Bibliography database: neural networks, optimisation, regularisation, datasets.
%
% Conventions used in this file:
%   * One field per line, brace-delimited values, page ranges written with "--".
%   * Fields named comment-url and comment-editor are not recognised by the
%     standard styles and are therefore ignored. They hold values that were
%     already disabled in the previous revision of this file, where they were
%     written with a two-word field name that is not valid syntax.
%     Rename them to url / editor if they should be printed.
%   * Do not write an at-sign in free text outside an entry: the parser
%     treats it as the start of a new entry.

% arXiv preprint; the arXiv identifier lives in eprint, not in pages.
@unpublished{heiss2019,
  author        = {Heiss, Jakob and Teichmann, Josef and Wutte, Hanna},
  title         = {How Implicit Regularization of Neural Networks Affects the Learned Function -- Part {I}},
  year          = {2019},
  note          = {Preprint},
  series        = {arXiv},
  publisher     = {Cornell University},
  archivePrefix = {arXiv},
  eprint        = {1911.02903},
  doi           = {10.3929/ethz-b-000402003},
  copyright     = {In Copyright - Non-Commercial Use Permitted},
  keywords      = {early stopping; implicit regularization; machine learning; neural networks; spline; regression; gradient descent; artificial intelligence},
  size          = {53 p.},
}

@article{Dropout,
  author      = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
  title       = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
  journal     = {Journal of Machine Learning Research},
  year        = 2014,
  volume      = 15,
  number      = 56,
  pages       = {1929--1958},
  comment-url = {http://jmlr.org/papers/v15/srivastava14a.html},
}

@article{ADADELTA,
  author        = {Matthew D. Zeiler},
  title         = {{ADADELTA:} An Adaptive Learning Rate Method},
  journal       = {CoRR},
  volume        = {abs/1212.5701},
  year          = 2012,
  archivePrefix = {arXiv},
  eprint        = {1212.5701},
  timestamp     = {Mon, 13 Aug 2018 16:45:57 +0200},
  comment-url   = {http://arxiv.org/abs/1212.5701},
}

@article{backprop,
  author      = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title       = {Learning representations by back-propagating errors},
  journal     = {Nature},
  year        = {1986},
  month       = oct,
  day         = {01},
  volume      = {323},
  number      = {6088},
  pages       = {533--536},
  issn        = {1476-4687},
  doi         = {10.1038/323533a0},
  comment-url = {https://doi.org/10.1038/323533a0},
}

% Web resource without a journal, hence typed as misc (howpublished is a misc field).
@misc{MNIST,
  author       = {LeCun, Yann and Cortes, Corinna},
  title        = {{MNIST} handwritten digit database},
  howpublished = {http://yann.lecun.com/exdb/mnist/},
  year         = 2010,
  keywords     = {MSc _checked character_recognition mnist network neural},
  groups       = {public},
  added-at     = {2010-06-28T21:16:30.000+0200},
  lastchecked  = {2016-01-14 14:24:11},
  timestamp    = {2016-07-12T19:25:30.000+0200},
  comment-url  = {http://yann.lecun.com/exdb/mnist/},
}

@inproceedings{resnet,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep Residual Learning for Image Recognition},
  booktitle = {2016 {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  year      = {2016},
  pages     = {770--778},
}

@book{PRML,
  author    = {Christopher M. Bishop},
  title     = {Pattern Recognition and Machine Learning},
  publisher = {Springer},
  series    = {Information science and statistics},
  edition   = {First},
  note      = {Corr. 2nd printing},
  year      = 2006,
  pages     = {209},
  isbn      = {9780387310732,0387310738},
}

@article{ADAGRAD,
  author     = {Duchi, John and Hazan, Elad and Singer, Yoram},
  title      = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
  journal    = {Journal of Machine Learning Research},
  year       = {2011},
  month      = jul,
  volume     = {12},
  pages      = {2121--2159},
  numpages   = {39},
  issn       = {1532-4435},
  publisher  = {JMLR.org},
  issue_date = {2/1/2011},
}

% NOTE(review): this is the same work as the entry keyed saddle_point below
% (that one is the arXiv version). Both keys are kept so that existing
% citations keep resolving; consider citing only one of them.
@inproceedings{DBLP:journals/corr/DauphinPGCGB14,
  author    = {Dauphin, Yann and Pascanu, Razvan and G{\"u}l{\c{c}}ehre, {\c{C}}aglar and Cho, Kyunghyun and Ganguli, Surya and Bengio, Yoshua},
  title     = {Identifying and attacking the saddle point problem in high-dimensional non-convex optimization},
  booktitle = {Advances in Neural Information Processing Systems 27},
  year      = {2014},
  month     = jun,
}

@article{saddle_point,
  author        = {Yann N. Dauphin and Razvan Pascanu and {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and Kyunghyun Cho and Surya Ganguli and Yoshua Bengio},
  title         = {Identifying and attacking the saddle point problem in high-dimensional non-convex optimization},
  journal       = {CoRR},
  volume        = {abs/1406.2572},
  year          = {2014},
  archivePrefix = {arXiv},
  eprint        = {1406.2572},
  timestamp     = {Mon, 22 Jul 2019 13:15:46 +0200},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  comment-url   = {http://arxiv.org/abs/1406.2572},
}

@article{Dropout1,
  author        = {Geoffrey E. Hinton and Nitish Srivastava and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
  title         = {Improving neural networks by preventing co-adaptation of feature detectors},
  journal       = {CoRR},
  volume        = {abs/1207.0580},
  year          = {2012},
  archivePrefix = {arXiv},
  eprint        = {1207.0580},
  timestamp     = {Mon, 13 Aug 2018 16:46:10 +0200},
  comment-url   = {http://arxiv.org/abs/1207.0580},
}

@inproceedings{rADAM,
  author      = {Liyuan Liu and Haoming Jiang and Pengcheng He and Weizhu Chen and Xiaodong Liu and Jianfeng Gao and Jiawei Han},
  title       = {On the Variance of the Adaptive Learning Rate and Beyond},
  booktitle   = {International Conference on Learning Representations},
  year        = {2020},
  comment-url = {https://openreview.net/forum?id=rkgz2aEKDr},
}

@inproceedings{ADAM,
  author         = {Diederik P. Kingma and Jimmy Ba},
  title          = {Adam: {A} Method for Stochastic Optimization},
  booktitle      = {3rd International Conference on Learning Representations, {ICLR} 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
  year           = {2015},
  timestamp      = {Thu, 25 Jul 2019 14:25:37 +0200},
  comment-editor = {Yoshua Bengio and Yann LeCun},
  comment-url    = {http://arxiv.org/abs/1412.6980},
}

@article{transfer_learning,
  author  = {Zhao, Wei},
  title   = {Research on the deep learning of the small sample data based on transfer learning},
  journal = {AIP Conference Proceedings},
  volume  = {1864},
  number  = {1},
  pages   = {020018},
  year    = {2017},
  doi     = {10.1063/1.4992835},
  url     = {https://aip.scitation.org/doi/pdf/10.1063/1.4992835},
}

@article{gan,
  author      = {Maayan Frid-Adar and Idit Diamant and Eyal Klang and Michal Amitai and Jacob Goldberger and Hayit Greenspan},
  title       = {{GAN}-based synthetic medical image augmentation for increased {CNN} performance in liver lesion classification},
  journal     = {Neurocomputing},
  volume      = 321,
  pages       = {321--331},
  year        = 2018,
  issn        = {0925-2312},
  doi         = {10.1016/j.neucom.2018.09.013},
  comment-url = {http://www.sciencedirect.com/science/article/pii/S0925231218310749},
}

% date and year are both given so that styles which only know year still print one.
@online{fashionMNIST,
  author      = {Han Xiao and Kashif Rasul and Roland Vollgraf},
  title       = {{Fashion-MNIST}: a Novel Image Dataset for Benchmarking Machine Learning Algorithms},
  date        = {2017-08-28},
  year        = {2017},
  eprinttype  = {arXiv},
  eprintclass = {cs.LG},
  eprint      = {1708.07747},
}

@inproceedings{10.1145/3206098.3206111,
  author      = {Kowsari, Kamran and Heidarysafa, Mojtaba and Brown, Donald E. and Meimandi, Kiana Jafari and Barnes, Laura E.},
  title       = {{RMDL}: Random Multimodel Deep Learning for Classification},
  booktitle   = {Proceedings of the 2nd International Conference on Information System and Data Mining},
  series      = {ICISDM '18},
  year        = {2018},
  pages       = {19--28},
  numpages    = {10},
  publisher   = {Association for Computing Machinery},
  address     = {New York, NY, USA},
  location    = {Lakeland, FL, USA},
  isbn        = {9781450363549},
  doi         = {10.1145/3206098.3206111},
  keywords    = {Supervised Learning, Deep Learning, Data Mining, Text Classification, Deep Neural Networks, Image Classification},
  comment-url = {https://doi.org/10.1145/3206098.3206111},
}

@article{random_erasing,
  author        = {Zhun Zhong and Liang Zheng and Guoliang Kang and Shaozi Li and Yi Yang},
  title         = {Random Erasing Data Augmentation},
  journal       = {CoRR},
  volume        = {abs/1708.04896},
  year          = 2017,
  archivePrefix = {arXiv},
  eprint        = {1708.04896},
  timestamp     = {Mon, 13 Aug 2018 16:47:52 +0200},
  comment-url   = {http://arxiv.org/abs/1708.04896},
}

@misc{draw_convnet,
  author       = {Gavin Weiguang Ding},
  title        = {{Python} script for illustrating Convolutional Neural Network ({ConvNet})},
  howpublished = {\url{https://github.com/gwding/draw_convnet}},
  note         = {Accessed: 30.08.2020},
  year         = 2018,
}

@book{Haykin,
  author    = {Haykin, Simon},
  title     = {Neural Networks: {A} Comprehensive Foundation},
  publisher = {Prentice Hall},
  edition   = {Second},
  year      = 1999,
  added-at  = {2009-06-26T15:25:19.000+0200},
}

@book{Goodfellow,
  author    = {Ian Goodfellow and Yoshua Bengio and Aaron Courville},
  title     = {Deep Learning},
  publisher = {MIT Press},
  note      = {\url{http://www.deeplearningbook.org}},
  year      = 2016,
}

@article{ruder,
  author        = {Sebastian Ruder},
  title         = {An overview of gradient descent optimization algorithms},
  journal       = {CoRR},
  volume        = {abs/1609.04747},
  year          = {2016},
  url           = {http://arxiv.org/abs/1609.04747},
  archivePrefix = {arXiv},
  eprint        = {1609.04747},
  timestamp     = {Mon, 13 Aug 2018 16:48:10 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/Ruder16.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

@incollection{goodfellow_gan,
  author    = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
  title     = {Generative Adversarial Nets},
  booktitle = {Advances in Neural Information Processing Systems 27},
  pages     = {2672--2680},
  year      = {2014},
  publisher = {Curran Associates, Inc.},
  url       = {http://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf},
}

@book{hastie01statisticallearning,
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
  title     = {The Elements of Statistical Learning},
  publisher = {Springer New York Inc.},
  address   = {New York, NY, USA},
  series    = {Springer Series in Statistics},
  year      = 2001,
  keywords  = {ml statistics},
  added-at  = {2008-05-16T16:17:42.000+0200},
  timestamp = {2008-05-16T16:17:43.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2f58afc5c9793fcc8ad8389824e57984c/sb3000},
  interhash = {d585aea274f2b9b228fc1629bc273644},
  intrahash = {f58afc5c9793fcc8ad8389824e57984c},
}