% arXiv preprint: the arXiv identifier is stored in eprint (it used to sit in
% pages), and note is supplied because the unpublished type requires it.
@unpublished{heiss2019,
  author        = {Heiss, Jakob and Teichmann, Josef and Wutte, Hanna},
  title         = {How Implicit Regularization of Neural Networks Affects the Learned Function -- Part {I}},
  note          = {Preprint},
  year          = {2019},
  archivePrefix = {arXiv},
  eprint        = {1911.02903},
  doi           = {10.3929/ethz-b-000402003},
  series        = {arXiv},
  publisher     = {Cornell University},
  copyright     = {In Copyright - Non-Commercial Use Permitted},
  keywords      = {early stopping; implicit regularization; machine learning; neural networks; spline; regression; gradient descent; artificial intelligence},
  size          = {53 p.},
}

% The article URL is kept in comment-url, a field name no standard style
% reads, so the link is preserved without being printed.
@article{Dropout,
  author      = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
  title       = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
  journal     = {Journal of Machine Learning Research},
  year        = {2014},
  volume      = {15},
  number      = {56},
  pages       = {1929--1958},
  comment-url = {http://jmlr.org/papers/v15/srivastava14a.html},
}

% The arXiv abstract URL is kept in comment-url, a field name no standard
% style reads, so the link is preserved without being printed.
@article{ADADELTA,
  author        = {Matthew D. Zeiler},
  title         = {{ADADELTA:} An Adaptive Learning Rate Method},
  journal       = {CoRR},
  volume        = {abs/1212.5701},
  year          = {2012},
  archivePrefix = {arXiv},
  eprint        = {1212.5701},
  comment-url   = {http://arxiv.org/abs/1212.5701},
  timestamp     = {Mon, 13 Aug 2018 16:45:57 +0200},
}

% month uses the predefined macro; the resolver URL (redundant with doi) is
% kept in the ignored field comment-url.
@article{backprop,
  author      = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title       = {Learning representations by back-propagating errors},
  journal     = {Nature},
  year        = {1986},
  month       = oct,
  day         = {01},
  volume      = {323},
  number      = {6088},
  pages       = {533--536},
  issn        = {1476-4687},
  doi         = {10.1038/323533a0},
  comment-url = {https://doi.org/10.1038/323533a0},
}

% Dataset web page: typed as misc because the entry has howpublished and no
% journal. The duplicate link is kept in the ignored field comment-url.
@misc{MNIST,
  author       = {LeCun, Yann and Cortes, Corinna},
  title        = {{MNIST} handwritten digit database},
  howpublished = {http://yann.lecun.com/exdb/mnist/},
  year         = {2010},
  comment-url  = {http://yann.lecun.com/exdb/mnist/},
  keywords     = {MSc _checked character_recognition mnist network neural},
  groups       = {public},
  added-at     = {2010-06-28T21:16:30.000+0200},
  lastchecked  = {2016-01-14 14:24:11},
  timestamp    = {2016-07-12T19:25:30.000+0200},
}
% Conference paper; the empty volume/number placeholders from the export
% have been removed.
@inproceedings{resnet,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep Residual Learning for Image Recognition},
  booktitle = {2016 {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  year      = {2016},
  pages     = {770--778},
}

% Textbook entry; isbn holds two comma-separated identifiers (13- and
% 10-digit forms).
@book{PRML,
  author    = {Christopher M. Bishop},
  title     = {Pattern Recognition and Machine Learning},
  series    = {Information science and statistics},
  edition   = {1st ed. 2006. Corr. 2nd printing},
  publisher = {Springer},
  year      = {2006},
  pages     = {209},
  isbn      = {9780387310732,0387310738},
}

% Journal name is spelled out to match the Dropout entry; the exporter's
% literal "null" issue number has been dropped.
@article{ADAGRAD,
  author     = {Duchi, John and Hazan, Elad and Singer, Yoram},
  title      = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
  journal    = {Journal of Machine Learning Research},
  volume     = {12},
  pages      = {2121--2159},
  numpages   = {39},
  year       = {2011},
  month      = jul,
  issue_date = {2/1/2011},
  issn       = {1532-4435},
  publisher  = {JMLR.org},
}

% Same work as the entry keyed saddle_point (which cites the arXiv version);
% both keys are kept so existing citations keep resolving.
% NOTE(review): month is carried over from the original value 06 -- confirm
% it is the intended date for the proceedings version.
@article{DBLP:journals/corr/DauphinPGCGB14,
  author  = {Dauphin, Yann and Pascanu, Razvan and G{\"{u}}l{\c{c}}ehre, {\c{C}}aglar and Cho, Kyunghyun and Ganguli, Surya and Bengio, Yoshua},
  title   = {Identifying and attacking the saddle point problem in high-dimensional non-convex optimization},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {27},
  year    = {2014},
  month   = jun,
}
% The arXiv abstract URL is kept in comment-url, a field name no standard
% style reads, so the link is preserved without being printed.
@article{saddle_point,
  author        = {Yann N. Dauphin and
                   Razvan Pascanu and
                   {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
                   Kyunghyun Cho and
                   Surya Ganguli and
                   Yoshua Bengio},
  title         = {Identifying and attacking the saddle point problem in high-dimensional
                   non-convex optimization},
  journal       = {CoRR},
  volume        = {abs/1406.2572},
  year          = {2014},
  archivePrefix = {arXiv},
  eprint        = {1406.2572},
  comment-url   = {http://arxiv.org/abs/1406.2572},
  timestamp     = {Mon, 22 Jul 2019 13:15:46 +0200},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% The arXiv abstract URL is kept in comment-url, a field name no standard
% style reads, so the link is preserved without being printed.
@article{Dropout1,
  author        = {Geoffrey E. Hinton and
                   Nitish Srivastava and
                   Alex Krizhevsky and
                   Ilya Sutskever and
                   Ruslan Salakhutdinov},
  title         = {Improving neural networks by preventing co-adaptation of feature detectors},
  journal       = {CoRR},
  volume        = {abs/1207.0580},
  year          = {2012},
  archivePrefix = {arXiv},
  eprint        = {1207.0580},
  comment-url   = {http://arxiv.org/abs/1207.0580},
  timestamp     = {Mon, 13 Aug 2018 16:46:10 +0200},
}

% The OpenReview link is kept in comment-url, a field name no standard style
% reads, so the link is preserved without being printed.
@inproceedings{rADAM,
  author      = {Liyuan Liu and Haoming Jiang and Pengcheng He and Weizhu Chen and Xiaodong Liu and Jianfeng Gao and Jiawei Han},
  title       = {On the Variance of the Adaptive Learning Rate and Beyond},
  booktitle   = {International Conference on Learning Representations},
  year        = {2020},
  comment-url = {https://openreview.net/forum?id=rkgz2aEKDr},
}

% Editors and the arXiv URL are kept in the ignored fields comment-editor and
% comment-url: the data is preserved, but styles do not print it.
@inproceedings{ADAM,
  author         = {Diederik P. Kingma and
                    Jimmy Ba},
  title          = {Adam: {A} Method for Stochastic Optimization},
  booktitle      = {3rd International Conference on Learning Representations, {ICLR} 2015,
                    San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
  year           = {2015},
  comment-editor = {Yoshua Bengio and
                    Yann LeCun},
  comment-url    = {http://arxiv.org/abs/1412.6980},
  timestamp      = {Thu, 25 Jul 2019 14:25:37 +0200},
}

% The publisher PDF link is a plain URL, so it is stored in url; eprint is
% meant for archive identifiers such as arXiv ids.
@article{transfer_learning,
  author  = {Zhao, Wei},
  title   = {Research on the deep learning of the small sample data based on transfer learning},
  journal = {AIP Conference Proceedings},
  volume  = {1864},
  number  = {1},
  pages   = {020018},
  year    = {2017},
  doi     = {10.1063/1.4992835},
  url     = {https://aip.scitation.org/doi/pdf/10.1063/1.4992835},
}

% doi holds the bare identifier (no resolver prefix); the ScienceDirect link
% is kept in the ignored field comment-url.
@article{gan,
  author      = {Maayan Frid-Adar and Idit Diamant and Eyal Klang and Michal Amitai and Jacob Goldberger and Hayit Greenspan},
  title       = {{GAN}-based synthetic medical image augmentation for increased {CNN} performance in liver lesion classification},
  journal     = {Neurocomputing},
  volume      = {321},
  pages       = {321--331},
  year        = {2018},
  issn        = {0925-2312},
  doi         = {10.1016/j.neucom.2018.09.013},
  comment-url = {http://www.sciencedirect.com/science/article/pii/S0925231218310749},
}

% arXiv preprint: eprint carries only the identifier, and the subject class
% lives in eprintclass.
@online{fashionMNIST,
  author      = {Han Xiao and Kashif Rasul and Roland Vollgraf},
  title       = {{Fashion-MNIST}: a Novel Image Dataset for Benchmarking Machine Learning Algorithms},
  date        = {2017-08-28},
  year        = {2017},
  eprinttype  = {arXiv},
  eprintclass = {cs.LG},
  eprint      = {1708.07747},
}

% The resolver URL (redundant with doi) is kept in the ignored field
% comment-url; the acronym in the title is brace-protected against recasing.
@inproceedings{10.1145/3206098.3206111,
  author      = {Kowsari, Kamran and Heidarysafa, Mojtaba and Brown, Donald E. and Meimandi, Kiana Jafari and Barnes, Laura E.},
  title       = {{RMDL}: Random Multimodel Deep Learning for Classification},
  booktitle   = {Proceedings of the 2nd International Conference on Information System and Data Mining},
  series      = {ICISDM '18},
  location    = {Lakeland, FL, USA},
  year        = {2018},
  pages       = {19--28},
  numpages    = {10},
  publisher   = {Association for Computing Machinery},
  address     = {New York, NY, USA},
  isbn        = {9781450363549},
  doi         = {10.1145/3206098.3206111},
  comment-url = {https://doi.org/10.1145/3206098.3206111},
  keywords    = {Supervised Learning, Deep Learning, Data Mining, Text Classification, Deep Neural Networks, Image Classification},
}

% The arXiv abstract URL is kept in comment-url, a field name no standard
% style reads, so the link is preserved without being printed.
@article{random_erasing,
  author        = {Zhun Zhong and
                   Liang Zheng and
                   Guoliang Kang and
                   Shaozi Li and
                   Yi Yang},
  title         = {Random Erasing Data Augmentation},
  journal       = {CoRR},
  volume        = {abs/1708.04896},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1708.04896},
  comment-url   = {http://arxiv.org/abs/1708.04896},
  timestamp     = {Mon, 13 Aug 2018 16:47:52 +0200},
}

% Software repository reference; the proper noun and the camel-case name in
% the title are brace-protected so sentence-casing styles keep their capitals.
@misc{draw_convnet,
  author       = {Gavin Weiguang Ding},
  title        = {{Python} script for illustrating Convolutional Neural Network ({ConvNet})},
  howpublished = {\url{https://github.com/gwding/draw_convnet}},
  year         = {2018},
  note         = {Accessed: 30.08.2020},
}

% Textbook entry; the edition is recorded as free text in note.
@book{Haykin,
  author    = {Haykin, Simon},
  title     = {Neural Networks: {A} Comprehensive Foundation},
  publisher = {Prentice Hall},
  year      = {1999},
  note      = {2nd edition},
  added-at  = {2009-06-26T15:25:19.000+0200},
}

% Textbook entry; the book's web address is carried in note via \url.
@book{Goodfellow,
  author    = {Ian Goodfellow and Yoshua Bengio and Aaron Courville},
  title     = {Deep Learning},
  publisher = {MIT Press},
  year      = {2016},
  note      = {\url{http://www.deeplearningbook.org}},
}

% arXiv preprint in DBLP export form: journal is CoRR and volume holds the
% abs/ identifier, with the same id repeated in eprint.
@article{ruder,
  author        = {Sebastian Ruder},
  title         = {An overview of gradient descent optimization algorithms},
  journal       = {CoRR},
  volume        = {abs/1609.04747},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1609.04747},
  url           = {http://arxiv.org/abs/1609.04747},
  timestamp     = {Mon, 13 Aug 2018 16:48:10 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/Ruder16.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}

% Paper in a proceedings volume, recorded as incollection with booktitle and
% publisher.
@incollection{goodfellow_gan,
  author    = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
  title     = {Generative Adversarial Nets},
  booktitle = {Advances in Neural Information Processing Systems 27},
  publisher = {Curran Associates, Inc.},
  year      = {2014},
  pages     = {2672--2680},
  url       = {http://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf},
}

% Textbook entry exported from BibSonomy; the hash, biburl and timestamp
% fields are the exporter's bookkeeping.
@book{hastie01statisticallearning,
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
  title     = {The Elements of Statistical Learning},
  series    = {Springer Series in Statistics},
  publisher = {Springer New York Inc.},
  address   = {New York, NY, USA},
  year      = {2001},
  keywords  = {ml statistics},
  biburl    = {https://www.bibsonomy.org/bibtex/2f58afc5c9793fcc8ad8389824e57984c/sb3000},
  interhash = {d585aea274f2b9b228fc1629bc273644},
  intrahash = {f58afc5c9793fcc8ad8389824e57984c},
  added-at  = {2008-05-16T16:17:42.000+0200},
  timestamp = {2008-05-16T16:17:43.000+0200},
}