|
|
% Preprint by Heiss, Teichmann and Wutte (2019).
% The value formerly stored in pages has the form of an arXiv identifier and
% the series field names arXiv, so it now lives in eprint/archivePrefix.
% The note field is required by the unpublished entry type.
@unpublished{heiss2019,
  author        = {Heiss, Jakob and Teichmann, Josef and Wutte, Hanna},
  title         = {How Implicit Regularization of Neural Networks Affects the Learned Function -- Part {I}},
  note          = {arXiv preprint arXiv:1911.02903},
  year          = {2019},
  archivePrefix = {arXiv},
  eprint        = {1911.02903},
  doi           = {10.3929/ethz-b-000402003},
  series        = {arXiv},
  publisher     = {Cornell University},
  copyright     = {In Copyright - Non-Commercial Use Permitted},
  keywords      = {early stopping; implicit regularization; machine learning; neural networks; spline; regression; gradient descent; artificial intelligence},
  size          = {53 p.},
}
|
|
|
|
|
|
% Journal article on dropout (Srivastava et al., 2014).
% The URL stays disabled: it is kept under the ignored field name OPTurl,
% because "Comment url = ..." is not valid field syntax.
@article{Dropout,
  author  = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
  title   = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
  journal = {Journal of Machine Learning Research},
  year    = 2014,
  volume  = 15,
  number  = 56,
  pages   = {1929--1958},
  OPTurl  = {http://jmlr.org/papers/v15/srivastava14a.html},
}
|
|
|
|
|
|
% ADADELTA report by Zeiler (2012), listed under CoRR with its arXiv id.
% The URL stays disabled under the ignored field name OPTurl.
@article{ADADELTA,
  author        = {Matthew D. Zeiler},
  title         = {{ADADELTA:} An Adaptive Learning Rate Method},
  journal       = {CoRR},
  volume        = {abs/1212.5701},
  year          = 2012,
  archivePrefix = {arXiv},
  eprint        = {1212.5701},
  OPTurl        = {http://arxiv.org/abs/1212.5701},
  timestamp     = {Mon, 13 Aug 2018 16:45:57 +0200},
}
|
|
|
|
|
|
% Back-propagation article in Nature (Rumelhart, Hinton, Williams, 1986).
% month uses the predefined macro; the URL (same target as the DOI) stays
% disabled under the ignored field name OPTurl.
@article{backprop,
  author  = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title   = {Learning representations by back-propagating errors},
  journal = {Nature},
  year    = {1986},
  month   = oct,
  day     = {01},
  volume  = {323},
  number  = {6088},
  pages   = {533--536},
  issn    = {1476-4687},
  doi     = {10.1038/323533a0},
  OPTurl  = {https://doi.org/10.1038/323533a0},
}
|
|
|
|
|
|
% MNIST database web page (LeCun and Cortes).
% Typed as misc: the record has no journal, and howpublished is only
% honoured by misc/booklet in the standard styles.
% The duplicate URL stays disabled under the ignored field name OPTurl.
@misc{MNIST,
  author       = {LeCun, Yann and Cortes, Corinna},
  title        = {{MNIST} handwritten digit database},
  howpublished = {\url{http://yann.lecun.com/exdb/mnist/}},
  year         = 2010,
  OPTurl       = {http://yann.lecun.com/exdb/mnist/},
  keywords     = {MSc _checked character_recognition mnist network neural},
  groups       = {public},
  added-at     = {2010-06-28T21:16:30.000+0200},
  lastchecked  = {2016-01-14 14:24:11},
  timestamp    = {2016-07-12T19:25:30.000+0200},
}
|
|
|
% ResNet conference paper (He, Zhang, Ren, Sun; CVPR 2016).
% Empty volume/number fields from the export were dropped and names are
% given in the unambiguous "Last, First" form.
@inproceedings{resnet,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep Residual Learning for Image Recognition},
  booktitle = {2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2016},
  pages     = {770--778},
}
|
|
|
|
|
|
% Bishop's textbook (Springer, 2006).
% edition carries only the ordinal; the printing remark moved to note.
@book{PRML,
  author    = {Christopher M. Bishop},
  title     = {Pattern Recognition and Machine Learning},
  publisher = {Springer},
  series    = {Information science and statistics},
  edition   = {First},
  year      = 2006,
  note      = {Corrected 2nd printing},
  isbn      = {9780387310732,0387310738},
  pages     = {209},
}
|
|
|
|
|
|
% AdaGrad journal article (Duchi, Hazan, Singer, 2011).
% The placeholder issue number "null" was dropped and the journal name is
% spelled out to match the other JMLR entry in this file.
@article{ADAGRAD,
  author     = {Duchi, John and Hazan, Elad and Singer, Yoram},
  title      = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
  journal    = {Journal of Machine Learning Research},
  year       = {2011},
  month      = jul,
  volume     = {12},
  pages      = {2121--2159},
  numpages   = {39},
  issn       = {1532-4435},
  publisher  = {JMLR.org},
  issue_date = {2/1/2011},
}
|
|
|
|
|
|
% Proceedings version of the saddle-point paper (Dauphin et al., 2014).
% Same title and author list as the entry keyed saddle_point, which records
% the CoRR version; the name spellings follow that entry.
% NOTE(review): month was given as 06 in the original record -- confirm it
% refers to the proceedings and not to the preprint.
@inproceedings{DBLP:journals/corr/DauphinPGCGB14,
  author    = {Dauphin, Yann and Pascanu, Razvan and G{\"{u}}l{\c{c}}ehre, {\c{C}}aglar and Cho, Kyunghyun and Ganguli, Surya and Bengio, Yoshua},
  title     = {Identifying and attacking the saddle point problem in high-dimensional non-convex optimization},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {27},
  year      = {2014},
  month     = jun,
}
|
|
|
% CoRR (arXiv) version of the saddle-point paper (Dauphin et al., 2014).
% The URL stays disabled under the ignored field name OPTurl.
@article{saddle_point,
  author        = {Yann N. Dauphin and
                   Razvan Pascanu and
                   {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
                   Kyunghyun Cho and
                   Surya Ganguli and
                   Yoshua Bengio},
  title         = {Identifying and attacking the saddle point problem in high-dimensional
                   non-convex optimization},
  journal       = {CoRR},
  volume        = {abs/1406.2572},
  year          = {2014},
  archivePrefix = {arXiv},
  eprint        = {1406.2572},
  OPTurl        = {http://arxiv.org/abs/1406.2572},
  timestamp     = {Mon, 22 Jul 2019 13:15:46 +0200},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|
|
|
|
|
% CoRR (arXiv) report on preventing co-adaptation (Hinton et al., 2012).
% The URL stays disabled under the ignored field name OPTurl.
@article{Dropout1,
  author        = {Geoffrey E. Hinton and
                   Nitish Srivastava and
                   Alex Krizhevsky and
                   Ilya Sutskever and
                   Ruslan Salakhutdinov},
  title         = {Improving neural networks by preventing co-adaptation of feature detectors},
  journal       = {CoRR},
  volume        = {abs/1207.0580},
  year          = {2012},
  archivePrefix = {arXiv},
  eprint        = {1207.0580},
  OPTurl        = {http://arxiv.org/abs/1207.0580},
  timestamp     = {Mon, 13 Aug 2018 16:46:10 +0200},
}
|
|
|
|
|
|
% Conference paper on the variance of the adaptive learning rate
% (Liu et al., ICLR 2020). The citation key now follows the opening brace
% directly; the URL stays disabled under the ignored field name OPTurl.
@inproceedings{rADAM,
  author    = {Liyuan Liu and Haoming Jiang and Pengcheng He and Weizhu Chen and Xiaodong Liu and Jianfeng Gao and Jiawei Han},
  title     = {On the Variance of the Adaptive Learning Rate and Beyond},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
  OPTurl    = {https://openreview.net/forum?id=rkgz2aEKDr},
}
|
|
|
|
|
|
% Adam optimizer paper (Kingma and Ba, ICLR 2015).
% The editor list and the URL had been disabled with Comment markers placed
% inside the entry, which is not valid syntax there; both are kept under
% the ignored field names OPTeditor and OPTurl so they remain unprinted.
@inproceedings{ADAM,
  author    = {Diederik P. Kingma and
               Jimmy Ba},
  title     = {Adam: {A} Method for Stochastic Optimization},
  booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015,
               San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
  year      = {2015},
  OPTeditor = {Yoshua Bengio and
               Yann LeCun},
  OPTurl    = {http://arxiv.org/abs/1412.6980},
  timestamp = {Thu, 25 Jul 2019 14:25:37 +0200},
}
|
|
|
|
|
|
% AIP Conference Proceedings article by Zhao (2017).
% The PDF link had been stored in eprint, which is meant for a preprint
% identifier; it is kept under the ignored field name OPTurl because the
% DOI already identifies the article.
@article{transfer_learning,
  author  = {Zhao, Wei},
  title   = {Research on the deep learning of the small sample data based on transfer learning},
  journal = {AIP Conference Proceedings},
  volume  = {1864},
  number  = {1},
  pages   = {020018},
  year    = {2017},
  doi     = {10.1063/1.4992835},
  OPTurl  = {https://aip.scitation.org/doi/pdf/10.1063/1.4992835},
}
|
|
|
|
|
|
% Neurocomputing article on GAN-based augmentation (Frid-Adar et al., 2018).
% doi holds the bare identifier without the resolver prefix, acronyms in
% the title are brace-protected, and the URL stays disabled under the
% ignored field name OPTurl.
@article{gan,
  author  = {Maayan Frid-Adar and Idit Diamant and Eyal Klang and Michal Amitai and Jacob Goldberger and Hayit Greenspan},
  title   = {{GAN}-based synthetic medical image augmentation for increased {CNN} performance in liver lesion classification},
  journal = {Neurocomputing},
  volume  = 321,
  pages   = {321--331},
  year    = 2018,
  issn    = {0925-2312},
  doi     = {10.1016/j.neucom.2018.09.013},
  OPTurl  = {http://www.sciencedirect.com/science/article/pii/S0925231218310749},
}
|
|
|
|
|
|
% Fashion-MNIST dataset preprint (Xiao, Rasul, Vollgraf, 2017).
% eprint holds only the arXiv identifier; the subject class is already
% carried by eprintclass. Both date and year are kept as in the original
% record.
@online{fashionMNIST,
  author      = {Han Xiao and Kashif Rasul and Roland Vollgraf},
  title       = {{Fashion-MNIST}: a Novel Image Dataset for Benchmarking Machine Learning Algorithms},
  date        = {2017-08-28},
  year        = {2017},
  eprinttype  = {arXiv},
  eprintclass = {cs.LG},
  eprint      = {1708.07747},
}
|
|
|
|
|
|
% RMDL conference paper (Kowsari et al., ICISDM 2018).
% The page range uses an ASCII double hyphen and the URL (same target as
% the DOI) stays disabled under the ignored field name OPTurl.
@inproceedings{10.1145/3206098.3206111,
  author    = {Kowsari, Kamran and Heidarysafa, Mojtaba and Brown, Donald E. and Meimandi, Kiana Jafari and Barnes, Laura E.},
  title     = {{RMDL}: Random Multimodel Deep Learning for Classification},
  booktitle = {Proceedings of the 2nd International Conference on Information System and Data Mining},
  series    = {ICISDM '18},
  year      = {2018},
  pages     = {19--28},
  numpages  = {10},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Lakeland, FL, USA},
  isbn      = {9781450363549},
  doi       = {10.1145/3206098.3206111},
  OPTurl    = {https://doi.org/10.1145/3206098.3206111},
  keywords  = {Supervised Learning, Deep Learning, Data Mining, Text Classification, Deep Neural Networks, Image Classification},
}
|
|
|
|
|
|
% CoRR (arXiv) report on random erasing augmentation (Zhong et al., 2017).
% The URL stays disabled under the ignored field name OPTurl.
@article{random_erasing,
  author        = {Zhun Zhong and
                   Liang Zheng and
                   Guoliang Kang and
                   Shaozi Li and
                   Yi Yang},
  title         = {Random Erasing Data Augmentation},
  journal       = {CoRR},
  volume        = {abs/1708.04896},
  year          = 2017,
  archivePrefix = {arXiv},
  eprint        = {1708.04896},
  OPTurl        = {http://arxiv.org/abs/1708.04896},
  timestamp     = {Mon, 13 Aug 2018 16:47:52 +0200},
}
|
|
|
|
|
|
% Software reference: script for drawing ConvNet illustrations (Ding).
% Case-sensitive words in the title are brace-protected.
@misc{draw_convnet,
  author       = {Gavin Weiguang Ding},
  title        = {{Python} script for illustrating Convolutional Neural Network ({ConvNet})},
  howpublished = {\url{https://github.com/gwding/draw_convnet}},
  year         = 2018,
  note         = {Accessed: 30.08.2020},
}
|
|
|
|
|
|
% Haykin's neural networks textbook (Prentice Hall, 1999).
% The edition moved from the free-form note into the edition field.
@book{Haykin,
  author    = {Haykin, Simon},
  title     = {Neural Networks: {A} Comprehensive Foundation},
  publisher = {Prentice Hall},
  edition   = {Second},
  year      = 1999,
  added-at  = {2009-06-26T15:25:19.000+0200},
}
|
|
|
|
|
|
% Deep Learning textbook (Goodfellow, Bengio, Courville; MIT Press, 2016).
% The book's web address is printed through the note field.
@book{Goodfellow,
  author    = {Ian Goodfellow and Yoshua Bengio and Aaron Courville},
  title     = {Deep Learning},
  publisher = {MIT Press},
  year      = 2016,
  note      = {\url{http://www.deeplearningbook.org}},
}
|
|
|
|
|
|
% CoRR (arXiv) overview of gradient descent optimizers (Ruder, 2016).
% Unlike the other CoRR records in this file, the url field is active here.
@article{ruder,
  author        = {Sebastian Ruder},
  title         = {An overview of gradient descent optimization algorithms},
  journal       = {CoRR},
  volume        = {abs/1609.04747},
  year          = {2016},
  url           = {http://arxiv.org/abs/1609.04747},
  archivePrefix = {arXiv},
  eprint        = {1609.04747},
  timestamp     = {Mon, 13 Aug 2018 16:48:10 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/Ruder16.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
|
|
|
|
|
|
% Generative Adversarial Nets chapter in the NeurIPS 27 proceedings volume
% (Goodfellow et al., 2014).
@incollection{goodfellow_gan,
  author    = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
  title     = {Generative Adversarial Nets},
  booktitle = {Advances in Neural Information Processing Systems 27},
  pages     = {2672--2680},
  year      = {2014},
  publisher = {Curran Associates, Inc.},
  url       = {http://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf},
}
|
|
|
|
|
|
% The Elements of Statistical Learning (Hastie, Tibshirani, Friedman, 2001).
% Bookkeeping fields from the BibSonomy export are retained unchanged.
@book{hastie01statisticallearning,
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
  title     = {The Elements of Statistical Learning},
  publisher = {Springer New York Inc.},
  address   = {New York, NY, USA},
  series    = {Springer Series in Statistics},
  year      = 2001,
  keywords  = {ml statistics},
  added-at  = {2008-05-16T16:17:42.000+0200},
  timestamp = {2008-05-16T16:17:43.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2f58afc5c9793fcc8ad8389824e57984c/sb3000},
  interhash = {d585aea274f2b9b228fc1629bc273644},
  intrahash = {f58afc5c9793fcc8ad8389824e57984c},
}
|