@unpublished{heiss2019,
series = {arXiv},
author = {Heiss, Jakob and Teichmann, Josef and Wutte, Hanna},
publisher = {Cornell University},
year = {2019},
language = {en},
copyright = {In Copyright - Non-Commercial Use Permitted},
keywords = {early stopping; implicit regularization; machine learning; neural networks; spline; regression; gradient descent; artificial intelligence},
size = {53 p.},
abstract = {Today, various forms of neural networks are trained to perform approximation tasks in many fields. However, the solutions obtained are not fully understood. Empirical results suggest that typical training algorithms favor regularized solutions. These observations motivate us to analyze properties of the solutions found by gradient descent initialized close to zero, that is frequently employed to perform the training task. As a starting point, we consider one dimensional (shallow) ReLU neural networks in which weights are chosen randomly and only the terminal layer is trained. We show that the resulting solution converges to the smooth spline interpolation of the training data as the number of hidden nodes tends to infinity. Moreover, we derive a correspondence between the early stopped gradient descent and the smoothing spline regression. This might give valuable insight on the properties of the solutions obtained using gradient descent methods in general settings.},
doi = {10.3929/ethz-b-000402003},
title = {How Implicit Regularization of Neural Networks Affects the Learned Function Part I},
archivePrefix = {arXiv},
eprint = {1911.02903}
}
@article{Dropout,
author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
journal = {Journal of Machine Learning Research},
year = 2014,
volume = 15,
number = 56,
pages = {1929--1958},
url = {http://jmlr.org/papers/v15/srivastava14a.html}
}
@article{ADADELTA,
author = {Matthew D. Zeiler},
title = {{ADADELTA:} An Adaptive Learning Rate Method},
journal = {CoRR},
volume = {abs/1212.5701},
year = 2012,
url = {http://arxiv.org/abs/1212.5701},
archivePrefix = {arXiv},
eprint = {1212.5701},
timestamp = {Mon, 13 Aug 2018 16:45:57 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1212-5701.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{backprop,
author={Rumelhart, David E.
and Hinton, Geoffrey E.
and Williams, Ronald J.},
title={Learning representations by back-propagating errors},
journal={Nature},
year={1986},
month={Oct},
day={01},
volume={323},
number={6088},
pages={533--536},
abstract={We describe a new learning procedure, back-propagation, for networks of neurone-like units. The procedure repeatedly adjusts the weights of the connections in the network so as to minimize a measure of the difference between the actual output vector of the net and the desired output vector. As a result of the weight adjustments, internal `hidden' units which are not part of the input or output come to represent important features of the task domain, and the regularities in the task are captured by the interactions of these units. The ability to create useful new features distinguishes back-propagation from earlier, simpler methods such as the perceptron-convergence procedure.},
issn={1476-4687},
doi={10.1038/323533a0},
url={https://doi.org/10.1038/323533a0}
}
@misc{MNIST,
author = {LeCun, Yann and Cortes, Corinna},
title = {{MNIST} handwritten digit database},
howpublished = {http://yann.lecun.com/exdb/mnist/},
url = {http://yann.lecun.com/exdb/mnist/},
year = 2010
}
@article{resnet,
author = {Kaiming He and
Xiangyu Zhang and
Shaoqing Ren and
Jian Sun},
title = {Deep Residual Learning for Image Recognition},
journal = {CoRR},
volume = {abs/1512.03385},
year = 2015,
url = {http://arxiv.org/abs/1512.03385},
archivePrefix = {arXiv},
eprint = {1512.03385},
timestamp = {Wed, 17 Apr 2019 17:23:45 +0200},
biburl = {https://dblp.org/rec/journals/corr/HeZRS15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{PRML,
title = {Pattern Recognition and Machine Learning},
author = {Christopher M. Bishop},
publisher = {Springer},
isbn = {978-0-387-31073-2},
year = 2006,
series = {Information science and statistics},
edition = {1st ed. 2006. Corr. 2nd printing},
pages = {209}
}
@article{ADAGRAD,
author = {Duchi, John and Hazan, Elad and Singer, Yoram},
title = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
year = {2011},
issue_date = {2/1/2011},
publisher = {JMLR.org},
volume = {12},
issn = {1532-4435},
journal = {J. Mach. Learn. Res.},
month = jul,
pages = {2121--2159},
numpages = {39}
}
@article{DBLP:journals/corr/DauphinPGCGB14,
author = {Yann N. Dauphin and
Razvan Pascanu and
{\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
Kyunghyun Cho and
Surya Ganguli and
Yoshua Bengio},
title = {Identifying and attacking the saddle point problem in high-dimensional
non-convex optimization},
journal = {CoRR},
volume = {abs/1406.2572},
year = {2014},
url = {http://arxiv.org/abs/1406.2572},
archivePrefix = {arXiv},
eprint = {1406.2572},
timestamp = {Mon, 22 Jul 2019 13:15:46 +0200},
biburl = {https://dblp.org/rec/journals/corr/DauphinPGCGB14.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Dropout1,
author = {Geoffrey E. Hinton and
Nitish Srivastava and
Alex Krizhevsky and
Ilya Sutskever and
Ruslan Salakhutdinov},
title = {Improving neural networks by preventing co-adaptation of feature detectors},
journal = {CoRR},
volume = {abs/1207.0580},
year = {2012},
url = {http://arxiv.org/abs/1207.0580},
archivePrefix = {arXiv},
eprint = {1207.0580},
timestamp = {Mon, 13 Aug 2018 16:46:10 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1207-0580.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{rADAM,
title={On the Variance of the Adaptive Learning Rate and Beyond},
author={Liyuan Liu and Haoming Jiang and Pengcheng He and Weizhu Chen and Xiaodong Liu and Jianfeng Gao and Jiawei Han},
booktitle={International Conference on Learning Representations},
year={2020},
url={https://openreview.net/forum?id=rkgz2aEKDr}
}
@inproceedings{ADAM,
author = {Diederik P. Kingma and
Jimmy Ba},
editor = {Yoshua Bengio and
Yann LeCun},
title = {Adam: {A} Method for Stochastic Optimization},
booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015,
San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
year = {2015},
url = {http://arxiv.org/abs/1412.6980},
timestamp = {Thu, 25 Jul 2019 14:25:37 +0200},
biburl = {https://dblp.org/rec/journals/corr/KingmaB14.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{transfer_learning,
author = {Zhao, Wei},
title = {Research on the deep learning of the small sample data based on transfer learning},
journal = {AIP Conference Proceedings},
volume = {1864},
number = {1},
pages = {020018},
year = {2017},
doi = {10.1063/1.4992835},
URL = {https://aip.scitation.org/doi/abs/10.1063/1.4992835},
eprint = {https://aip.scitation.org/doi/pdf/10.1063/1.4992835}
}
@article{gan,
title = {{GAN}-based synthetic medical image augmentation for increased {CNN} performance in liver lesion classification},
journal = {Neurocomputing},
volume = 321,
pages = {321--331},
year = 2018,
issn = {0925-2312},
doi = {10.1016/j.neucom.2018.09.013},
url = {http://www.sciencedirect.com/science/article/pii/S0925231218310749},
author = {Maayan Frid-Adar and Idit Diamant and Eyal Klang and Michal Amitai and Jacob Goldberger and Hayit Greenspan}
}
@online{fashionMNIST,
author = {Han Xiao and Kashif Rasul and Roland Vollgraf},
title = {Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms},
date = {2017-08-28},
year = {2017},
eprintclass = {cs.LG},
eprinttype = {arXiv},
eprint = {1708.07747}
}
@inproceedings{10.1145/3206098.3206111,
author = {Kowsari, Kamran and Heidarysafa, Mojtaba and Brown, Donald E. and Meimandi, Kiana Jafari and Barnes, Laura E.},
title = {RMDL: Random Multimodel Deep Learning for Classification},
year = {2018},
isbn = {9781450363549},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3206098.3206111},
doi = {10.1145/3206098.3206111},
booktitle = {Proceedings of the 2nd International Conference on Information System and Data Mining},
pages = {19--28},
numpages = {10},
keywords = {Supervised Learning, Deep Learning, Data Mining, Text Classification, Deep Neural Networks, Image Classification},
location = {Lakeland, FL, USA},
series = {ICISDM '18}
}
@article{random_erasing,
author = {Zhun Zhong and
Liang Zheng and
Guoliang Kang and
Shaozi Li and
Yi Yang},
title = {Random Erasing Data Augmentation},
journal = {CoRR},
volume = {abs/1708.04896},
year = {2017},
url = {http://arxiv.org/abs/1708.04896},
archivePrefix = {arXiv},
eprint = {1708.04896},
timestamp = {Mon, 13 Aug 2018 16:47:52 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1708-04896.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}