@unpublished{heiss2019,
  author        = {Heiss, Jakob and Teichmann, Josef and Wutte, Hanna},
  title         = {How Implicit Regularization of Neural Networks Affects the Learned Function – Part I},
  year          = {2019},
  publisher     = {Cornell University},
  series        = {arXiv},
  archivePrefix = {arXiv},
  eprint        = {1911.02903},
  doi           = {10.3929/ethz-b-000402003},
  language      = {en},
  copyright     = {In Copyright - Non-Commercial Use Permitted},
  keywords      = {early stopping; implicit regularization; machine learning; neural networks; spline; regression; gradient descent; artificial intelligence},
  size          = {53 p.},
  abstract      = {Today, various forms of neural networks are trained to perform approximation tasks in many fields. However, the solutions obtained are not fully understood. Empirical results suggest that typical training algorithms favor regularized solutions. These observations motivate us to analyze properties of the solutions found by gradient descent initialized close to zero, which is frequently employed to perform the training task. As a starting point, we consider one-dimensional (shallow) ReLU neural networks in which weights are chosen randomly and only the terminal layer is trained. We show that the resulting solution converges to the smooth spline interpolation of the training data as the number of hidden nodes tends to infinity. Moreover, we derive a correspondence between the early stopped gradient descent and the smoothing spline regression. This might give valuable insight on the properties of the solutions obtained using gradient descent methods in general settings.}
}

@article{Dropout,
  author  = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
  title   = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
  journal = {Journal of Machine Learning Research},
  year    = {2014},
  volume  = {15},
  number  = {56},
  pages   = {1929--1958},
  url     = {http://jmlr.org/papers/v15/srivastava14a.html}
}

@article{ADADELTA,
  author        = {Matthew D. Zeiler},
  title         = {{ADADELTA:} An Adaptive Learning Rate Method},
  journal       = {CoRR},
  volume        = {abs/1212.5701},
  year          = {2012},
  url           = {http://arxiv.org/abs/1212.5701},
  archivePrefix = {arXiv},
  eprint        = {1212.5701},
  timestamp     = {Mon, 13 Aug 2018 16:45:57 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1212-5701.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}

@article{backprop,
  author   = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title    = {Learning representations by back-propagating errors},
  journal  = {Nature},
  year     = {1986},
  month    = {Oct},
  day      = {01},
  volume   = {323},
  number   = {6088},
  pages    = {533--536},
  abstract = {We describe a new learning procedure, back-propagation, for networks of neurone-like units. The procedure repeatedly adjusts the weights of the connections in the network so as to minimize a measure of the difference between the actual output vector of the net and the desired output vector. As a result of the weight adjustments, internal `hidden' units which are not part of the input or output come to represent important features of the task domain, and the regularities in the task are captured by the interactions of these units. The ability to create useful new features distinguishes back-propagation from earlier, simpler methods such as the perceptron-convergence procedure.},
  issn     = {1476-4687},
  doi      = {10.1038/323533a0},
  url      = {https://doi.org/10.1038/323533a0}
}

@misc{MNIST,
  author       = {LeCun, Yann and Cortes, Corinna},
  title        = {{MNIST} handwritten digit database},
  howpublished = {\url{http://yann.lecun.com/exdb/mnist/}},
  url          = {http://yann.lecun.com/exdb/mnist/},
  year         = {2010}
}

@article{resnet,
  author        = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  title         = {Deep Residual Learning for Image Recognition},
  journal       = {CoRR},
  volume        = {abs/1512.03385},
  year          = {2015},
  url           = {http://arxiv.org/abs/1512.03385},
  archivePrefix = {arXiv},
  eprint        = {1512.03385},
  timestamp     = {Wed, 17 Apr 2019 17:23:45 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/HeZRS15.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}

@book{PRML,
  author    = {Christopher M. Bishop},
  title     = {Pattern Recognition and Machine Learning},
  publisher = {Springer},
  series    = {Information science and statistics},
  edition   = {1st ed. 2006. Corr. 2nd printing},
  isbn      = {9780387310732},
  year      = {2006},
  pages     = {209}
}

@article{ADAGRAD,
  author     = {Duchi, John and Hazan, Elad and Singer, Yoram},
  title      = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
  journal    = {J. Mach. Learn. Res.},
  year       = {2011},
  issue_date = {2/1/2011},
  publisher  = {JMLR.org},
  volume     = {12},
  issn       = {1532-4435},
  month      = jul,
  pages      = {2121--2159},
  numpages   = {39}
}

@article{DBLP:journals/corr/DauphinPGCGB14,
  author        = {Yann N. Dauphin and Razvan Pascanu and {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and Kyunghyun Cho and Surya Ganguli and Yoshua Bengio},
  title         = {Identifying and attacking the saddle point problem in high-dimensional non-convex optimization},
  journal       = {CoRR},
  volume        = {abs/1406.2572},
  year          = {2014},
  url           = {http://arxiv.org/abs/1406.2572},
  archivePrefix = {arXiv},
  eprint        = {1406.2572},
  timestamp     = {Mon, 22 Jul 2019 13:15:46 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/DauphinPGCGB14.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}

@article{Dropout1,
  author        = {Geoffrey E. Hinton and Nitish Srivastava and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
  title         = {Improving neural networks by preventing co-adaptation of feature detectors},
  journal       = {CoRR},
  volume        = {abs/1207.0580},
  year          = {2012},
  url           = {http://arxiv.org/abs/1207.0580},
  archivePrefix = {arXiv},
  eprint        = {1207.0580},
  timestamp     = {Mon, 13 Aug 2018 16:46:10 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1207-0580.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{rADAM,
  author    = {Liyuan Liu and Haoming Jiang and Pengcheng He and Weizhu Chen and Xiaodong Liu and Jianfeng Gao and Jiawei Han},
  title     = {On the Variance of the Adaptive Learning Rate and Beyond},
  booktitle = {International Conference on Learning Representations},
  year      = {2020},
  url       = {https://openreview.net/forum?id=rkgz2aEKDr}
}

@inproceedings{ADAM,
  author    = {Diederik P. Kingma and Jimmy Ba},
  editor    = {Yoshua Bengio and Yann LeCun},
  title     = {Adam: {A} Method for Stochastic Optimization},
  booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
  year      = {2015},
  url       = {http://arxiv.org/abs/1412.6980},
  timestamp = {Thu, 25 Jul 2019 14:25:37 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/KingmaB14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{transfer_learning,
  author  = {Zhao, Wei},
  title   = {Research on the deep learning of the small sample data based on transfer learning},
  journal = {AIP Conference Proceedings},
  volume  = {1864},
  number  = {1},
  pages   = {020018},
  year    = {2017},
  doi     = {10.1063/1.4992835},
  url     = {https://aip.scitation.org/doi/abs/10.1063/1.4992835},
  eprint  = {https://aip.scitation.org/doi/pdf/10.1063/1.4992835}
}

@article{gan,
  author  = {Maayan Frid-Adar and Idit Diamant and Eyal Klang and Michal Amitai and Jacob Goldberger and Hayit Greenspan},
  title   = {{GAN}-based synthetic medical image augmentation for increased {CNN} performance in liver lesion classification},
  journal = {Neurocomputing},
  volume  = {321},
  pages   = {321--331},
  year    = {2018},
  issn    = {0925-2312},
  doi     = {10.1016/j.neucom.2018.09.013},
  url     = {http://www.sciencedirect.com/science/article/pii/S0925231218310749}
}

@online{fashionMNIST,
  author      = {Han Xiao and Kashif Rasul and Roland Vollgraf},
  title       = {Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms},
  date        = {2017-08-28},
  year        = {2017},
  eprinttype  = {arXiv},
  eprintclass = {cs.LG},
  eprint      = {1708.07747}
}

@inproceedings{10.1145/3206098.3206111,
  author    = {Kowsari, Kamran and Heidarysafa, Mojtaba and Brown, Donald E. and Meimandi, Kiana Jafari and Barnes, Laura E.},
  title     = {RMDL: Random Multimodel Deep Learning for Classification},
  booktitle = {Proceedings of the 2nd International Conference on Information System and Data Mining},
  series    = {ICISDM '18},
  year      = {2018},
  isbn      = {9781450363549},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Lakeland, FL, USA},
  pages     = {19--28},
  numpages  = {10},
  doi       = {10.1145/3206098.3206111},
  url       = {https://doi.org/10.1145/3206098.3206111},
  keywords  = {Supervised Learning, Deep Learning, Data Mining, Text Classification, Deep Neural Networks, Image Classification}
}

@article{random_erasing,
  author        = {Zhun Zhong and Liang Zheng and Guoliang Kang and Shaozi Li and Yi Yang},
  title         = {Random Erasing Data Augmentation},
  journal       = {CoRR},
  volume        = {abs/1708.04896},
  year          = {2017},
  url           = {http://arxiv.org/abs/1708.04896},
  archivePrefix = {arXiv},
  eprint        = {1708.04896},
  timestamp     = {Mon, 13 Aug 2018 16:47:52 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1708-04896.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}