Compare commits
No commits in common. "master" and "main" have entirely different histories.
.gitignore
@@ -8,7 +8,6 @@
 *.bbl
 *.tdo
 *.blg
-*.lof
 TeX/auto/*
 main-blx.bib

@@ -16,7 +15,6 @@ main-blx.bib
 *.tex~
 *#*.tex*
 *~
-*#*

 # no pdfs
 *.pdf
@@ -26,7 +24,6 @@ main-blx.bib
 *.png
 *.jpg
 *.xcf
-*.gif

 # no slurm logs
 *slurm*.out
@@ -34,5 +31,3 @@ main-blx.bib
 # no plot data
 *.csv
 *.mean
-
-*Plots_*
Removed file: Scala implementation of the randomized shallow neural network (RSNN).

@@ -1,76 +0,0 @@
import breeze.stats.distributions.Uniform
import breeze.stats.distributions.Gaussian
import scala.language.postfixOps

object Activation {
  def apply(x: Double): Double = math.max(0, x)

  def d(x: Double): Double = if (x > 0) 1 else 0
}

class RSNN(val n: Int, val gamma: Double = 0.001) {
  val g = Uniform(-10, 10)
  val g_1 = Uniform(-5, 5)//scala.math.exp(1))
  val g_3 = Gaussian(0, 5)

  val xis = g.sample(n)
  val vs = g_3.sample(n)
  val bs = xis zip vs map {case(xi, v) => xi * v}
  //val vs = g_1.sample(n)
  //val bs = g.sample(n)

  def computeL1(x: Double) = (bs zip vs) map { case (b, v) => Activation(b + v * x) }

  def computeL2(l1: Seq[Double], ws: Seq[Double]): Double =
    (l1 zip ws) map { case (l, w) => w * l } sum

  def output(ws: Seq[Double])(x: Double): Double = computeL2(computeL1(x), ws)

  def learn(data: Seq[(Double, Double)], ws: Seq[Double], lambda: Double, gamma: Double): Seq[Double] = {
    // data: N \times 2
    // ws: n \times 1

    lazy val deltas = data.map {
      case (x, y) =>
        val l1 = computeL1(x) // n
        val out = computeL2(l1, ws) // 1
        (l1 zip ws) map {case (l1, w) => (l1 * 2 * (out - y) + lambda * 2 * w) * gamma * -1} // n
    }
    // deltas: N x n

    deltas.foldRight(ws)(
      (delta, ws) => // delta: n
        ws zip (delta) map { case (w, d) => w + d } // n
    )// map (w => w - lambda * gamma * 2 * w)

  }

  def train(data: Seq[(Double, Double)], iter: Int, lambda: Double, gamma: Double = gamma): (Seq[Double], Double => Double) = {
    val ws = (1 to iter).foldRight((1 to n).map(_ => 0.0) :Seq[Double])((i, w) => {
      println(s"Training iteration $i")
      println(w.sum/w.length)
      learn(data, w, lambda, gamma / 10)
    })
    (ws, output(ws))
  }
}

object Main {

  def main(args: Array[String]): Unit = {
    val nn = new RSNN(10, gamma = 0.0001)
    val data = (1 to 100) map (_ * 0.01) map (t => (t, math.sin(t)))
    val (ws, evaluate) = nn.train(data, iter = 1000, lambda = 0.8)

    val results = data.map(_._1).map(evaluate(_))
    data zip results foreach {
      println(_)
    }
  }

}

object EqSeq {
  def apply(left: Double, right: Double, steps: Int): Seq[Double] =
    (0 to steps) map (_ * (right - left) / steps + left)
}
Changed file: Python plotting script (cnn_fashion_fig).

@@ -149,8 +149,8 @@ if __name__ == '__main__':

 ############################
 # in between layers
-start_ratio_list = [[0.4, 0.5], [0.4, 0.8], [0.4,0.5], [0.4, 0.8], [0.4, 0.5],[0.4,0.8]]
-end_ratio_list = [[0.4, 0.5], [0.4, 0.8], [0.4,0.5], [0.4, 0.8], [0.4, 0.5],[0.4,0.8]]
+start_ratio_list = [[0.4, 0.5], [0.4, 0.8], [0.4,0.8], [0.4, 0.5], [0.4, 0.8],[0.4,0.8]]
+end_ratio_list = [[0.4, 0.5], [0.4, 0.8], [0.4,0.8], [0.4, 0.5], [0.4, 0.8],[0.4,0.8]]
 patch_size_list = [(3, 3), (3, 3), (2, 2), (3,3), (3, 3), (2, 2)]
 ind_bgn_list = range(len(patch_size_list))
 text_list = ['Conv.', 'Conv.', 'Max-pool.', 'Conv.', 'Conv.', 'Max-pool.']
@@ -211,7 +211,7 @@ if __name__ == '__main__':
 # plt.show()
 fig.set_size_inches(8, 2.5)

-fig_dir = '/home/tobi/Masterarbeit/TeX/Figures/Data/'
+fig_dir = '/home/tobi/Masterarbeit/TeX/Plots/Data/'
 fig_ext = '.pdf'
-fig.savefig(os.path.join(fig_dir, 'cnn_fashion_fig1' + fig_ext),
+fig.savefig(os.path.join(fig_dir, 'cnn_fashion_fig' + fig_ext),
             bbox_inches='tight', pad_inches=0)
Removed file: Scala script that trains RSNN models and writes the plot data to CSV files.

@@ -1,52 +0,0 @@
import breeze.plot._
import breeze.plot.DomainFunction._
import breeze.linalg._
import breeze.stats.distributions.Gaussian

val nn = new RSNN(5000, 0.0000001)

val g = Gaussian(0, 0.3)

//val data = EqSeq(-math.Pi, math.Pi, 15) map (t => (t, math.sin(t)+ g.sample(1).last))
val (ws, evaluate) = nn.train(data, iter = 100000, lambda = (1.0/20) / 5 * (nn.n * 8) * 1)

val f = Figure()
val p = f.subplot(0)
val x = linspace(-5, 5)
val y = x.map(evaluate)
//print_data(nn, x, y, 3)
p += plot(x, y)
p += scatter(data.map(_._1), data.map(_._2), x => 0.1)
f.saveas("lines.png")


val x_i = data map {case (x,y) => x}
val y_i = data map {case (x,y) => y}

def print_data(nn: RSNN, x: DenseVector[Double], y: DenseVector[Double], tlambda: Double): Unit = {
  val n = nn.n
  reflect.io.File("C:/Users/tobia/Documents/Studium/Masterarbeit/Outputs/scala_out_d_1.csv").appendAll(s"x_n_$n"+s"_tl_$tlambda;" + x.toArray.mkString(";") + "\n")
  reflect.io.File("C:/Users/tobia/Documents/Studium/Masterarbeit/Outputs/scala_out_d_1.csv").appendAll(s"y_n_$n"+s"_tl_$tlambda;" + y.toArray.mkString(";") + "\n")
}
reflect.io.File("C:/Users/tobia/Documents/Studium/Masterarbeit/Outputs/data_sin_d.csv").appendAll(x_i.mkString(";") + "\n")
reflect.io.File("C:/Users/tobia/Documents/Studium/Masterarbeit/Outputs/data_sin_d.csv").appendAll(y_i.mkString(";") + "\n")


reflect.io.File("C:/Users/tobia/Documents/Studium/Masterarbeit/Outputs/vals1.csv").appendAll(x.toArray.mkString(";") + "\n")
reflect.io.File("C:/Users/tobia/Documents/Studium/Masterarbeit/Outputs/vals1.csv").appendAll(y.toArray.mkString(";") + "\n")

for(j <- List(0.1, 1, 3)) {
  for (i <- 3 until 4) {
    val nn = new RSNN((5 * math.pow(10, i)).asInstanceOf[Int], 0.0000001)
    val (ws, evaluate) = nn.train(data, iter = 100000, lambda = (1.0 / 20) / 5 * (nn.n * 8) * j)

    val x = linspace(-5, 5)
    val y = x.map(evaluate)
    print_data(nn, x, y, j)
  }
}

val x_i = Seq(-3.141592653589793,-2.722713633111154,-2.303834612632515,-1.8849555921538759,-1.4660765716752369,-1.0471975511965979,-0.6283185307179586,-0.2094395102393194,0.2094395102393194,0.6283185307179586,1.0471975511965974,1.4660765716752362,1.8849555921538759,2.3038346126325155,2.7227136331111543,3.1415926535897922)
val y_i = Seq(0.0802212608585366,-0.3759376368887911,-1.3264180339054117,-0.8971334213504949,-0.7724344034354425,-0.9501497164520739,-0.6224628757084738,-0.35622668982623207,-0.18377660088356823,0.7836770998126841,0.5874762732054489,1.0696991264956026,1.1297065441952743,0.7587275382323738,-0.030547103790458163,0.044327111895927106)

val data = x_i zip y_i
Removed file: LaTeX appendix (implementation listings and additional comparisons).

@@ -1,621 +0,0 @@
\section{Implementations}
In this section the implementations of the models used are given.
The randomized shallow neural network used in Section~\ref{sec:conv} is
implemented in Scala. No pre-existing frameworks were used to ensure
the implementation was according to the definitions used in Theorem~\ref{theo:main1}.

The neural networks used in Section~\ref{sec:cnn} are implemented in Python using
the Keras framework given in TensorFlow. TensorFlow is a library
containing highly efficient GPU implementations of a wide variety of
tensor operations and algorithms
for training neural networks.% (computing derivatives, updating parameters).

\vspace*{-0.5cm}
\begin{lstfloat}
\begin{lstlisting}[language=iPython]
import breeze.stats.distributions.Uniform
import breeze.stats.distributions.Gaussian
import scala.language.postfixOps

object Activation {
  def apply(x: Double): Double = math.max(0, x)

  def d(x: Double): Double = if (x > 0) 1 else 0
}

class RSNN(val n: Int, val gamma: Double = 0.001) {
  val g_unif = Uniform(-10, 10)
  val g_gauss = Gaussian(0, 5)

  val xis = g_unif.sample(n)
  val vs = g_gauss.sample(n)
  val bs = xis zip vs map {case(xi, v) => xi * v}

  def computeL1(x: Double) = (bs zip vs) map {
    case (b, v) => Activation(b + v * x) }

  def computeL2(l1: Seq[Double], ws: Seq[Double]): Double =
    (l1 zip ws) map { case (l, w) => w * l } sum

  def output(ws: Seq[Double])(x: Double): Double =
    computeL2(computeL1(x), ws)

  def learn(data: Seq[(Double, Double)], ws: Seq[Double],
            lamb: Double, gamma: Double): Seq[Double] = {

    lazy val deltas = data.map {
      case (x, y) =>
        val l1 = computeL1(x)
        val out = computeL2(l1, ws)
        (l1 zip ws) map {case (l1, w) => (l1 * 2 * (out - y) +
          lam * 2 * w) * gamma * -1}
    }

    deltas.foldRight(ws)(
      (delta, ws) => ws zip (delta) map { case (w, d) => w + d })
  }

  def train(data: Seq[(Double, Double)], iter: Int, lam: Double,
            gamma: Double = gamma): (Seq[Double], Double => Double) = {

    val ws = (1 to iter).foldRight((1 to n).map(
      _ => 0.0) :Seq[Double])((i, w) => {
      println(s"Training iteration $i")
      println(w.sum/w.length)
      learn(data, w, lam, gamma / 10)
    })
    (ws, output(ws))
  }
}
\end{lstlisting}
\caption{Scala code used to build and train the ridge penalized
  randomized shallow neural network in Section~\ref{sec:rsnn_sim}.}
% The parameter \textit{lam}
% in the train function represents the $\lambda$ parameter in the error
% function. The parameters \textit{n} and \textit{gamma} set the number
% of hidden nodes and the stepsize for training.}
\label{lst:rsnn}
\end{lstfloat}
\clearpage
\begin{lstfloat}
\begin{lstlisting}[language=iPython]
import tensorflow as tf
import numpy as np
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.preprocessing.image import ImageDataGenerator

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_train = x_train / 255.0
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
x_test = x_test / 255.0

y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(24,kernel_size=5,padding='same',
          activation='relu',input_shape=(28,28,1)))
model.add(tf.keras.layers.MaxPool2D())
model.add(tf.keras.layers.Conv2D(64,kernel_size=5,padding='same',
          activation='relu'))
model.add(tf.keras.layers.MaxPool2D(padding='same'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss="categorical_crossentropy",
              metrics=["accuracy"])

datagen = ImageDataGenerator(
    rotation_range = 30,
    zoom_range = 0.15,
    width_shift_range=2,
    height_shift_range=2,
    shear_range = 1)

csv_logger = CSVLogger(<Target File>)

history = model.fit(datagen.flow(x_train, y_train, batch_size=50),
                    validation_data=(x_test, y_test),
                    epochs=125, callbacks=[csv_logger],
                    steps_per_epoch = x_train.shape[0]//50)

\end{lstlisting}
\caption{Python code used to build the network modeling the MNIST
  handwritten digits data set.}
\label{lst:handwriting}
\end{lstfloat}
\clearpage
\begin{lstfloat}
\begin{lstlisting}[language=iPython]
import tensorflow as tf
import numpy as np
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.preprocessing.image import ImageDataGenerator
mnist = tf.keras.datasets.fashion_mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
x_train, x_test = x_train / 255.0, x_test / 255.0

y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)

model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(filters = 32, kernel_size = (3, 3),
  activation='relu', input_shape = (28, 28, 1), padding='same'))
model.add(tf.keras.layers.Conv2D(filters = 32, kernel_size = (2, 2), activation='relu', padding = 'same'))
model.add(tf.keras.layers.MaxPool2D(strides=(2,2)))
model.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = (3, 3), activation='relu', padding='same'))
model.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = (3, 3), activation='relu', padding='same'))
model.add(tf.keras.layers.MaxPool2D(strides=(2,2)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

model.compile(optimizer=tf.keras.optimizers.Adam(lr = 1e-3), loss="categorical_crossentropy", metrics=["accuracy"])

datagen = ImageDataGenerator(
    rotation_range = 6,
    zoom_range = 0.15,
    width_shift_range=2,
    height_shift_range=2,
    shear_range = 0.15,
    fill_mode = 'constant',
    cval = 0)

csv_logger = CSVLogger(<Target File>)

history = model.fit(datagen.flow(x_train, y_train, batch_size=30),
                    steps_per_epoch=x_train.shape[0]//30,
                    validation_data=(x_test, y_test),
                    epochs=125, callbacks=[csv_logger],
                    shuffle=True)

\end{lstlisting}
\caption[Python Code for fashion MNIST]{Python code
  used to build the network modeling the fashion MNIST data set.}
\label{lst:fashion}
\end{lstfloat}
\clearpage
\begin{lstfloat}
\begin{lstlisting}[language=iPython]
def get_random_sample(a, b, number_of_samples=10):
    x = []
    y = []
    for category_number in range(0,10):
        # get all samples of a category
        train_data_category = a[b==category_number]
        # pick a number of random samples from the category
        train_data_category = train_data_category[np.random.randint(
            train_data_category.shape[0], size=number_of_samples), :]
        x.extend(train_data_category)
        y.append([category_number]*number_of_samples)

    return (np.asarray(x).reshape(-1, 28, 28, 1),
            np.asarray(y).reshape(10*number_of_samples,1))
\end{lstlisting}
\caption{Python code used to generate the data sets containing a
  certain amount of random data points per class.}
\end{lstfloat}

\section{Additional Comparisons}
\label{app:comp}
In this section, comparisons of cross entropy loss and training
accuracy for the models trained in Section~\ref{sec:smalldata} are given.
\begin{figure}[h]
  \centering
  \small
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = \textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Test Loss},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}]
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_1.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_dropout_02_1.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_datagen_1.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_datagen_dropout_02_1.mean};

        \addlegendentry{\footnotesize{Default}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G. + D. 0.2}}
        \addlegendentry{\footnotesize{D. 0.4}}
        \addlegendentry{\footnotesize{Default}}
      \end{axis}
    \end{tikzpicture}
    \caption{1 Sample per Class}
    \vspace{0.25cm}
  \end{subfigure}
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = \textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Test Loss},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}]
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_dropout_00_10.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_dropout_02_10.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_datagen_dropout_00_10.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_datagen_dropout_02_10.mean};

        \addlegendentry{\footnotesize{Default.}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G + D. 0.2}}
      \end{axis}
    \end{tikzpicture}
    \caption{10 Samples per Class}
  \end{subfigure}
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = 0.9875\textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Test Loss},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}]
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_dropout_00_100.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_dropout_02_100.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_datagen_dropout_00_100.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/adam_datagen_dropout_02_100.mean};

        \addlegendentry{\footnotesize{Default.}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G + D. 0.2}}
      \end{axis}
    \end{tikzpicture}
    \caption{100 Samples per Class}
    \vspace{.25cm}
  \end{subfigure}
  \caption[Mean Test Loss for Subsets of MNIST Handwritten
  Digits]{Mean test cross entropy loss of the models fitting the
    sampled subsets of MNIST handwritten digits over the 125 epochs of training.}
\end{figure}

\begin{figure}[h]
  \centering
  \small
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = \textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Test Loss},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}]
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_dropout_0_1.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_dropout_2_1.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_0_1.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_2_1.mean};

        \addlegendentry{\footnotesize{Default}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G. + D. 0.2}}
        \addlegendentry{\footnotesize{D. 0.4}}
      \end{axis}
    \end{tikzpicture}
    \caption{1 Sample per Class}
    \vspace{0.25cm}
  \end{subfigure}
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = \textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Test Loss},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}, ymin = {0.62}]
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_dropout_0_10.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_dropout_2_10.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_0_10.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_2_10.mean};

        \addlegendentry{\footnotesize{Default.}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G + D. 0.2}}
      \end{axis}
    \end{tikzpicture}
    \caption{10 Samples per Class}
  \end{subfigure}
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = 0.9875\textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Test Loss},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}]
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_dropout_0_100.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_dropout_2_100.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_0_100.mean};
        \addplot table [x=epoch, y=val_loss, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_2_100.mean};

        \addlegendentry{\footnotesize{Default.}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G + D. 0.2}}
      \end{axis}
    \end{tikzpicture}
    \caption{100 Samples per Class}
    \vspace{.25cm}
  \end{subfigure}
  \caption[Mean Test Accuracies for Subsets of Fashion MNIST]{Mean
    test cross entropy loss of the models fitting the sampled subsets
    of fashion MNIST over the 125 epochs of training.}
\end{figure}

\begin{figure}[h]
  \centering
  \small
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = \textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Training Accuracy},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}]
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_1.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_dropout_02_1.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_datagen_1.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_datagen_dropout_02_1.mean};

        \addlegendentry{\footnotesize{Default}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G. + D. 0.2}}
        \addlegendentry{\footnotesize{D. 0.4}}
        \addlegendentry{\footnotesize{Default}}
      \end{axis}
    \end{tikzpicture}
    \caption{1 Sample per Class}
    \vspace{0.25cm}
  \end{subfigure}
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = \textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Test Accuracy},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}]
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_dropout_00_10.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_dropout_02_10.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_datagen_dropout_00_10.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_datagen_dropout_02_10.mean};

        \addlegendentry{\footnotesize{Default.}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G + D. 0.2}}
      \end{axis}
    \end{tikzpicture}
    \caption{10 Samples per Class}
  \end{subfigure}
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = 0.9875\textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Training Accuracy},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}, ymin = {0.92}]
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_dropout_00_100.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_dropout_02_100.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_datagen_dropout_00_100.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/adam_datagen_dropout_02_100.mean};

        \addlegendentry{\footnotesize{Default.}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G + D. 0.2}}
      \end{axis}
    \end{tikzpicture}
    \caption{100 Samples per Class}
    \vspace{.25cm}
  \end{subfigure}
  \caption[Mean Training Accuracies for Subsets of MNIST Handwritten
  Digits]{Mean training accuracies of the models fitting the sampled
    subsets of MNIST handwritten digits over the 125 epochs of training.}
\end{figure}

\begin{figure}[h]
  \centering
  \small
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = \textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Training Accuracy},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}]
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_dropout_0_1.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_dropout_2_1.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_0_1.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_2_1.mean};

        \addlegendentry{\footnotesize{Default}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G. + D. 0.2}}
        \addlegendentry{\footnotesize{D. 0.4}}
      \end{axis}
    \end{tikzpicture}
    \caption{1 Sample per Class}
    \vspace{0.25cm}
  \end{subfigure}
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = \textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Training Accuracy},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}, ymin = {0.62}]
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_dropout_0_10.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_dropout_2_10.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_0_10.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_2_10.mean};

        \addlegendentry{\footnotesize{Default.}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G + D. 0.2}}
      \end{axis}
    \end{tikzpicture}
    \caption{10 Samples per Class}
  \end{subfigure}
  \begin{subfigure}[h]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[legend cell align={left},
          yticklabel style={/pgf/number format/fixed, /pgf/number format/precision=3},
          tick style = {draw = none}, width = 0.9875\textwidth, height = 0.4\textwidth,
          legend style={at={(0.9825,0.0175)},anchor=south east},
          xlabel = {Epoch}, ylabel = {Training Accuracy},
          cycle list/Dark2, every axis plot/.append style={line width =1.25pt}]
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_dropout_0_100.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_dropout_2_100.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_0_100.mean};
        \addplot table [x=epoch, y=accuracy, col sep=comma, mark = none] {Figures/Data/fashion_datagen_dropout_2_100.mean};

        \addlegendentry{\footnotesize{Default.}}
        \addlegendentry{\footnotesize{D. 0.2}}
        \addlegendentry{\footnotesize{G.}}
        \addlegendentry{\footnotesize{G + D. 0.2}}
      \end{axis}
    \end{tikzpicture}
    \caption{100 Samples per Class}
    \vspace{.25cm}
  \end{subfigure}
  \caption[Mean Training Accuracies for Subsets of Fashion MNIST]{Mean
    training accuracies of the models fitting the sampled subsets of fashion MNIST
    over the 125 epochs of training.}
\end{figure}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End:
Changed file: LaTeX figure fig:rn_vs_rs (RSNN vs. regression splines).

@@ -10,14 +10,13 @@ plot coordinates {
 }
 }
 \begin{figure}
-\begin{subfigure}[b]{0.48\textwidth}
+\begin{subfigure}[b]{0.5\textwidth}
 \begin{subfigure}[b]{\textwidth}
 \begin{adjustbox}{width=\textwidth, height=0.25\textheight}
 \begin{tikzpicture}
 \begin{axis}[
   ytick = {-1, 0, 1, 2},
-  yticklabels = {$-1$, $\phantom{-0.}0$, $1$, $2$},
-  restrict x to domain=-4:4, enlarge x limits = {0.1}]
+  yticklabels = {$-1$, $\phantom{-0.}0$, $1$, $2$},]
 \addplot table [x=x, y=y, col sep=comma, only marks,
   forget plot] {Figures/Data/sin_6.csv};
 \addplot [black, line width=2pt] table [x=x, y=y, col
@@ -34,7 +33,7 @@ plot coordinates {
 \begin{subfigure}[b]{\textwidth}
 \begin{adjustbox}{width=\textwidth, height=0.25\textheight}
 \begin{tikzpicture}
-\begin{axis}[restrict x to domain=-4:4, enlarge x limits = {0.1}]
+\begin{axis}
 \addplot table [x=x, y=y, col sep=comma, only marks,
   forget plot] {Figures/Data/sin_6.csv};
 \addplot [black, line width=2pt] table [x=x, y=y, col sep=comma, mark=none] {Figures/Data/matlab_1.csv};
@@ -50,7 +49,7 @@ plot coordinates {
 \begin{subfigure}[b]{\textwidth}
 \begin{adjustbox}{width=\textwidth, height=0.25\textheight}
 \begin{tikzpicture}
-\begin{axis}[restrict x to domain=-4:4, enlarge x limits = {0.1}]
+\begin{axis}
 \addplot table [x=x, y=y, col sep=comma, only marks,
   forget plot] {Figures/Data/sin_6.csv};
 \addplot [black, line width=2pt] table [x=x, y=y, col sep=comma, mark=none] {Figures/Data/matlab_3.csv};
@@ -64,14 +63,13 @@ plot coordinates {
 \caption{$\lambda = 3.0$}
 \end{subfigure}
 \end{subfigure}
-\begin{subfigure}[b]{0.48\textwidth}
+\begin{subfigure}[b]{0.5\textwidth}
 \begin{subfigure}[b]{\textwidth}
 \begin{adjustbox}{width=\textwidth, height=0.245\textheight}
 \begin{tikzpicture}
 \begin{axis}[
   ytick = {-2,-1, 0, 1, 2},
-  yticklabels = {$-2$,$-1$, $\phantom{-0.}0$, $1$, $2$},
-  restrict x to domain=-4:4, enlarge x limits = {0.1}]
+  yticklabels = {$-2$,$-1$, $\phantom{-0.}0$, $1$, $2$},]
 \addplot table [x=x, y=y, col sep=comma, only marks,
   forget plot] {Figures/Data/data_sin_d_t.csv};
 \addplot [black, line width=2pt] table [x=x, y=y, col sep=comma, mark=none] {Figures/Data/matlab_sin_d_01.csv};
@@ -87,7 +85,7 @@ plot coordinates {
 \begin{subfigure}[b]{\textwidth}
 \begin{adjustbox}{width=\textwidth, height=0.25\textheight}
 \begin{tikzpicture}
-\begin{axis}[restrict x to domain=-4:4, enlarge x limits = {0.1}]
+\begin{axis}
 \addplot table [x=x, y=y, col sep=comma, only marks,
   forget plot] {Figures/Data/data_sin_d_t.csv};
 \addplot [black, line width=2pt] table [x=x, y=y, col sep=comma, mark=none] {Figures/Data/matlab_sin_d_1.csv};
@@ -103,7 +101,7 @@ plot coordinates {
 \begin{subfigure}[b]{\textwidth}
 \begin{adjustbox}{width=\textwidth, height=0.25\textheight}
 \begin{tikzpicture}
-\begin{axis}[restrict x to domain=-4:4, enlarge x limits = {0.1}]
+\begin{axis}
 \addplot table [x=x, y=y, col sep=comma, only marks,
   forget plot] {Figures/Data/data_sin_d_t.csv};
 \addplot [black, line width=2pt] table [x=x, y=y, col sep=comma, mark=none] {Figures/Data/matlab_sin_d_3.csv};
@@ -117,8 +115,8 @@ plot coordinates {
 \caption{$\lambda = 3.0$}
 \end{subfigure}
 \end{subfigure}
-\caption[Comparison of Shallow Neural Networks and Regression
-  Splines] {% In these Figures the behaviour stated in ... is
+\caption[Comparison of shallow neural networks and regression
+  splines]{% In these Figures the behaviour stated in ... is
 % visualized
 % in two exaples. For $(a), (b), (c)$ six values of sinus equidistantly
 % spaced on $[-\pi, \pi]$ have been used as training data. For
@@ -133,12 +131,11 @@ plot coordinates {
   Ridge Penalized Neural Network compared to Regression Spline,
   with them being trained on $\text{data}_A$ in a), b), c) and on
   $\text{data}_B$ in d), e), f).
-  The Parameters of each are given above. The implementation of the
-  network in Scala is given in Listing~\ref{lst:rsnn}
+  The Parameters of each are given above.
 }
 \label{fig:rn_vs_rs}
 \end{figure}
 %%% Local Variables:
 %%% mode: latex
-%%% TeX-master: "main"
+%%% TeX-master:
 %%% End:
Changed file: LaTeX figure fig:sgd_vs_gd (SGD vs. GD comparison).

@@ -4,32 +4,28 @@ legend image code/.code={
 \draw[mark repeat=2,mark phase=2]
 plot coordinates {
 (0cm,0cm)
-(0.15cm,0cm) %% default is (0.3cm,0cm)
-(0.3cm,0cm) %% default is (0.6cm,0cm)
+(0.0cm,0cm) %% default is (0.3cm,0cm)
+(0.0cm,0cm) %% default is (0.6cm,0cm)
 };%
 }
 }
 \begin{figure}
 \begin{subfigure}[h!]{\textwidth}
 \begin{tikzpicture}
-\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
-  /pgf/number format/precision=3},tick style = {draw = none}, width = 0.975\textwidth,
-  height = 0.6\textwidth, legend
-  style={at={(0.0125,0.7)},anchor=north west},
-  xlabel = {Epoch}, ylabel = {Test Accuracy}, cycle
-  list/Dark2, every axis plot/.append style={line width
-  =1.25pt, mark = *, mark size=1pt},
+\begin{axis}[tick style = {draw = none}, width = \textwidth,
+  height = 0.6\textwidth,
   xtick = {1, 3, 5,7,9,11,13,15,17,19},
   xticklabels = {$2$, $4$, $6$, $8$,
-  $10$,$12$,$14$,$16$,$18$,$20$}]
+  $10$,$12$,$14$,$16$,$18$,$20$},
+  xlabel = {training epoch}, ylabel = {classification accuracy}]
 \addplot table
   [x=epoch, y=val_accuracy, col sep=comma] {Figures/Data/GD_01.log};
 \addplot table
-  [x=epoch, y=val_accuracy, col sep=comma, mark = *] {Figures/Data/GD_05.log};
+  [x=epoch, y=val_accuracy, col sep=comma] {Figures/Data/GD_05.log};
 \addplot table
-  [x=epoch, y=val_accuracy, col sep=comma, mark = *] {Figures/Data/GD_1.log};
+  [x=epoch, y=val_accuracy, col sep=comma] {Figures/Data/GD_1.log};
 \addplot table
-  [x=epoch, y=val_accuracy, col sep=comma, mark = *]
+  [x=epoch, y=val_accuracy, col sep=comma]
   {Figures/Data/SGD_01_b32.log};

 \addlegendentry{GD$_{0.01}$}
@@ -38,65 +34,59 @@ plot coordinates {
 \addlegendentry{SGD$_{0.01}$}
 \end{axis}
 \end{tikzpicture}
-\caption{Test accuracy during training.}
+%\caption{Classification accuracy}
 \end{subfigure}
-% \begin{subfigure}[b]{\textwidth}
-% \begin{tikzpicture}
-% \begin{axis}[tick style = {draw = none}, width = \textwidth,
-%   height = 0.6\textwidth,
-%   ytick = {0, 1, 2, 3, 4},
-%   yticklabels = {$0$, $1$, $\phantom{0.}2$, $3$, $4$},
-%   xtick = {1, 3, 5,7,9,11,13,15,17,19},
-%   xticklabels = {$2$, $4$, $6$, $8$,
-%   $10$,$12$,$14$,$16$,$18$,$20$},
-%   xlabel = {training epoch}, ylabel = {error measure\vphantom{fy}}]
-% \addplot table
-%   [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_01.log};
-% \addplot table
-%   [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_05.log};
-% \addplot table
-%   [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_1.log};
-% \addplot table
-%   [x=epoch, y=val_loss, col sep=comma] {Figures/Data/SGD_01_b32.log};
+\begin{subfigure}[b]{\textwidth}
+\begin{tikzpicture}
+\begin{axis}[tick style = {draw = none}, width = \textwidth,
+  height = 0.6\textwidth,
+  ytick = {0, 1, 2, 3, 4},
+  yticklabels = {$0$, $1$, $\phantom{0.}2$, $3$, $4$},
+  xtick = {1, 3, 5,7,9,11,13,15,17,19},
+  xticklabels = {$2$, $4$, $6$, $8$,
+  $10$,$12$,$14$,$16$,$18$,$20$},
+  xlabel = {training epoch}, ylabel = {error measure\vphantom{fy}}]
+\addplot table
+  [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_01.log};
+\addplot table
+  [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_05.log};
+\addplot table
+  [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_1.log};
+\addplot table
+  [x=epoch, y=val_loss, col sep=comma] {Figures/Data/SGD_01_b32.log};

-% \addlegendentry{GD$_{0.01}$}
-% \addlegendentry{GD$_{0.05}$}
-% \addlegendentry{GD$_{0.1}$}
-% \addlegendentry{SGD$_{0.01}$}
+\addlegendentry{GD$_{0.01}$}
+\addlegendentry{GD$_{0.05}$}
+\addlegendentry{GD$_{0.1}$}
+\addlegendentry{SGD$_{0.01}$}

-% \end{axis}
-% \end{tikzpicture}
-% \caption{Performance metrics during training}
-% \end{subfigure}
+\end{axis}
+\end{tikzpicture}
+\caption{Performance metrics during training}
+\end{subfigure}
 % \\~\\
-\begin{subfigure}[b]{1.0\linewidth}
-\begin{tabu} to \textwidth {@{} *4{X[c]}c*4{X[c]} @{}}
-\multicolumn{4}{c}{Test Accuracy}
-&~&\multicolumn{4}{c}{Test Loss}
-\\\cline{1-4}\cline{6-9}
-GD$_{0.01}$&GD$_{0.05}$&GD$_{0.1}$&SGD$_{0.01}$&&GD$_{0.01}$&GD$_{0.05}$&GD$_{0.1}$&SGD$_{0.01}$
-\\\cline{1-4}\cline{6-9}
-0.265&0.633&0.203&0.989&&2.267&1.947&3.911&0.032 \\
-\multicolumn{4}{c}{Training Accuracy}
-&~&\multicolumn{4}{c}{Training Loss}
-\\\cline{1-4}\cline{6-9}
-GD$_{0.01}$&GD$_{0.05}$&GD$_{0.1}$&SGD$_{0.01}$&&GD$_{0.01}$&GD$_{0.05}$&GD$_{0.1}$&SGD$_{0.01}$
-\\\cline{1-4}\cline{6-9}
-0.250&0.599&0.685&0.996&&2.271&1.995&1.089&0.012 \\
-\end{tabu}
-\caption{Performance metrics after 20 training epochs.}
-\label{table:sgd_vs_gd}
-\end{subfigure}
-
-\caption[Performance Comparison of SDG and GD]{The neural network
-  given in Figure~\ref{fig:mnist_architecture} trained with different
+\caption[Performance comparison of SDG and GD]{The neural network given in ?? trained with different
   algorithms on the MNIST handwritten digits data set. For gradient
-  descent the learning rated 0.01, 0.05, and 0.1 are (GD$_{\cdot}$). For
+  descent the learning rated 0.01, 0.05 and 0.1 are (GD$_{\cdot}$). For
   stochastic gradient descend a batch size of 32 and learning rate
   of 0.01 is used (SDG$_{0.01}$).}
 \label{fig:sgd_vs_gd}
 \end{figure}

+\begin{table}[h]
+\begin{tabu} to \textwidth {@{} *4{X[c]}c*4{X[c]} @{}}
+\multicolumn{4}{c}{Classification Accuracy}
+&~&\multicolumn{4}{c}{Error Measure}
+\\\cline{1-4}\cline{6-9}
+GD$_{0.01}$&GD$_{0.05}$&GD$_{0.1}$&SGD$_{0.01}$&&GD$_{0.01}$&GD$_{0.05}$&GD$_{0.1}$&SGD$_{0.01}$
+\\\cline{1-4}\cline{6-9}
+\multicolumn{9}{c}{test}\\
+0.265&0.633&0.203&0.989&&2.267&1.947&3.91&0.032
+\end{tabu}
+\caption{Performance metrics of the networks trained in
+  Figure~\ref{fig:sgd_vs_gd} after 20 training epochs.}
+\label{table:sgd_vs_gd}
+\end{table}
 %%% Local Variables:
 %%% mode: latex
 %%% TeX-master: "../main"
|
@ -40,11 +40,11 @@
|
|||||||
\includegraphics[width=\textwidth]{Figures/Data/fashion_mnist9.pdf}
|
\includegraphics[width=\textwidth]{Figures/Data/fashion_mnist9.pdf}
|
||||||
\caption{Ankle boot}
|
\caption{Ankle boot}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\caption[Fashion MNIST Data Set]{The fashtion MNIST data set contains 70.000 images of
|
\caption[Fashion MNIST data set]{The fashtion MNIST data set contains 70.000 images of
|
||||||
preprocessed product images from Zalando, which are categorized as
|
preprocessed product images from Zalando, which are categorized as
|
||||||
T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt,
|
T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt,
|
||||||
Sneaker, Bag, Ankle boot. Of these images 60.000 are used as training images, while
|
Sneaker, Bag, Ankle boot. Of these images 60.000 are used as training images, while
|
||||||
the rest is used to validate the models trained.}
|
the rest are used to validate the models trained.}
|
||||||
\label{fig:fashionMNIST}
|
\label{fig:fashionMNIST}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
%%% Local Variables:
|
%%% Local Variables:
|
||||||
|
@ -16,7 +16,7 @@ plot coordinates {
|
|||||||
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
||||||
/pgf/number format/precision=3},tick style = {draw = none}, width = 0.975\textwidth,
|
/pgf/number format/precision=3},tick style = {draw = none}, width = 0.975\textwidth,
|
||||||
height = 0.6\textwidth, ymin = 0.988, legend style={at={(0.9825,0.0175)},anchor=south east},
|
height = 0.6\textwidth, ymin = 0.988, legend style={at={(0.9825,0.0175)},anchor=south east},
|
||||||
xlabel = {Epoch}, ylabel = {Test Accuracy}, cycle
|
xlabel = {epoch}, ylabel = {Classification Accuracy}, cycle
|
||||||
list/Dark2, every axis plot/.append style={line width =1.25pt}]
|
list/Dark2, every axis plot/.append style={line width =1.25pt}]
|
||||||
\addplot table
|
\addplot table
|
||||||
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
@ -45,18 +45,18 @@ plot coordinates {
|
|||||||
\addlegendentry{\footnotesize{Default}}
|
\addlegendentry{\footnotesize{Default}}
|
||||||
\end{axis}
|
\end{axis}
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
\caption{Test Accuracy}
|
\caption{Classification accuracy}
|
||||||
\vspace{.25cm}
|
\vspace{.25cm}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}[h]{1.0\linewidth}
|
\begin{subfigure}[h]{1.0\linewidth}
|
||||||
\begin{tabu} to \textwidth {@{}lc*5{X[c]}@{}}
|
\begin{tabu} to \textwidth {@{}lc*5{X[c]}@{}}
|
||||||
\Tstrut \Bstrut & Default & D. 0.2 & D. 0.4 & G. &G.+D.\,0.2 & G.+D.\,0.4 \\
|
\Tstrut \Bstrut & \textsc{\,Adam\,} & D. 0.2 & D. 0.4 & G. &G.+D.\,0.2 & G.+D.\,0.4 \\
|
||||||
\hline
|
\hline
|
||||||
\multicolumn{7}{c}{Test Accuracy}\Bstrut \\
|
\multicolumn{7}{c}{Test Accuracy}\Bstrut \\
|
||||||
\cline{2-7}
|
\cline{2-7}
|
||||||
mean \Tstrut & 0.9914 & 0.9923 & 0.9930 & 0.9937 & 0.9943 & 0.9944 \\
|
mean \Tstrut & 0.9914 & 0.9923 & 0.9930 & 0.9937 & 0.9938 & 0.9943 \\
|
||||||
max & 0.9926 & 0.9930 & 0.9934 & 0.9946 & 0.9957 & 0.9956 \\
|
max & 0.9926 & 0.9930 & 0.9934 & 0.9946 & 0.9955 & 0.9956 \\
|
||||||
min & 0.9887 & 0.9909 & 0.9922 & 0.9929 & 0.9930 & 0.9934 \\
|
min & 0.9887 & 0.9909 & 0.9922 & 0.9929 & 0.9929 & 0.9934 \\
|
||||||
\hline
|
\hline
|
||||||
\multicolumn{7}{c}{Training Accuracy}\Bstrut \\
|
\multicolumn{7}{c}{Training Accuracy}\Bstrut \\
|
||||||
\cline{2-7}
|
\cline{2-7}
|
||||||
@ -64,16 +64,15 @@ plot coordinates {
|
|||||||
max & 0.9996 & 0.9996 & 0.9992 & 0.9979 & 0.9971 & 0.9937 \\
|
max & 0.9996 & 0.9996 & 0.9992 & 0.9979 & 0.9971 & 0.9937 \\
|
||||||
min & 0.9992 & 0.9990 & 0.9984 & 0.9947 & 0.9926 & 0.9908 \\
|
min & 0.9992 & 0.9990 & 0.9984 & 0.9947 & 0.9926 & 0.9908 \\
|
||||||
\end{tabu}
|
\end{tabu}
|
||||||
\caption{Mean, maximum and minimum accuracy after 50 epochs of training.}
|
\caption{Mean and maximum accuracy after 48 epochs of training.}
|
||||||
\label{fig:gen_dropout_b}
|
\label{fig:gen_dropout_b}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\caption[Performance Comparison of Overfitting Measures]{Accuracy
|
\caption[Performance comparison of overfitting measures]{Accuracy for the net given in ... with Dropout (D.),
|
||||||
for the net given in Figure~\ref{fig:mnist_architecture} with Dropout (D.),
|
|
||||||
data generation (G.), a combination, or neither (Default) implemented and trained
|
data generation (G.), a combination, or neither (Default) implemented and trained
|
||||||
with \textsc{Adam}. For each epoch the 60.000 training samples
|
with \textsc{Adam}. For each epoch the 60.000 training samples
|
||||||
were used, or for data generation 10.000 steps with each using
|
were used, or for data generation 10.000 steps with each using
|
||||||
batches of 60 generated data points. For each configuration the
|
batches of 60 generated data points. For each configuration the
|
||||||
model was trained five times and the average accuracies at each epoch
|
model was trained 5 times and the average accuracies at each epoch
|
||||||
are given in (a). Mean, maximum and minimum values of accuracy on
|
are given in (a). Mean, maximum and minimum values of accuracy on
|
||||||
the test and training set are given in (b).}
|
the test and training set are given in (b).}
|
||||||
\label{fig:gen_dropout}
|
\label{fig:gen_dropout}
|
||||||
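The per-epoch curves in (a) are averages over five training runs. A minimal sketch of how such a mean file could be produced from the Keras \texttt{CSVLogger} outputs; the input file names are placeholders, only the output name matches the \texttt{.mean} files read by the plots above:
\begin{lstlisting}[language=iPython]
import pandas as pd

# placeholder names: one CSVLogger file per training run
runs = ["adam_run%d.log" % i for i in range(5)]

# every log holds the columns epoch, accuracy, loss, val_accuracy, ...
frames = [pd.read_csv(path) for path in runs]

# average all metrics per epoch; the result can be read by
# \addplot table [col sep=comma] as above
mean = pd.concat(frames).groupby("epoch", as_index=False).mean()
mean.to_csv("adam_1.mean", index=False)
\end{lstlisting}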
|
@ -1,39 +1,38 @@
|
|||||||
\begin{figure}[h]
|
\begin{figure}[h]
|
||||||
\centering
|
\centering
|
||||||
\begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
\includegraphics[width=\textwidth]{Figures/Data/mnist0.pdf}
|
\includegraphics[width=\textwidth]{Plots/Data/mnist0.pdf}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
\includegraphics[width=\textwidth]{Figures/Data/mnist1.pdf}
|
\includegraphics[width=\textwidth]{Plots/Data/mnist1.pdf}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
\includegraphics[width=\textwidth]{Figures/Data/mnist2.pdf}
|
\includegraphics[width=\textwidth]{Plots/Data/mnist2.pdf}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
\includegraphics[width=\textwidth]{Figures/Data/mnist3.pdf}
|
\includegraphics[width=\textwidth]{Plots/Data/mnist3.pdf}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
\includegraphics[width=\textwidth]{Figures/Data/mnist4.pdf}
|
\includegraphics[width=\textwidth]{Plots/Data/mnist4.pdf}
|
||||||
\end{subfigure}\\
|
\end{subfigure}\\
|
||||||
\begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
\includegraphics[width=\textwidth]{Figures/Data/mnist5.pdf}
|
\includegraphics[width=\textwidth]{Plots/Data/mnist5.pdf}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
\includegraphics[width=\textwidth]{Figures/Data/mnist6.pdf}
|
\includegraphics[width=\textwidth]{Plots/Data/mnist6.pdf}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
\includegraphics[width=\textwidth]{Figures/Data/mnist7.pdf}
|
\includegraphics[width=\textwidth]{Plots/Data/mnist7.pdf}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
\includegraphics[width=\textwidth]{Figures/Data/mnist8.pdf}
|
\includegraphics[width=\textwidth]{Plots/Data/mnist8.pdf}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
\includegraphics[width=\textwidth]{Figures/Data/mnist9.pdf}
|
\includegraphics[width=\textwidth]{Plots/Data/mnist9.pdf}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\caption[MNIST Database of Handwritten Digits]{The MNIST database of handwritten
|
\caption[MNIST data set]{The MNIST data set contains 70.000 images of preprocessed handwritten
|
||||||
digits contains 70.000 preprocessed images of such
|
|
||||||
digits. Of these images 60.000 are used as training images, while
|
digits. Of these images 60.000 are used as training images, while
|
||||||
the rest are used to validate the models trained.}
|
the rest are used to validate the models trained.}
|
||||||
\label{fig:MNIST}
|
\label{fig:MNIST}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
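A short sketch of the split described in the caption, using the Keras loader (the data set ships already divided into 60.000 training and 10.000 test images):
\begin{lstlisting}[language=iPython]
import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
print(x_train.shape)  # (60000, 28, 28) -> training images
print(x_test.shape)   # (10000, 28, 28) -> images used for validation
\end{lstlisting}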
%%% Local Variables:
|
%%% Local Variables:
|
||||||
|
@ -8,363 +8,286 @@
|
|||||||
\usepackage{showframe}
|
\usepackage{showframe}
|
||||||
\usepackage{graphicx}
|
\usepackage{graphicx}
|
||||||
\usepackage{titlecaps}
|
\usepackage{titlecaps}
|
||||||
\usepackage{amssymb}
|
|
||||||
\usepackage{mathtools}%add-on and patches to amsmath
|
|
||||||
\usetikzlibrary{calc, 3d}
|
\usetikzlibrary{calc, 3d}
|
||||||
\usepgfplotslibrary{colorbrewer}
|
\usepgfplotslibrary{colorbrewer}
|
||||||
|
|
||||||
\newcommand\Tstrut{\rule{0pt}{2.6ex}} % = `top' strut
|
\newcommand\Tstrut{\rule{0pt}{2.6ex}} % = `top' strut
|
||||||
\newcommand\Bstrut{\rule[-0.9ex]{0pt}{0pt}} % = `bottom' strut
|
\newcommand\Bstrut{\rule[-0.9ex]{0pt}{0pt}} % = `bottom' strut
|
||||||
|
|
||||||
\DeclareMathOperator*{\plim}{\mathbb{P}\text{-}\lim}
|
|
||||||
\DeclareMathOperator{\supp}{supp}
|
|
||||||
\DeclareMathOperator*{\argmin}{arg\,min}
|
|
||||||
\DeclareMathOperator*{\po}{\mathbb{P}\text{-}\mathcal{O}}
|
|
||||||
\DeclareMathOperator*{\equals}{=}
|
|
||||||
\begin{document}
|
\begin{document}
|
||||||
\newcommand{\plimn}[0]{\plim\limits_{n \to \infty}}
|
\pgfplotsset{
|
||||||
\newcommand{\norm}[1]{\left\lVert#1\right\rVert}
|
compat=1.11,
|
||||||
% \pgfplotsset{
|
legend image code/.code={
|
||||||
% compat=1.11,
|
\draw[mark repeat=2,mark phase=2]
|
||||||
% legend image code/.code={
|
plot coordinates {
|
||||||
% \draw[mark repeat=2,mark phase=2]
|
(0cm,0cm)
|
||||||
% plot coordinates {
|
(0.3cm,0cm) %% default is (0.3cm,0cm)
|
||||||
% (0cm,0cm)
|
(0.6cm,0cm) %% default is (0.6cm,0cm)
|
||||||
% (0.3cm,0cm) %% default is (0.3cm,0cm)
|
};%
|
||||||
% (0.6cm,0cm) %% default is (0.6cm,0cm)
|
}
|
||||||
% };%
|
}
|
||||||
% }
|
\begin{figure}
|
||||||
% }
|
\begin{subfigure}[h]{\textwidth}
|
||||||
% \begin{figure}
|
\begin{tikzpicture}
|
||||||
% \begin{subfigure}[h]{\textwidth}
|
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
||||||
% \begin{tikzpicture}
|
/pgf/number format/precision=3},tick style = {draw = none}, width = \textwidth,
|
||||||
% \begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
height = 0.35\textwidth, legend style={at={(0.9825,0.0175)},anchor=south east},
|
||||||
% /pgf/number format/precision=3},tick style = {draw = none}, width = \textwidth,
|
ylabel = {Test Accuracy}, cycle
|
||||||
% height = 0.35\textwidth, legend style={at={(0.9825,0.0175)},anchor=south east},
|
list/Dark2, every axis plot/.append style={line width
|
||||||
% ylabel = {Test Accuracy}, cycle
|
=1.25pt}]
|
||||||
% list/Dark2, every axis plot/.append style={line width
|
% \addplot [dashed] table
|
||||||
% =1.25pt}]
|
% [x=epoch, y=accuracy, col sep=comma, mark = none]
|
||||||
% % \addplot [dashed] table
|
% {Data/adam_datagen_full.log};
|
||||||
% % [x=epoch, y=accuracy, col sep=comma, mark = none]
|
\addplot table
|
||||||
% % {Data/adam_datagen_full.log};
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% \addplot table
|
{Data/adam_1.mean};
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
% \addplot [dashed] table
|
||||||
% {Data/adam_1.mean};
|
% [x=epoch, y=accuracy, col sep=comma, mark = none]
|
||||||
% % \addplot [dashed] table
|
% {Data/adam_datagen_dropout_02_full.log};
|
||||||
% % [x=epoch, y=accuracy, col sep=comma, mark = none]
|
\addplot table
|
||||||
% % {Data/adam_datagen_dropout_02_full.log};
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% \addplot table
|
{Data/adam_datagen_1.mean};
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
\addplot table
|
||||||
% {Data/adam_datagen_1.mean};
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% \addplot table
|
{Data/adam_datagen_dropout_02_1.mean};
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
\addplot table
|
||||||
% {Data/adam_datagen_dropout_02_1.mean};
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% \addplot table
|
{Data/adam_dropout_02_1.mean};
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
|
||||||
% {Data/adam_dropout_02_1.mean};
|
|
||||||
|
|
||||||
|
|
||||||
% \addlegendentry{\footnotesize{G.}}
|
\addlegendentry{\footnotesize{G.}}
|
||||||
% \addlegendentry{\footnotesize{G. + D. 0.2}}
|
\addlegendentry{\footnotesize{G. + D. 0.2}}
|
||||||
% \addlegendentry{\footnotesize{G. + D. 0.4}}
|
\addlegendentry{\footnotesize{G. + D. 0.4}}
|
||||||
% \addlegendentry{\footnotesize{D. 0.2}}
|
\addlegendentry{\footnotesize{D. 0.2}}
|
||||||
% \addlegendentry{\footnotesize{D. 0.4}}
|
\addlegendentry{\footnotesize{D. 0.4}}
|
||||||
% \addlegendentry{\footnotesize{Default}}
|
\addlegendentry{\footnotesize{Default}}
|
||||||
% \end{axis}
|
\end{axis}
|
||||||
% \end{tikzpicture}
|
\end{tikzpicture}
|
||||||
% \caption{1 sample per class}
|
\caption{1 sample per class}
|
||||||
% \vspace{0.25cm}
|
\vspace{0.25cm}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}[h]{\textwidth}
|
\begin{subfigure}[h]{\textwidth}
|
||||||
% \begin{tikzpicture}
|
\begin{tikzpicture}
|
||||||
% \begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
||||||
% /pgf/number format/precision=3},tick style = {draw = none}, width = \textwidth,
|
/pgf/number format/precision=3},tick style = {draw = none}, width = \textwidth,
|
||||||
% height = 0.35\textwidth, legend style={at={(0.9825,0.0175)},anchor=south east},
|
height = 0.35\textwidth, legend style={at={(0.9825,0.0175)},anchor=south east},
|
||||||
% ylabel = {Test Accuracy}, cycle
|
ylabel = {Test Accuracy}, cycle
|
||||||
% list/Dark2, every axis plot/.append style={line width
|
list/Dark2, every axis plot/.append style={line width
|
||||||
% =1.25pt}]
|
=1.25pt}]
|
||||||
% \addplot table
|
\addplot table
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% {Data/adam_dropout_00_10.mean};
|
{Data/adam_dropout_00_10.mean};
|
||||||
% \addplot table
|
\addplot table
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% {Data/adam_dropout_02_10.mean};
|
{Data/adam_dropout_02_10.mean};
|
||||||
% \addplot table
|
\addplot table
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% {Data/adam_datagen_dropout_00_10.mean};
|
{Data/adam_datagen_dropout_00_10.mean};
|
||||||
% \addplot table
|
\addplot table
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% {Data/adam_datagen_dropout_02_10.mean};
|
{Data/adam_datagen_dropout_02_10.mean};
|
||||||
|
|
||||||
|
|
||||||
% \addlegendentry{\footnotesize{G.}}
|
\addlegendentry{\footnotesize{G.}}
|
||||||
% \addlegendentry{\footnotesize{G. + D. 0.2}}
|
\addlegendentry{\footnotesize{G. + D. 0.2}}
|
||||||
% \addlegendentry{\footnotesize{G. + D. 0.4}}
|
\addlegendentry{\footnotesize{G. + D. 0.4}}
|
||||||
% \addlegendentry{\footnotesize{D. 0.2}}
|
\addlegendentry{\footnotesize{D. 0.2}}
|
||||||
% \addlegendentry{\footnotesize{D. 0.4}}
|
\addlegendentry{\footnotesize{D. 0.4}}
|
||||||
% \addlegendentry{\footnotesize{Default}}
|
\addlegendentry{\footnotesize{Default}}
|
||||||
% \end{axis}
|
\end{axis}
|
||||||
% \end{tikzpicture}
|
\end{tikzpicture}
|
||||||
% \caption{10 samples per class}
|
\caption{10 samples per class}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}[h]{\textwidth}
|
\begin{subfigure}[h]{\textwidth}
|
||||||
% \begin{tikzpicture}
|
\begin{tikzpicture}
|
||||||
% \begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
||||||
% /pgf/number format/precision=3},tick style = {draw = none}, width = 0.9875\textwidth,
|
/pgf/number format/precision=3},tick style = {draw = none}, width = 0.9875\textwidth,
|
||||||
% height = 0.35\textwidth, legend style={at={(0.9825,0.0175)},anchor=south east},
|
height = 0.35\textwidth, legend style={at={(0.9825,0.0175)},anchor=south east},
|
||||||
% xlabel = {epoch}, ylabel = {Test Accuracy}, cycle
|
xlabel = {epoch}, ylabel = {Test Accuracy}, cycle
|
||||||
% list/Dark2, every axis plot/.append style={line width
|
list/Dark2, every axis plot/.append style={line width
|
||||||
% =1.25pt}, ymin = {0.92}]
|
=1.25pt}, ymin = {0.92}]
|
||||||
% \addplot table
|
\addplot table
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% {Data/adam_dropout_00_100.mean};
|
{Data/adam_dropout_00_100.mean};
|
||||||
% \addplot table
|
\addplot table
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% {Data/adam_dropout_02_100.mean};
|
{Data/adam_dropout_02_100.mean};
|
||||||
% \addplot table
|
\addplot table
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% {Data/adam_datagen_dropout_00_100.mean};
|
{Data/adam_datagen_dropout_00_100.mean};
|
||||||
% \addplot table
|
\addplot table
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
% {Data/adam_datagen_dropout_02_100.mean};
|
{Data/adam_datagen_dropout_02_100.mean};
|
||||||
|
|
||||||
% \addlegendentry{\footnotesize{G.}}
|
\addlegendentry{\footnotesize{G.}}
|
||||||
% \addlegendentry{\footnotesize{G. + D. 0.2}}
|
\addlegendentry{\footnotesize{G. + D. 0.2}}
|
||||||
% \addlegendentry{\footnotesize{G. + D. 0.4}}
|
\addlegendentry{\footnotesize{G. + D. 0.4}}
|
||||||
% \addlegendentry{\footnotesize{D. 0.2}}
|
\addlegendentry{\footnotesize{D. 0.2}}
|
||||||
% \addlegendentry{\footnotesize{D. 0.4}}
|
\addlegendentry{\footnotesize{D. 0.4}}
|
||||||
% \addlegendentry{\footnotesize{Default}}
|
\addlegendentry{\footnotesize{Default}}
|
||||||
% \end{axis}
|
\end{axis}
|
||||||
% \end{tikzpicture}
|
\end{tikzpicture}
|
||||||
% \caption{100 samples per class}
|
\caption{100 samples per class}
|
||||||
% \vspace{.25cm}
|
\vspace{.25cm}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \caption{Accuracy for the net given in ... with Dropout (D.),
|
\caption{Accuracy for the net given in ... with Dropout (D.),
|
||||||
% data generation (G.), a combination, or neither (Default) implemented and trained
|
data generation (G.), a combination, or neither (Default) implemented and trained
|
||||||
% with \textsc{Adam}. In each epoch either all 60.000 training samples
|
with \textsc{Adam}. In each epoch either all 60.000 training samples
|
||||||
% were used or, with data generation, 10.000 steps were run, each using
|
were used or, with data generation, 10.000 steps were run, each using
|
||||||
% batches of 60 generated data points. For each configuration the
|
batches of 60 generated data points. For each configuration the
|
||||||
% model was trained 5 times and the average accuracies at each epoch
|
model was trained 5 times and the average accuracies at each epoch
|
||||||
% are given in (a). Mean, maximum and minimum values of accuracy on
|
are given in (a). Mean, maximum and minimum values of accuracy on
|
||||||
% the test and training set are given in (b).}
|
the test and training set are given in (b).}
|
||||||
% \end{figure}
|
\end{figure}
|
||||||
% \begin{table}
|
\begin{table}
|
||||||
% \centering
|
\centering
|
||||||
% \begin{tabu} to \textwidth {@{}l*4{X[c]}@{}}
|
\begin{tabu} to \textwidth {@{}l*4{X[c]}@{}}
|
||||||
% \Tstrut \Bstrut & \textsc{Adam} & D. 0.2 & Gen & Gen.+D. 0.2 \\
|
\Tstrut \Bstrut & \textsc{Adam} & D. 0.2 & Gen & Gen.+D. 0.2 \\
|
||||||
% \hline
|
\hline
|
||||||
% &
|
&
|
||||||
% \multicolumn{4}{c}{\titlecap{test accuracy for 1 sample}}\Bstrut \\
|
\multicolumn{4}{c}{\titlecap{test accuracy for 1 sample}}\Bstrut \\
|
||||||
% \cline{2-5}
|
\cline{2-5}
|
||||||
% max \Tstrut & 0.5633 & 0.5312 & 0.6704 & 0.6604 \\
|
max \Tstrut & 0.5633 & 0.5312 & 0.6704 & 0.6604 \\
|
||||||
% min & 0.3230 & 0.4224 & 0.4878 & 0.5175 \\
|
min & 0.3230 & 0.4224 & 0.4878 & 0.5175 \\
|
||||||
% mean & 0.4570 & 0.4714 & 0.5862 & 0.6014 \\
|
mean & 0.4570 & 0.4714 & 0.5862 & 0.6014 \\
|
||||||
% var & 0.0040 & 0.0012 & 0.0036 & 0.0023 \\
|
var & 0.0040 & 0.0012 & 0.0036 & 0.0023 \\
|
||||||
% \hline
|
\hline
|
||||||
% &
|
&
|
||||||
% \multicolumn{4}{c}{\titlecap{test accuracy for 10 samples}}\Bstrut \\
|
\multicolumn{4}{c}{\titlecap{test accuracy for 10 samples}}\Bstrut \\
|
||||||
% \cline{2-5}
|
\cline{2-5}
|
||||||
% max \Tstrut & 0.8585 & 0.9423 & 0.9310 & 0.9441 \\
|
max \Tstrut & 0.8585 & 0.9423 & 0.9310 & 0.9441 \\
|
||||||
% min & 0.8148 & 0.9081 & 0.9018 & 0.9061 \\
|
min & 0.8148 & 0.9081 & 0.9018 & 0.9061 \\
|
||||||
% mean & 0.8377 & 0.9270 & 0.9185 & 0.9232 \\
|
mean & 0.8377 & 0.9270 & 0.9185 & 0.9232 \\
|
||||||
% var & 2.7e-4 & 1.3e-4 & 6e-05 & 1.5e-4 \\
|
var & 2.7e-4 & 1.3e-4 & 6e-05 & 1.5e-4 \\
|
||||||
% \hline
|
\hline
|
||||||
% &
|
&
|
||||||
% \multicolumn{4}{c}{\titlecap{test accuracy for 100 samples}}\Bstrut \\
|
\multicolumn{4}{c}{\titlecap{test accuracy for 100 samples}}\Bstrut \\
|
||||||
% \cline{2-5}
|
\cline{2-5}
|
||||||
% max & 0.9637 & 0.9796 & 0.9810 & 0.9805 \\
|
max & 0.9637 & 0.9796 & 0.9810 & 0.9805 \\
|
||||||
% min & 0.9506 & 0.9719 & 0.9702 & 0.9727 \\
|
min & 0.9506 & 0.9719 & 0.9702 & 0.9727 \\
|
||||||
% mean & 0.9582 & 0.9770 & 0.9769 & 0.9783 \\
|
mean & 0.9582 & 0.9770 & 0.9769 & 0.9783 \\
|
||||||
% var & 2e-05 & 1e-05 & 1e-05 & 0 \\
|
var & 2e-05 & 1e-05 & 1e-05 & 0 \\
|
||||||
% \hline
|
\hline
|
||||||
% \end{tabu}
|
\end{tabu}
|
||||||
% \caption{Values of the test accuracy of the model trained 10 times
|
\caption{Values of the test accuracy of the model trained 10 times
|
||||||
% on random training sets containing 1, 10 and 100 data points per
|
on random training sets containing 1, 10 and 100 data points per
|
||||||
% class.}
|
class.}
|
||||||
% \end{table}
|
\end{table}
|
||||||
|
|
||||||
% \begin{center}
|
\begin{center}
|
||||||
% \begin{figure}[h]
|
\begin{figure}[h]
|
||||||
% \centering
|
\centering
|
||||||
% \begin{subfigure}{\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
% \includegraphics[width=\textwidth]{Data/cnn_fashion_fig.pdf}
|
\includegraphics[width=\textwidth]{Data/mnist0.pdf}
|
||||||
% \caption{original\\image}
|
\caption{original\\image}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}{\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
% \includegraphics[width=\textwidth]{Data/cnn_fashion_fig1.pdf}
|
\includegraphics[width=\textwidth]{Data/mnist_gen_zoom.pdf}
|
||||||
% \caption{random\\zoom}
|
\caption{random\\zoom}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
% \includegraphics[width=\textwidth]{Data/mnist_gen_shear.pdf}
|
\includegraphics[width=\textwidth]{Data/mnist_gen_shear.pdf}
|
||||||
% \caption{random\\shear}
|
\caption{random\\shear}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
% \includegraphics[width=\textwidth]{Data/mnist_gen_rotation.pdf}
|
\includegraphics[width=\textwidth]{Data/mnist_gen_rotation.pdf}
|
||||||
% \caption{random\\rotation}
|
\caption{random\\rotation}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
% \includegraphics[width=\textwidth]{Data/mnist_gen_shift.pdf}
|
\includegraphics[width=\textwidth]{Data/mnist_gen_shift.pdf}
|
||||||
% \caption{random\\positional shift}
|
\caption{random\\positional shift}
|
||||||
% \end{subfigure}\\
|
\end{subfigure}\\
|
||||||
% \begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
% \includegraphics[width=\textwidth]{Data/mnist5.pdf}
|
\includegraphics[width=\textwidth]{Data/mnist5.pdf}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
% \includegraphics[width=\textwidth]{Data/mnist6.pdf}
|
\includegraphics[width=\textwidth]{Data/mnist6.pdf}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
% \includegraphics[width=\textwidth]{Data/mnist7.pdf}
|
\includegraphics[width=\textwidth]{Data/mnist7.pdf}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
% \includegraphics[width=\textwidth]{Data/mnist8.pdf}
|
\includegraphics[width=\textwidth]{Data/mnist8.pdf}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}{0.19\textwidth}
|
\begin{subfigure}{0.19\textwidth}
|
||||||
% \includegraphics[width=\textwidth]{Data/mnist9.pdf}
|
\includegraphics[width=\textwidth]{Data/mnist9.pdf}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \caption{The MNIST data set contains 70.000 images of preprocessed handwritten
|
\caption{The MNIST data set contains 70.000 images of preprocessed handwritten
|
||||||
% digits. Of these images 60.000 are used as training images, while
|
digits. Of these images 60.000 are used as training images, while
|
||||||
% the rest are used to validate the models trained.}
|
the rest are used to validate the models trained.}
|
||||||
% \end{figure}
|
\end{figure}
|
||||||
% \end{center}
|
\end{center}
|
||||||
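The distorted panels (random zoom, shear, rotation and positional shift) can be generated with Keras' \texttt{ImageDataGenerator}; a sketch, assuming the transformation ranges of the appendix listing (rotation 30, zoom 0.15, shear 1, shifts of 2 pixels) and placeholder output file names:
\begin{lstlisting}[language=iPython]
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

(x_train, _), _ = tf.keras.datasets.mnist.load_data()
image = x_train[0].reshape(1, 28, 28, 1).astype("float32")

# one generator per kind of distortion shown in the figure
generators = {
    "zoom":     ImageDataGenerator(zoom_range=0.15),
    "shear":    ImageDataGenerator(shear_range=1),
    "rotation": ImageDataGenerator(rotation_range=30),
    "shift":    ImageDataGenerator(width_shift_range=2, height_shift_range=2),
}
for name, gen in generators.items():
    distorted = next(gen.flow(image, batch_size=1))[0, :, :, 0]
    np.savetxt("mnist_gen_%s_sketch.csv" % name, distorted, delimiter=",")
\end{lstlisting}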
|
|
||||||
% \begin{figure}
|
\begin{figure}
|
||||||
% \begin{adjustbox}{width=\textwidth}
|
\begin{adjustbox}{width=\textwidth}
|
||||||
% \begin{tikzpicture}
|
\begin{tikzpicture}
|
||||||
% \begin{scope}[x = (0:1cm), y=(90:1cm), z=(15:-0.5cm)]
|
\begin{scope}[x = (0:1cm), y=(90:1cm), z=(15:-0.5cm)]
|
||||||
% \node[canvas is xy plane at z=0, transform shape] at (0,0)
|
\node[canvas is xy plane at z=0, transform shape] at (0,0)
|
||||||
% {\includegraphics[width=5cm]{Data/klammern_r.jpg}};
|
{\includegraphics[width=5cm]{Data/klammern_r.jpg}};
|
||||||
% \node[canvas is xy plane at z=2, transform shape] at (0,-0.2)
|
\node[canvas is xy plane at z=2, transform shape] at (0,-0.2)
|
||||||
% {\includegraphics[width=5cm]{Data/klammern_g.jpg}};
|
{\includegraphics[width=5cm]{Data/klammern_g.jpg}};
|
||||||
% \node[canvas is xy plane at z=4, transform shape] at (0,-0.4)
|
\node[canvas is xy plane at z=4, transform shape] at (0,-0.4)
|
||||||
% {\includegraphics[width=5cm]{Data/klammern_b.jpg}};
|
{\includegraphics[width=5cm]{Data/klammern_b.jpg}};
|
||||||
% \node[canvas is xy plane at z=4, transform shape] at (-8,-0.2)
|
\node[canvas is xy plane at z=4, transform shape] at (-8,-0.2)
|
||||||
% {\includegraphics[width=5.3cm]{Data/klammern_rgb.jpg}};
|
{\includegraphics[width=5.3cm]{Data/klammern_rgb.jpg}};
|
||||||
% \end{scope}
|
\end{scope}
|
||||||
% \end{tikzpicture}
|
\end{tikzpicture}
|
||||||
% \end{adjustbox}
|
\end{adjustbox}
|
||||||
% \caption{On the right the red, green and blue channels of the picture
|
\caption{On the right the red, green and blue channels of the picture
|
||||||
% are displayed. In order to better visualize the color channels the
|
are displayed. In order to better visualize the color channels the
|
||||||
% black and white picture of each channel has been colored in the
|
black and white picture of each channel has been colored in the
|
||||||
% respective color. Combining the layers results in the image on the
|
respective color. Combining the layers results in the image on the
|
||||||
% left.}
|
left.}
|
||||||
% \end{figure}
|
\end{figure}
|
||||||
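A minimal sketch of the channel decomposition shown in the figure, with a placeholder input file name; each channel is kept as one coloured layer, and adding the three layers recovers the original picture:
\begin{lstlisting}[language=iPython]
import numpy as np
from PIL import Image

rgb = np.asarray(Image.open("klammern.jpg").convert("RGB"))  # placeholder file

layers = []
for i, name in enumerate(["r", "g", "b"]):
    # keep one colour channel, zero the other two
    layer = np.zeros_like(rgb)
    layer[:, :, i] = rgb[:, :, i]
    layers.append(layer)
    Image.fromarray(layer).save("klammern_%s_sketch.jpg" % name)

# combining the layers gives back the original image
assert np.array_equal(layers[0] + layers[1] + layers[2], rgb)
\end{lstlisting}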
|
|
||||||
% \begin{figure}
|
\begin{figure}
|
||||||
% \centering
|
\centering
|
||||||
% \begin{subfigure}{\linewidth}
|
\begin{subfigure}{\linewidth}
|
||||||
% \centering
|
\centering
|
||||||
% \includegraphics[width=\textwidth]{Data/convnet_fig.pdf}
|
\includegraphics[width=\textwidth]{Data/convnet_fig.pdf}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}{.45\linewidth}
|
\begin{subfigure}{.45\linewidth}
|
||||||
% \centering
|
\centering
|
||||||
% \begin{tikzpicture}
|
\begin{tikzpicture}
|
||||||
% \begin{axis}[enlargelimits=false, width=\textwidth]
|
\begin{axis}[enlargelimits=false, width=\textwidth]
|
||||||
% \addplot[domain=-5:5, samples=100]{tanh(x)};
|
\addplot[domain=-5:5, samples=100]{tanh(x)};
|
||||||
% \end{axis}
|
\end{axis}
|
||||||
% \end{tikzpicture}
|
\end{tikzpicture}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}{.45\linewidth}
|
\begin{subfigure}{.45\linewidth}
|
||||||
% \centering
|
\centering
|
||||||
% \begin{tikzpicture}
|
\begin{tikzpicture}
|
||||||
% \begin{axis}[enlargelimits=false, width=\textwidth,
|
\begin{axis}[enlargelimits=false, width=\textwidth,
|
||||||
% ytick={0,2,4},yticklabels={\hphantom{4.}0,2,4}, ymin=-1]
|
ytick={0,2,4},yticklabels={\hphantom{4.}0,2,4}, ymin=-1]
|
||||||
% \addplot[domain=-5:5, samples=100]{max(0,x)};
|
\addplot[domain=-5:5, samples=100]{max(0,x)};
|
||||||
% \end{axis}
|
\end{axis}
|
||||||
% \end{tikzpicture}
|
\end{tikzpicture}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}{.45\linewidth}
|
\begin{subfigure}{.45\linewidth}
|
||||||
% \centering
|
\centering
|
||||||
% \begin{tikzpicture}
|
\begin{tikzpicture}
|
||||||
% \begin{axis}[enlargelimits=false, width=\textwidth, ymin=-1,
|
\begin{axis}[enlargelimits=false, width=\textwidth, ymin=-1,
|
||||||
% ytick={0,2,4},yticklabels={$\hphantom{-5.}0$,2,4}]
|
ytick={0,2,4},yticklabels={$\hphantom{-5.}0$,2,4}]
|
||||||
% \addplot[domain=-5:5, samples=100]{max(0,x)+ 0.1*min(0,x)};
|
\addplot[domain=-5:5, samples=100]{max(0,x)+ 0.1*min(0,x)};
|
||||||
% \end{axis}
|
\end{axis}
|
||||||
% \end{tikzpicture}
|
\end{tikzpicture}
|
||||||
% \end{subfigure}
|
\end{subfigure}
|
||||||
% \end{figure}
|
\end{figure}
|
||||||
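The three activation functions plotted above, written out as a small NumPy sketch (the slope 0.1 of the leaky variant is taken from the last plot):
\begin{lstlisting}[language=iPython]
import numpy as np

def tanh(x):
    return np.tanh(x)

def relu(x):
    return np.maximum(0, x)

def leaky_relu(x, alpha=0.1):
    # max(0, x) + alpha * min(0, x), as in the third plot
    return np.maximum(0, x) + alpha * np.minimum(0, x)

x = np.linspace(-5, 5, 100)
print(tanh(x[:3]), relu(x[:3]), leaky_relu(x[:3]))
\end{lstlisting}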
|
|
||||||
|
|
||||||
% \begin{tikzpicture}
|
|
||||||
% \begin{axis}[enlargelimits=false]
|
|
||||||
% \addplot [domain=-5:5, samples=101,unbounded coords=jump]{1/(1+exp(-x)};
|
|
||||||
% \addplot[domain=-5:5, samples=100]{tanh(x)};
|
|
||||||
% \addplot[domain=-5:5, samples=100]{max(0,x)};
|
|
||||||
% \end{axis}
|
|
||||||
% \end{tikzpicture}
|
|
||||||
|
|
||||||
% \begin{tikzpicture}
|
|
||||||
% \begin{axis}[enlargelimits=false]
|
|
||||||
% \addplot[domain=-2*pi:2*pi, samples=100]{cos(deg(x))};
|
|
||||||
% \end{axis}
|
|
||||||
% \end{tikzpicture}
|
|
||||||
\newcommand{\abs}[1]{\ensuremath{\left\vert#1\right\vert}}
|
|
||||||
|
|
||||||
\[
|
|
||||||
\sum_{k \in \kappa : \xi_k < T} \varphi(\xi_k, v_k)
|
|
||||||
h_{k,n} = \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
||||||
(l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
|
|
||||||
\left(\sum_{\substack{k \in \kappa \\ \xi_k \in
|
|
||||||
[\delta l , \delta(l+1))}} \varphi(\xi_k, v_k)
|
|
||||||
h_{k,n}\right) \approx
|
|
||||||
\]
|
|
||||||
\[
|
|
||||||
\approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
||||||
(l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
|
|
||||||
\left(\sum_{\substack{k \in \kappa \\ \xi_k \in
|
|
||||||
[\delta l , \delta(l+1))}} \left(\varphi(\delta l, v_k)
|
|
||||||
\frac{1}{n g_\xi (\delta l)} \pm \frac{\varepsilon}{n}\right)
|
|
||||||
\frac{\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
|
|
||||||
\delta(l+1))\right\}}}{\abs{\left\{m \in \kappa : \xi_m
|
|
||||||
\in [\delta l, \delta(l+1))\right\}}}\right)
|
|
||||||
\]
|
|
||||||
\[
|
|
||||||
\approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
||||||
(l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
|
|
||||||
\left(\frac{\sum_{\substack{k \in \kappa \\ \xi_k \in
|
|
||||||
[\delta l , \delta(l+1))}}\varphi(\delta l,
|
|
||||||
v_k)}{\abs{\left\{m \in \kappa : \xi_m
|
|
||||||
\in [\delta l, \delta(l+1))\right\}}}
|
|
||||||
\frac{\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
|
|
||||||
\delta(l+1))\right\}}}{n g_\xi (\delta l)}\right) \pm \varepsilon
|
|
||||||
\]
|
|
||||||
The number of kinks in a given interval of length $\delta$ follows a
|
|
||||||
binomial distribution,
|
|
||||||
\[
|
|
||||||
\mathbb{E} \left[\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
|
|
||||||
\delta(l+1))\right\}}\right] = n \int_{\delta
|
|
||||||
l}^{\delta(l+1)}g_\xi (x) dx \approx n (\delta g_\xi(\delta l)
|
|
||||||
\pm \delta \tilde{\varepsilon}),
|
|
||||||
\]
|
|
||||||
for any $\delta \leq \delta(\varepsilon, \tilde{\varepsilon})$, since $g_\xi$ is uniformly continuous on its
|
|
||||||
support by Assumption..
|
|
||||||
As the distribution of $v$ is continuous as well we get
|
|
||||||
$\mathcal{L}(v_k) = \mathcal{L}(v \,|\, \xi = \delta l) \enspace \forall k \in
|
|
||||||
\kappa : \xi_k \in [\delta l, \delta(l+1))$ for $\delta \leq
|
|
||||||
\delta(\varepsilon, \tilde{\varepsilon})$. Thus the law of
|
|
||||||
large numbers yields
|
|
||||||
\begin{align*}
|
|
||||||
&\sum_{k \in \kappa : \xi_k < T} \varphi(\xi_k, v_k)
|
|
||||||
h_{k,n} \approx\\
|
|
||||||
&\approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
||||||
(l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T
|
|
||||||
\}]}}\left(\mathbb{E}[\varphi(\xi, v)\,|\,\xi=\delta l]
|
|
||||||
\stackrel{\mathbb{P}}{\pm}\tilde{\tilde{\varepsilon}}\right) \delta \left(1 \pm
|
|
||||||
\frac{\tilde{\varepsilon}}{g_\xi(\delta l)}\right) \pm \varepsilon
|
|
||||||
\\
|
|
||||||
&\approx \left(\sum_{\substack{l \in \mathbb{Z} \\ [\delta
|
|
||||||
l, \delta
|
|
||||||
(l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T
|
|
||||||
\}]}}\mathbb{E}[\varphi(\xi, v)\,|\,\xi=\delta l] \delta
|
|
||||||
\stackrel{\mathbb{P}}{\pm}\tilde{\tilde{\varepsilon}}
|
|
||||||
\abs{C_{g_\xi}^u - C_{g_\xi}^l}
|
|
||||||
\right)\\
|
|
||||||
&\phantom{\approx}\cdot \left(1 \pm
|
|
||||||
\frac{\tilde{\varepsilon}}{g_\xi(\delta l)}\right) \pm \varepsilon
|
|
||||||
\end{align*}
|
|
||||||
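The binomial claim about the number of kinks per interval can be checked numerically; a sketch under the example assumption that $g_\xi$ is the uniform density on $[-10,10]$, so that the expected count equals $n\,\delta\,g_\xi(\delta l)$ exactly:
\begin{lstlisting}[language=iPython]
import numpy as np

rng = np.random.default_rng(0)
n, delta, l = 10_000, 0.05, 3        # n kinks, interval [delta*l, delta*(l+1))
g_density = 1 / 20                   # uniform density on [-10, 10]

counts = []
for _ in range(1_000):
    xis = rng.uniform(-10, 10, size=n)
    counts.append(np.sum((xis >= delta * l) & (xis < delta * (l + 1))))

# empirical mean of the count vs. n * integral of g_xi over the interval
print(np.mean(counts), n * delta * g_density)
\end{lstlisting}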
\newpage
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[enlargelimits=false]
|
||||||
|
\addplot [domain=-5:5, samples=101,unbounded coords=jump]{1/(1+exp(-x)};
|
||||||
|
\addplot[domain=-5:5, samples=100]{tanh(x)};
|
||||||
|
\addplot[domain=-5:5, samples=100]{max(0,x)};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[enlargelimits=false]
|
||||||
|
\addplot[domain=-2*pi:2*pi, samples=100]{cos(deg(x))};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|
||||||
|
@ -4,56 +4,34 @@ legend image code/.code={
|
|||||||
\draw[mark repeat=2,mark phase=2]
|
\draw[mark repeat=2,mark phase=2]
|
||||||
plot coordinates {
|
plot coordinates {
|
||||||
(0cm,0cm)
|
(0cm,0cm)
|
||||||
(0.15cm,0cm) %% default is (0.3cm,0cm)
|
(0.0cm,0cm) %% default is (0.3cm,0cm)
|
||||||
(0.3cm,0cm) %% default is (0.6cm,0cm)
|
(0.0cm,0cm) %% default is (0.6cm,0cm)
|
||||||
};%
|
};%
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
\begin{figure}
|
\begin{figure}
|
||||||
\begin{subfigure}[h]{\textwidth}
|
\begin{subfigure}[h]{\textwidth}
|
||||||
\begin{tikzpicture}
|
\begin{tikzpicture}
|
||||||
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
\begin{axis}[tick style = {draw = none}, width = \textwidth,
|
||||||
/pgf/number format/precision=3},tick style = {draw = none}, width = 0.975\textwidth,
|
height = 0.6\textwidth, ymin = 0.92, legend style={at={(0.9825,0.75)},anchor=north east},
|
||||||
height = 0.6\textwidth, ymin = 0.885, legend style={at={(0.9825,0.0175)},anchor=south east},
|
xlabel = {epoch}, ylabel = {Classification Accuracy}]
|
||||||
xlabel = {Epoch}, ylabel = {Test Accuracy}, cycle
|
|
||||||
list/Dark2, every axis plot/.append style={line width
|
|
||||||
=1.25pt}]
|
|
||||||
% [tick style = {draw = none}, width = \textwidth,
|
|
||||||
% height = 0.6\textwidth, ymin = 0.905, legend style={at={(0.9825,0.75)},anchor=north east},
|
|
||||||
% xlabel = {epoch}, ylabel = {Classification Accuracy}]
|
|
||||||
% \addplot table
|
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
|
||||||
% {Figures/Data/adagrad.log};
|
|
||||||
% \addplot table
|
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
|
||||||
% {Figures/Data/adadelta.log};
|
|
||||||
% \addplot table
|
|
||||||
% [x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
|
||||||
% {Figures/Data/adam.log};
|
|
||||||
\addplot table
|
\addplot table
|
||||||
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
{Figures/Data/Adagrad.mean};
|
{Figures/Data/adagrad.log};
|
||||||
\addplot table
|
\addplot table
|
||||||
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
{Figures/Data/Adadelta.mean};
|
{Figures/Data/adadelta.log};
|
||||||
\addplot table
|
\addplot table
|
||||||
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
{Figures/Data/Adam.mean};
|
{Figures/Data/adam.log};
|
||||||
\addplot table
|
|
||||||
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
|
||||||
{Figures/Data/SGD_00.mean};
|
|
||||||
\addplot table
|
|
||||||
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
|
||||||
{Figures/Data/SGD_09.mean};
|
|
||||||
|
|
||||||
\addlegendentry{\footnotesize{\textsc{AdaGrad}}}
|
\addlegendentry{\footnotesize{ADAGRAD}}
|
||||||
\addlegendentry{\footnotesize{\textsc{Adadelta}}}
|
\addlegendentry{\footnotesize{ADADELTA}}
|
||||||
\addlegendentry{\footnotesize{\textsc{Adam}}}
|
\addlegendentry{\footnotesize{ADAM}}
|
||||||
\addlegendentry{\footnotesize{\textsc{Sgd}}}
|
\addlegendentry{SGD$_{0.01}$}
|
||||||
\addlegendentry{\footnotesize{Momentum}}
|
|
||||||
\end{axis}
|
\end{axis}
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
\caption{Test accuracies during training}
|
%\caption{Classification accuracy}
|
||||||
\vspace{.25cm}
|
\vspace{.25cm}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
% \begin{subfigure}[b]{\textwidth}
|
% \begin{subfigure}[b]{\textwidth}
|
||||||
@ -80,27 +58,18 @@ plot coordinates {
|
|||||||
% \vspace{.25cm}
|
% \vspace{.25cm}
|
||||||
% \end{subfigure}
|
% \end{subfigure}
|
||||||
\begin{subfigure}[b]{1.0\linewidth}
|
\begin{subfigure}[b]{1.0\linewidth}
|
||||||
\begin{tabu} to \textwidth {@{}l*5{X[c]}@{}}
|
\begin{tabu} to \textwidth {@{} *3{X[c]}c*3{X[c]} @{}}
|
||||||
\Tstrut \Bstrut &\textsc{AdaGrad}& \textsc{AdaDelta}&
|
\multicolumn{3}{c}{Classification Accuracy}
|
||||||
\textsc{Adam} & \textsc{Sgd} & Momentum \\
|
&~&\multicolumn{3}{c}{Error Measure}
|
||||||
\hline
|
\\\cline{1-3}\cline{5-7}
|
||||||
\Tstrut Accuracy &0.9870 & 0.9562 & 0.9925 & 0.9866 & 0.9923 \\
|
ADAGRAD&ADADELTA&ADAM&&ADAGRAD&ADADELTA&ADAM
|
||||||
\Tstrut Loss &0.0404 & 0.1447 & 0.0999 & 0.0403 & 0.0246 \\
|
\\\cline{1-3}\cline{5-7}
|
||||||
|
1&1&1&&1&1&1
|
||||||
\end{tabu}
|
\end{tabu}
|
||||||
% \begin{tabu} to \textwidth {@{} *3{X[c]}c*3{X[c]} @{}}
|
\caption{Performance metrics after 20 epochs}
|
||||||
% \multicolumn{3}{c}{Classification Accuracy}
|
|
||||||
% &~&\multicolumn{3}{c}{Error Measure}
|
|
||||||
% \\\cline{1-3}\cline{5-7}
|
|
||||||
% \textsc{AdaGrad}&\textsc{AdaDelta}&\textsc{Adam}&&\textsc{AdaGrad}&\textsc{AdaDelta}&\textsc{Adam}
|
|
||||||
% \\\cline{1-3}\cline{5-7}
|
|
||||||
% 1&1&1&&1&1&1
|
|
||||||
% \end{tabu}
|
|
||||||
\caption{Performance metrics after 50 epochs}
|
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\caption[Performance Comparison of Training Algorithms]{
|
\caption[Performance comparison of training algorithms]{Classification accuracy on the test set and ...Performance metrics of the network given in ... trained
|
||||||
Average performance metrics of the neural network given in
|
with different optimization algorithms}
|
||||||
Figure~\ref{fig:mnist_architecture} trained 5 times for 50 epochs
|
|
||||||
using different optimization algorithms.}
|
|
||||||
\label{fig:comp_alg}
|
\label{fig:comp_alg}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
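A hedged sketch of how the optimizer comparison could be set up in Keras; \texttt{build\_model} is a stand-in for the network referenced in the caption, and the hyper-parameters shown are the library defaults (plain SGD with learning rate 0.01, momentum 0.9 for the momentum variant), not necessarily the ones used for the table:
\begin{lstlisting}[language=iPython]
import tensorflow as tf

optimizers = {
    "adagrad":  tf.keras.optimizers.Adagrad(),
    "adadelta": tf.keras.optimizers.Adadelta(),
    "adam":     tf.keras.optimizers.Adam(),
    "sgd":      tf.keras.optimizers.SGD(learning_rate=0.01),
    "momentum": tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9),
}

for name, opt in optimizers.items():
    model = build_model()  # placeholder for the CNN compared here
    model.compile(optimizer=opt, loss="categorical_crossentropy",
                  metrics=["accuracy"])
    # model.fit(...) with a CSVLogger callback then produces one log per run
\end{lstlisting}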
%%% Local Variables:
|
%%% Local Variables:
|
||||||
|
@ -14,7 +14,6 @@
|
|||||||
\end{adjustbox}
|
\end{adjustbox}
|
||||||
\caption{True position (\textcolor{red}{red}), distorted position data (black)}
|
\caption{True position (\textcolor{red}{red}), distorted position data (black)}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\hfill
|
|
||||||
\begin{subfigure}[b]{0.49\textwidth}
|
\begin{subfigure}[b]{0.49\textwidth}
|
||||||
\centering
|
\centering
|
||||||
\begin{adjustbox}{width=\textwidth, height=0.25\textheight}
|
\begin{adjustbox}{width=\textwidth, height=0.25\textheight}
|
||||||
@ -29,11 +28,11 @@
|
|||||||
\end{adjustbox}
|
\end{adjustbox}
|
||||||
\caption{True position (\textcolor{red}{red}), filtered position data (black)}
|
\caption{True position (\textcolor{red}{red}), filtered position data (black)}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\caption[Signal Smoothing Using Convolution]{Example for noise reduction using convolution with simulated
|
\caption[Signal smoothing using convolution]{Example for noise reduction using convolution with simulated
|
||||||
positional data. As filter
|
positional data. As filter
|
||||||
$g(i)=\left(\nicefrac{1}{3},\nicefrac{1}{4},\nicefrac{1}{5},\nicefrac{1}{6},\nicefrac{1}{20}\right)_{(i-1)}$
|
$g(i)=\left(\nicefrac{1}{3},\nicefrac{1}{4},\nicefrac{1}{5},\nicefrac{1}{6},\nicefrac{1}{20}\right)_{(i-1)}$
|
||||||
is chosen and applied to the $x$ and $y$ coordinate
|
is chosen and applied to the $x$ and $y$ coordinate
|
||||||
data separately. The convolution of both signals with $g$
|
data separately. The convolution of both signals with $g$
|
||||||
improves the MSE of the positions from 0.196 to 0.170 and
|
improves the MSE of the positions from 0.196 to 0.170 and
|
||||||
visibly smoothes the data.
|
visibly smoothes the data.
|
||||||
}
|
}
|
||||||
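A minimal NumPy sketch of the smoothing described in the caption: the stated filter is convolved with the $x$ and $y$ coordinates separately (the position arrays below are placeholders):
\begin{lstlisting}[language=iPython]
import numpy as np

g = np.array([1/3, 1/4, 1/5, 1/6, 1/20])   # filter from the caption

def smooth(signal):
    # 'same' keeps the length of the signal (zero padding at the ends)
    return np.convolve(signal, g, mode="same")

# x, y: noisy positional measurements (placeholders)
x = np.cumsum(0.1 * np.random.randn(200))
y = np.cumsum(0.1 * np.random.randn(200))
x_smooth, y_smooth = smooth(x), smooth(y)
\end{lstlisting}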
|
@ -11,306 +11,165 @@
|
|||||||
\definecolor{ipython_red}{RGB}{186, 33, 33}
|
\definecolor{ipython_red}{RGB}{186, 33, 33}
|
||||||
\definecolor{ipython_green}{RGB}{0, 128, 0}
|
\definecolor{ipython_green}{RGB}{0, 128, 0}
|
||||||
\definecolor{ipython_cyan}{RGB}{64, 128, 128}
|
\definecolor{ipython_cyan}{RGB}{64, 128, 128}
|
||||||
\definecolor{ipython_purple}{RGB}{110, 64, 130}
|
\definecolor{ipython_purple}{RGB}{170, 34, 255}
|
||||||
|
|
||||||
\usepackage{listings}
|
\usepackage{listings}
|
||||||
\usepackage{float}
|
\lstset{
|
||||||
|
breaklines=true,
|
||||||
|
%
|
||||||
|
extendedchars=true,
|
||||||
|
literate=
|
||||||
|
{á}{{\'a}}1 {é}{{\'e}}1 {í}{{\'i}}1 {ó}{{\'o}}1 {ú}{{\'u}}1
|
||||||
|
{Á}{{\'A}}1 {É}{{\'E}}1 {Í}{{\'I}}1 {Ó}{{\'O}}1 {Ú}{{\'U}}1
|
||||||
|
{à}{{\`a}}1 {è}{{\`e}}1 {ì}{{\`i}}1 {ò}{{\`o}}1 {ù}{{\`u}}1
|
||||||
|
{À}{{\`A}}1 {È}{{\`E}}1 {Ì}{{\`I}}1 {Ò}{{\`O}}1 {Ù}{{\`U}}1
|
||||||
|
{ä}{{\"a}}1 {ë}{{\"e}}1 {ï}{{\"i}}1 {ö}{{\"o}}1 {ü}{{\"u}}1
|
||||||
|
{Ä}{{\"A}}1 {Ë}{{\"E}}1 {Ï}{{\"I}}1 {Ö}{{\"O}}1 {Ü}{{\"U}}1
|
||||||
|
{â}{{\^a}}1 {ê}{{\^e}}1 {î}{{\^i}}1 {ô}{{\^o}}1 {û}{{\^u}}1
|
||||||
|
{Â}{{\^A}}1 {Ê}{{\^E}}1 {Î}{{\^I}}1 {Ô}{{\^O}}1 {Û}{{\^U}}1
|
||||||
|
{œ}{{\oe}}1 {Œ}{{\OE}}1 {æ}{{\ae}}1 {Æ}{{\AE}}1 {ß}{{\ss}}1
|
||||||
|
{ç}{{\c c}}1 {Ç}{{\c C}}1 {ø}{{\o}}1 {å}{{\r a}}1 {Å}{{\r A}}1
|
||||||
|
{€}{{\EUR}}1 {£}{{\pounds}}1
|
||||||
|
}
|
||||||
|
|
||||||
\usepackage{amsmath,amssymb}
|
|
||||||
|
|
||||||
% \newfloat{lstfloat}{htbp}{lop}
|
|
||||||
% \floatname{lstfloat}{Listing}
|
|
||||||
% \def\lstfloatautorefname{Listing}
|
|
||||||
|
|
||||||
% \lstset{
|
|
||||||
% breaklines=true,
|
|
||||||
%%
|
%%
|
||||||
|
%% Python definition (c) 1998 Michael Weber
|
||||||
|
%% Additional definitions (2013) Alexis Dimitriadis
|
||||||
|
%% modified by me (should not have empty lines)
|
||||||
|
%%
|
||||||
|
\lstdefinelanguage{iPython}{
|
||||||
|
morekeywords={access,and,break,class,continue,def,del,elif,else,except,exec,finally,for,from,global,if,import,in,is,lambda,not,or,pass,print,raise,return,try,while},%
|
||||||
|
%
|
||||||
|
% Built-ins
|
||||||
|
morekeywords=[2]{abs,all,any,basestring,bin,bool,bytearray,callable,chr,classmethod,cmp,compile,complex,delattr,dict,dir,divmod,enumerate,eval,execfile,file,filter,float,format,frozenset,getattr,globals,hasattr,hash,help,hex,id,input,int,isinstance,issubclass,iter,len,list,locals,long,map,max,memoryview,min,next,object,oct,open,ord,pow,property,range,raw_input,reduce,reload,repr,reversed,round,set,setattr,slice,sorted,staticmethod,str,sum,super,tuple,type,unichr,unicode,vars,xrange,zip,apply,buffer,coerce,intern},%
|
||||||
|
%
|
||||||
|
sensitive=true,%
|
||||||
|
morecomment=[l]\#,%
|
||||||
|
morestring=[b]',%
|
||||||
|
morestring=[b]",%
|
||||||
|
%
|
||||||
|
morestring=[s]{'''}{'''},% used for documentation text (multiline strings)
|
||||||
|
morestring=[s]{"""}{"""},% added by Philipp Matthias Hahn
|
||||||
|
%
|
||||||
|
morestring=[s]{r'}{'},% `raw' strings
|
||||||
|
morestring=[s]{r"}{"},%
|
||||||
|
morestring=[s]{r'''}{'''},%
|
||||||
|
morestring=[s]{r"""}{"""},%
|
||||||
|
morestring=[s]{u'}{'},% unicode strings
|
||||||
|
morestring=[s]{u"}{"},%
|
||||||
|
morestring=[s]{u'''}{'''},%
|
||||||
|
morestring=[s]{u"""}{"""},%
|
||||||
|
%
|
||||||
|
% {replace}{replacement}{length of replace}
|
||||||
|
% *{-}{-}{1} will not replace in comments and so on
|
||||||
|
literate=
|
||||||
|
{á}{{\'a}}1 {é}{{\'e}}1 {í}{{\'i}}1 {ó}{{\'o}}1 {ú}{{\'u}}1
|
||||||
|
{Á}{{\'A}}1 {É}{{\'E}}1 {Í}{{\'I}}1 {Ó}{{\'O}}1 {Ú}{{\'U}}1
|
||||||
|
{à}{{\`a}}1 {è}{{\`e}}1 {ì}{{\`i}}1 {ò}{{\`o}}1 {ù}{{\`u}}1
|
||||||
|
{À}{{\`A}}1 {È}{{\`E}}1 {Ì}{{\`I}}1 {Ò}{{\`O}}1 {Ù}{{\`U}}1
|
||||||
|
{ä}{{\"a}}1 {ë}{{\"e}}1 {ï}{{\"i}}1 {ö}{{\"o}}1 {ü}{{\"u}}1
|
||||||
|
{Ä}{{\"A}}1 {Ë}{{\"E}}1 {Ï}{{\"I}}1 {Ö}{{\"O}}1 {Ü}{{\"U}}1
|
||||||
|
{â}{{\^a}}1 {ê}{{\^e}}1 {î}{{\^i}}1 {ô}{{\^o}}1 {û}{{\^u}}1
|
||||||
|
{Â}{{\^A}}1 {Ê}{{\^E}}1 {Î}{{\^I}}1 {Ô}{{\^O}}1 {Û}{{\^U}}1
|
||||||
|
{œ}{{\oe}}1 {Œ}{{\OE}}1 {æ}{{\ae}}1 {Æ}{{\AE}}1 {ß}{{\ss}}1
|
||||||
|
{ç}{{\c c}}1 {Ç}{{\c C}}1 {ø}{{\o}}1 {å}{{\r a}}1 {Å}{{\r A}}1
|
||||||
|
{€}{{\EUR}}1 {£}{{\pounds}}1
|
||||||
|
%
|
||||||
|
{^}{{{\color{ipython_purple}\^{}}}}1
|
||||||
|
{=}{{{\color{ipython_purple}=}}}1
|
||||||
|
%
|
||||||
|
{+}{{{\color{ipython_purple}+}}}1
|
||||||
|
{*}{{{\color{ipython_purple}$^\ast$}}}1
|
||||||
|
{/}{{{\color{ipython_purple}/}}}1
|
||||||
|
%
|
||||||
|
{+=}{{{+=}}}1
|
||||||
|
{-=}{{{-=}}}1
|
||||||
|
{*=}{{{$^\ast$=}}}1
|
||||||
|
{/=}{{{/=}}}1,
|
||||||
|
literate=
|
||||||
|
*{-}{{{\color{ipython_purple}-}}}1
|
||||||
|
{?}{{{\color{ipython_purple}?}}}1,
|
||||||
|
%
|
||||||
|
identifierstyle=\color{black}\ttfamily,
|
||||||
|
commentstyle=\color{ipython_cyan}\ttfamily,
|
||||||
|
stringstyle=\color{ipython_red}\ttfamily,
|
||||||
|
keepspaces=true,
|
||||||
|
showspaces=false,
|
||||||
|
showstringspaces=false,
|
||||||
|
%
|
||||||
|
rulecolor=\color{ipython_frame},
|
||||||
|
frame=single,
|
||||||
|
frameround={t}{t}{t}{t},
|
||||||
|
framexleftmargin=6mm,
|
||||||
|
numbers=left,
|
||||||
|
numberstyle=\tiny\color{halfgray},
|
||||||
|
%
|
||||||
|
%
|
||||||
|
backgroundcolor=\color{ipython_bg},
|
||||||
% extendedchars=true,
|
% extendedchars=true,
|
||||||
% literate=
|
basicstyle=\scriptsize,
|
||||||
% {á}{{\'a}}1 {é}{{\'e}}1 {í}{{\'i}}1 {ó}{{\'o}}1 {ú}{{\'u}}1
|
keywordstyle=\color{ipython_green}\ttfamily,
|
||||||
% {Á}{{\'A}}1 {É}{{\'E}}1 {Í}{{\'I}}1 {Ó}{{\'O}}1 {Ú}{{\'U}}1
|
}
|
||||||
% {à}{{\`a}}1 {è}{{\`e}}1 {ì}{{\`i}}1 {ò}{{\`o}}1 {ù}{{\`u}}1
|
|
||||||
% {À}{{\`A}}1 {È}{{\'E}}1 {Ì}{{\`I}}1 {Ò}{{\`O}}1 {Ù}{{\`U}}1
|
|
||||||
% {ä}{{\"a}}1 {ë}{{\"e}}1 {ï}{{\"i}}1 {ö}{{\"o}}1 {ü}{{\"u}}1
|
|
||||||
% {Ä}{{\"A}}1 {Ë}{{\"E}}1 {Ï}{{\"I}}1 {Ö}{{\"O}}1 {Ü}{{\"U}}1
|
|
||||||
% {â}{{\^a}}1 {ê}{{\^e}}1 {î}{{\^i}}1 {ô}{{\^o}}1 {û}{{\^u}}1
|
|
||||||
% {Â}{{\^A}}1 {Ê}{{\^E}}1 {Î}{{\^I}}1 {Ô}{{\^O}}1 {Û}{{\^U}}1
|
|
||||||
% {œ}{{\oe}}1 {Œ}{{\OE}}1 {æ}{{\ae}}1 {Æ}{{\AE}}1 {ß}{{\ss}}1
|
|
||||||
% {ç}{{\c c}}1 {Ç}{{\c C}}1 {ø}{{\o}}1 {å}{{\r a}}1 {Å}{{\r A}}1
|
|
||||||
% {€}{{\EUR}}1 {£}{{\pounds}}1
|
|
||||||
% }
|
|
||||||
|
|
||||||
% %%
|
|
||||||
% %% Python definition (c) 1998 Michael Weber
|
|
||||||
% %% Additional definitions (2013) Alexis Dimitriadis
|
|
||||||
% %% modified by me (should not have empty lines)
|
|
||||||
% %%
|
|
||||||
% \lstdefinelanguage{iPython}{
|
|
||||||
% morekeywords={access,and,break,class,continue,def,del,elif,else,except,exec,finally,for,from,global,if,import,
|
|
||||||
% in,is,lambda,not,or,pass,print,raise,return,try,while},%
|
|
||||||
% %
|
|
||||||
% % Built-ins
|
|
||||||
% morekeywords=[2]{abs,all,any,basestring,bin,bool,bytearray,callable,chr,classmethod,cmp,compile,complex,delattr,dict,dir,divmod,enumerate,eval,execfile,file,filter,float,format,frozenset,getattr,globals,hasattr,hash,help,hex,id,input,int,isinstance,issubclass,iter,len,list,locals,long,map,max,memoryview,min,next,object,oct,open,ord,pow,property,range,raw_input,reduce,reload,repr,reversed,round,set,setattr,slice,sorted,staticmethod,str,sum,super,tuple,type,unichr,unicode,vars,xrange,zip,apply,buffer,coerce,intern,val},%
|
|
||||||
% %
|
|
||||||
% sensitive=true,%
|
|
||||||
% morecomment=[l]\#,%
|
|
||||||
% morestring=[b]',%
|
|
||||||
% morestring=[b]",%
|
|
||||||
% %
|
|
||||||
% morestring=[s]{'''}{'''},% used for documentation text (multiline strings)
|
|
||||||
% morestring=[s]{"""}{"""},% added by Philipp Matthias Hahn
|
|
||||||
% %
|
|
||||||
% morestring=[s]{r'}{'},% `raw' strings
|
|
||||||
% morestring=[s]{r"}{"},%
|
|
||||||
% morestring=[s]{r'''}{'''},%
|
|
||||||
% morestring=[s]{r"""}{"""},%
|
|
||||||
% morestring=[s]{u'}{'},% unicode strings
|
|
||||||
% morestring=[s]{u"}{"},%
|
|
||||||
% morestring=[s]{u'''}{'''},%
|
|
||||||
% morestring=[s]{u"""}{"""},%
|
|
||||||
% %
|
|
||||||
% % {replace}{replacement}{length of replace}
|
|
||||||
% % *{-}{-}{1} will not replace in comments and so on
|
|
||||||
% literate=
|
|
||||||
% {á}{{\'a}}1 {é}{{\'e}}1 {í}{{\'i}}1 {ó}{{\'o}}1 {ú}{{\'u}}1
|
|
||||||
% {Á}{{\'A}}1 {É}{{\'E}}1 {Í}{{\'I}}1 {Ó}{{\'O}}1 {Ú}{{\'U}}1
|
|
||||||
% {à}{{\`a}}1 {è}{{\`e}}1 {ì}{{\`i}}1 {ò}{{\`o}}1 {ù}{{\`u}}1
|
|
||||||
% {À}{{\`A}}1 {È}{{\`E}}1 {Ì}{{\`I}}1 {Ò}{{\`O}}1 {Ù}{{\`U}}1
|
|
||||||
% {ä}{{\"a}}1 {ë}{{\"e}}1 {ï}{{\"i}}1 {ö}{{\"o}}1 {ü}{{\"u}}1
|
|
||||||
% {Ä}{{\"A}}1 {Ë}{{\"E}}1 {Ï}{{\"I}}1 {Ö}{{\"O}}1 {Ü}{{\"U}}1
|
|
||||||
% {â}{{\^a}}1 {ê}{{\^e}}1 {î}{{\^i}}1 {ô}{{\^o}}1 {û}{{\^u}}1
|
|
||||||
% {Â}{{\^A}}1 {Ê}{{\^E}}1 {Î}{{\^I}}1 {Ô}{{\^O}}1 {Û}{{\^U}}1
|
|
||||||
% {œ}{{\oe}}1 {Œ}{{\OE}}1 {æ}{{\ae}}1 {Æ}{{\AE}}1 {ß}{{\ss}}1
|
|
||||||
% {ç}{{\c c}}1 {Ç}{{\c C}}1 {ø}{{\o}}1 {å}{{\r a}}1 {Å}{{\r A}}1
|
|
||||||
% {€}{{\EUR}}1 {£}{{\pounds}}1
|
|
||||||
% %
|
|
||||||
% {^}{{{\color{ipython_purple}\^{}}}}1
|
|
||||||
% {=}{{{\color{ipython_purple}=}}}1
|
|
||||||
% %
|
|
||||||
% {+}{{{\color{ipython_purple}+}}}1
|
|
||||||
% {*}{{{\color{ipython_purple}$^\ast$}}}1
|
|
||||||
% {/}{{{\color{ipython_purple}/}}}1
|
|
||||||
% %
|
|
||||||
% {+=}{{{+=}}}1
|
|
||||||
% {-=}{{{-=}}}1
|
|
||||||
% {*=}{{{$^\ast$=}}}1
|
|
||||||
% {/=}{{{/=}}}1,
|
|
||||||
% literate=
|
|
||||||
% *{-}{{{\color{ipython_purple}-}}}1
|
|
||||||
% {?}{{{\color{ipython_purple}?}}}1,
|
|
||||||
% %
|
|
||||||
% identifierstyle=\color{black}\ttfamily,
|
|
||||||
% commentstyle=\color{ipython_red}\ttfamily,
|
|
||||||
% stringstyle=\color{ipython_red}\ttfamily,
|
|
||||||
% keepspaces=true,
|
|
||||||
% showspaces=false,
|
|
||||||
% showstringspaces=false,
|
|
||||||
% %
|
|
||||||
% rulecolor=\color{ipython_frame},
|
|
||||||
% frame=single,
|
|
||||||
% frameround={t}{t}{t}{t},
|
|
||||||
% framexleftmargin=6mm,
|
|
||||||
% numbers=left,
|
|
||||||
% numberstyle=\tiny\color{halfgray},
|
|
||||||
% %
|
|
||||||
% %
|
|
||||||
% backgroundcolor=\color{ipython_bg},
|
|
||||||
% % extendedchars=true,
|
|
||||||
% basicstyle=\scriptsize,
|
|
||||||
% keywordstyle=\color{ipython_green}\ttfamily,
|
|
||||||
% morekeywords = [3]{Int, Double},
|
|
||||||
% morekeywords = [2]{foldRight, case},
|
|
||||||
% keywordstyle = [3]{\color{ipython_purple}\ttfamily},
|
|
||||||
% keywordstyle = [2]{\color{ipython_cyan}\ttfamily},
|
|
||||||
% }
|
|
||||||
|
|
||||||
|
|
||||||
% \begin{document}
|
|
||||||
|
|
||||||
% \begin{lstfloat}
|
|
||||||
% \begin{lstlisting}[language=iPython]
|
|
||||||
% import breeze.stats.distributions.Uniform
|
|
||||||
% import breeze.stats.distributions.Gaussian
|
|
||||||
% import scala.language.postfixOps
|
|
||||||
|
|
||||||
% object Activation {
|
|
||||||
% def apply(x: Double): Double = math.max(0, x)
|
|
||||||
|
|
||||||
% def d(x: Double): Double = if (x > 0) 1 else 0
|
|
||||||
% }
|
|
||||||
|
|
||||||
% class RSNN(val n: Int, val gamma: Double = 0.001) {
|
|
||||||
% val g_unif = Uniform(-10, 10)
|
|
||||||
% val g_gauss = Gaussian(0, 5)
|
|
||||||
|
|
||||||
% val xis = g_unif.sample(n)
|
|
||||||
% val vs = g_gauss.sample(n)
|
|
||||||
% val bs = xis zip vs map {case(xi, v) => xi * v}
|
|
||||||
|
|
||||||
% def computeL1(x: Double) = (bs zip vs) map {
|
|
||||||
% case (b, v) => Activation(b + v * x) }
|
|
||||||
|
|
||||||
% def computeL2(l1: Seq[Double], ws: Seq[Double]): Double =
|
|
||||||
% (l1 zip ws) map { case (l, w) => w * l } sum
|
|
||||||
|
|
||||||
% def output(ws: Seq[Double])(x: Double): Double =
|
|
||||||
% computeL2(computeL1(x), ws)
|
|
||||||
|
|
||||||
% def learn(data: Seq[(Double, Double)], ws: Seq[Double],
|
|
||||||
% lam: Double, gamma: Double): Seq[Double] = {
|
|
||||||
|
|
||||||
% lazy val deltas = data.map {
|
|
||||||
% case (x, y) =>
|
|
||||||
% val l1 = computeL1(x) // n
|
|
||||||
% val out = computeL2(l1, ws) // 1
|
|
||||||
% (l1 zip ws) map {case (l1, w) => (l1 * 2 * (out - y) +
|
|
||||||
% lam * 2 * w) * gamma * -1}
|
|
||||||
% }
|
|
||||||
|
|
||||||
% deltas.foldRight(ws)(
|
|
||||||
% (delta, ws) => ws zip (delta) map { case (w, d) => w + d })
|
|
||||||
% }
|
|
||||||
|
|
||||||
% def train(data: Seq[(Double, Double)], iter: Int, lam: Double,
|
|
||||||
% gamma: Double = gamma): (Seq[Double], Double => Double)= {
|
|
||||||
|
|
||||||
% val ws = (1 to iter).foldRight((1 to n).map(
|
|
||||||
% _ => 0.0) :Seq[Double])((i, w) => {
|
|
||||||
% println(s"Training iteration $i")
|
|
||||||
% println(w.sum/w.length)
|
|
||||||
% learn(data, w, lam, gamma / 10)
|
|
||||||
% })
|
|
||||||
% (ws, output(ws))
|
|
||||||
% }
|
|
||||||
% }
|
|
||||||
% \end{lstlisting}
|
|
||||||
% \caption{Scala code used to build and train the ridge penalized
|
|
||||||
% randomized shallow neural network in .... The parameter \textit{lam}
|
|
||||||
% in the train function represents the $\lambda$ parameter in the error
|
|
||||||
% function. The parameters \textit{n} and \textit{gamma} set the number
|
|
||||||
% of hidden nodes and the stepsize for training.}
|
|
||||||
% \end{lstfloat}
|
|
||||||
% \clearpage
|
|
||||||
|
|
||||||
% \begin{lstlisting}[language=iPython]
|
|
||||||
% import tensorflow as tf
|
|
||||||
% import numpy as np
|
|
||||||
% from tensorflow.keras.callbacks import CSVLogger
|
|
||||||
% from tensorflow.keras.preprocessing.image import ImageDataGenerator
|
|
||||||
|
|
||||||
% mnist = tf.keras.datasets.mnist
|
|
||||||
|
|
||||||
% (x_train, y_train), (x_test, y_test) = mnist.load_data()
|
|
||||||
% x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
|
|
||||||
% x_train = x_train / 255.0
|
|
||||||
% x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
|
|
||||||
% x_test = x_test / 255.0
|
|
||||||
|
|
||||||
% y_train = tf.keras.utils.to_categorical(y_train)
|
|
||||||
% y_test = tf.keras.utils.to_categorical(y_test)
|
|
||||||
|
|
||||||
% model = tf.keras.models.Sequential()
|
|
||||||
% model.add(tf.keras.layers.Conv2D(24,kernel_size=5,padding='same',activation='relu',input_shape=(28,28,1)))
|
|
||||||
% model.add(tf.keras.layers.MaxPool2D())
|
|
||||||
% model.add(tf.keras.layers.Conv2D(64,kernel_size=5,padding='same',activation='relu'))
|
|
||||||
% model.add(tf.keras.layers.MaxPool2D(padding='same'))
|
|
||||||
% model.add(tf.keras.layers.Flatten())
|
|
||||||
% model.add(tf.keras.layers.Dense(256, activation='relu'))
|
|
||||||
% model.add(tf.keras.layers.Dropout(0.2))
|
|
||||||
% model.add(tf.keras.layers.Dense(10, activation='softmax'))
|
|
||||||
% model.compile(optimizer='adam', loss="categorical_crossentropy",
|
|
||||||
% metrics=["accuracy"])
|
|
||||||
|
|
||||||
% datagen = ImageDataGenerator(
|
|
||||||
% rotation_range = 30,
|
|
||||||
% zoom_range = 0.15,
|
|
||||||
% width_shift_range=2,
|
|
||||||
% height_shift_range=2,
|
|
||||||
% shear_range = 1)
|
|
||||||
|
|
||||||
% csv_logger = CSVLogger(<Target File>)
|
|
||||||
|
|
||||||
% history = model.fit(datagen.flow(x_train, y_train, batch_size=50),
|
|
||||||
% validation_data=(x_test, y_test),
|
|
||||||
% epochs=125, callbacks=[csv_logger],
|
|
||||||
% steps_per_epoch = x_train.shape[0]//50)
|
|
||||||
|
|
||||||
% \end{lstlisting}
|
|
||||||
% \clearpage
|
|
||||||
% \begin{lstlisting}[language=iPython]
|
|
||||||
% import tensorflow as tf
|
|
||||||
% import numpy as np
|
|
||||||
% from tensorflow.keras.callbacks import CSVLogger
|
|
||||||
% from tensorflow.keras.preprocessing.image import ImageDataGenerator
|
|
||||||
% mnist = tf.keras.datasets.fashion_mnist
|
|
||||||
|
|
||||||
% (x_train, y_train), (x_test, y_test) = mnist.load_data()
|
|
||||||
% x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
|
|
||||||
% x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
|
|
||||||
% x_train, x_test = x_train / 255.0, x_test / 255.0
|
|
||||||
|
|
||||||
% y_train = tf.keras.utils.to_categorical(y_train)
|
|
||||||
% y_test = tf.keras.utils.to_categorical(y_test)
|
|
||||||
|
|
||||||
% model = tf.keras.Sequential()
|
|
||||||
% model.add(tf.keras.layers.Conv2D(filters = 32, kernel_size = (3, 3), activation='relu',
|
|
||||||
% input_shape = (28, 28, 1), padding='same'))
|
|
||||||
% model.add(tf.keras.layers.Conv2D(filters = 32, kernel_size = (2, 2), activation='relu', padding = 'same'))
|
|
||||||
% model.add(tf.keras.layers.MaxPool2D(strides=(2,2)))
|
|
||||||
% model.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = (3, 3), activation='relu', padding='same'))
|
|
||||||
% model.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = (3, 3), activation='relu', padding='same'))
|
|
||||||
% model.add(tf.keras.layers.MaxPool2D(strides=(2,2)))
|
|
||||||
% model.add(tf.keras.layers.Flatten())
|
|
||||||
% model.add(tf.keras.layers.Dense(256, activation='relu'))
|
|
||||||
% model.add(tf.keras.layers.Dropout(0.2))
|
|
||||||
% model.add(tf.keras.layers.Dense(10, activation='softmax'))
|
|
||||||
|
|
||||||
% model.compile(optimizer=tf.keras.optimizers.Adam(lr = 1e-3), loss="categorical_crossentropy", metrics=["accuracy"])
|
|
||||||
|
|
||||||
% datagen = ImageDataGenerator(
|
|
||||||
% rotation_range = 15,
|
|
||||||
% zoom_range = 0.1,
|
|
||||||
% width_shift_range=2,
|
|
||||||
% height_shift_range=2,
|
|
||||||
% shear_range = 0.5,
|
|
||||||
% fill_mode = 'constant',
|
|
||||||
% cval = 0)
|
|
||||||
|
|
||||||
% csv_logger = CSVLogger(<Target File>)
|
|
||||||
|
|
||||||
% history = model.fit(datagen.flow(x_train, y_train, batch_size=30),
|
|
||||||
% steps_per_epoch=2000,
|
|
||||||
% validation_data=(x_test, y_test),
|
|
||||||
% epochs=125, callbacks=[csv_logger],
|
|
||||||
% shuffle=True)
|
|
||||||
|
|
||||||
% \end{lstlisting}
|
|
||||||
% \begin{lstlisting}[language=iPython]
|
|
||||||
% def get_random_sample(a, b, number_of_samples=10):
|
|
||||||
% x = []
|
|
||||||
% y = []
|
|
||||||
% for category_number in range(0,10):
|
|
||||||
% # get all samples of a category
|
|
||||||
% train_data_category = a[b==category_number]
|
|
||||||
% # pick a number of random samples from the category
|
|
||||||
% train_data_category = train_data_category[np.random.randint(
|
|
||||||
% train_data_category.shape[0], size=number_of_samples), :]
|
|
||||||
% x.extend(train_data_category)
|
|
||||||
% y.append([category_number]*number_of_samples)
|
|
||||||
|
|
||||||
% return (np.asarray(x).reshape(-1, 28, 28, 1),
|
|
||||||
% np.asarray(y).reshape(10*number_of_samples,1))
|
|
||||||
% \end{lstlisting}
|
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
\begin{lstlisting}[language=iPython]
import tensorflow as tf
import numpy as np
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.preprocessing.image import ImageDataGenerator

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_train = x_train / 255.0
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
x_test = x_test / 255.0

y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(24,kernel_size=5,padding='same',activation='relu',input_shape=(28,28,1)))
model.add(tf.keras.layers.MaxPool2D())
model.add(tf.keras.layers.Conv2D(64,kernel_size=5,padding='same',activation='relu'))
model.add(tf.keras.layers.MaxPool2D(padding='same'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(256, activation='relu'))
# j: dropout rate for the run (e.g. 0.0 or 0.2)
model.add(tf.keras.layers.Dropout(j))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss="categorical_crossentropy",
              metrics=["accuracy"])

datagen = ImageDataGenerator(
    rotation_range = 30,
    zoom_range = 0.15,
    width_shift_range=2,
    height_shift_range=2,
    shear_range = 1)

csv_logger = CSVLogger(<Target File>)

# x_train_, y_train_: the (possibly subsampled) training data for this
# run, e.g. drawn with get_random_sample below
history = model.fit(datagen.flow(x_train_, y_train_, batch_size=50),
                    validation_data=(x_test, y_test), epochs=125,
                    callbacks=[csv_logger],
                    steps_per_epoch = x_train_.shape[0]//50)
\end{lstlisting}
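A minimal sketch of how the final test accuracy of a trained run can be read off directly; it assumes the model, x_test and y_test objects defined in the listing above and is only an alternative to taking the last row of the CSVLogger output.

\begin{lstlisting}[language=iPython]
# Sketch: evaluate the trained model on the full test set.
# evaluate returns [loss, accuracy] since the model was compiled
# with metrics=["accuracy"].
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print(test_accuracy)
\end{lstlisting}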

\begin{lstlisting}[language=iPython]
def get_random_sample(a, b, number_of_samples=10):
    x = []
    y = []
    for category_number in range(0,10):
        # get all samples of a category
        train_data_category = a[b==category_number]
        # pick a number of random samples from the category
        train_data_category = train_data_category[np.random.randint(
            train_data_category.shape[0], size=number_of_samples), :]
        x.extend(train_data_category)
        y.append([category_number]*number_of_samples)

    return (np.asarray(x).reshape(-1, 28, 28, 1),
            np.asarray(y).reshape(10*number_of_samples,1))
\end{lstlisting}
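A minimal usage sketch for get_random_sample, showing one way the subsampled training data x_train_ and y_train_ used in model.fit above could be obtained; drawing from the raw integer labels and one-hot encoding afterwards is an assumption made for illustration.

\begin{lstlisting}[language=iPython]
# Hypothetical example: draw 10 random samples per digit class.
(x_raw, y_raw), _ = tf.keras.datasets.mnist.load_data()
x_train_, y_train_ = get_random_sample(x_raw / 255.0, y_raw,
                                       number_of_samples=10)
# one-hot encode the drawn labels to match the softmax output layer
y_train_ = tf.keras.utils.to_categorical(y_train_)
\end{lstlisting}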
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End:
17
TeX/Plots/Data/data_sin_d_t.csv
Executable file
@ -0,0 +1,17 @@
x,y
-3.141592653589793,0.0802212608585366
-2.722713633111154,-0.3759376368887911
-2.303834612632515,-1.3264180339054117
-1.8849555921538759,-0.8971334213504949
-1.4660765716752369,-0.7724344034354425
-1.0471975511965979,-0.9501497164520739
-0.6283185307179586,-0.6224628757084738
-0.2094395102393194,-0.35622668982623207
0.2094395102393194,-0.18377660088356823
0.6283185307179586,0.7836770998126841
1.0471975511965974,0.5874762732054489
1.4660765716752362,1.0696991264956026
1.8849555921538759,1.1297065441952743
2.3038346126325155,0.7587275382323738
2.7227136331111543,-0.030547103790458163
3.1415926535897922,0.044327111895927106
1002
TeX/Plots/Data/matlab_0.csv
Executable file
File diff suppressed because it is too large
1002
TeX/Plots/Data/matlab_1.csv
Executable file
File diff suppressed because it is too large
1002
TeX/Plots/Data/matlab_3.csv
Executable file
File diff suppressed because it is too large
1002
TeX/Plots/Data/matlab_sin_d_01.csv
Executable file
File diff suppressed because it is too large
1002
TeX/Plots/Data/matlab_sin_d_1.csv
Executable file
File diff suppressed because it is too large
1002
TeX/Plots/Data/matlab_sin_d_3.csv
Executable file
File diff suppressed because it is too large
58
TeX/Plots/Data/min_max.txt
Executable file
@ -0,0 +1,58 @@
datagen_dropout_02_1
test
0.6604& 0.5175& 0.60136& 0.002348447

datagen_dropout_00_1
test
0.6704& 0.4878& 0.58621& 0.003600539

dropout_02_1
test
0.5312& 0.4224& 0.47137& 0.001175149

default_1
test
0.5633& 0.3230& 0.45702& 0.004021449

datagen_dropout_02_10
test
0.9441& 0.9061& 0.92322& 0.00015
train
1& 0.97& 0.989& 1e-04

datagen_dropout_00_10
test
0.931& 0.9018& 0.9185& 6e-05
train
1& 0.97& 0.99& 0.00013

dropout_02_10
test
0.9423& 0.9081& 0.92696& 0.00013
train
1& 0.99& 0.992& 2e-05

default_10
test
0.8585& 0.8148& 0.83771& 0.00027
train
1& 1& 1& 0

datagen_dropout_02_100
test
0.9805& 0.9727& 0.97826& 0
train

datagen_dropout_00_100
test
0.981& 0.9702& 0.9769& 1e-05
train

dropout_02_100
test
0.9796& 0.9719& 0.97703& 1e-05
train

default_100
test
0.9637& 0.9506& 0.95823& 2e-05
1202
TeX/Plots/Data/overfit.csv
Normal file
File diff suppressed because it is too large
1202
TeX/Plots/Data/overfit_spline.csv
Normal file
File diff suppressed because it is too large
101
TeX/Plots/Data/scala_out_d_1_t.csv
Executable file
@ -0,0 +1,101 @@
|
|||||||
|
x_n_5000_tl_0.1,y_n_5000_tl_0.1,x_n_5000_tl_1.0,y_n_5000_tl_1.0,x_n_5000_tl_3.0,y_n_5000_tl_3.0
|
||||||
|
-5.0,1.794615305950707,-5.0,0.3982406589003759,-5.0,-0.4811539502118497
|
||||||
|
-4.898989898989899,1.6984389486364895,-4.898989898989899,0.35719218031912614,-4.898989898989899,-0.48887996302459025
|
||||||
|
-4.797979797979798,1.6014200743009022,-4.797979797979798,0.3160182633093358,-4.797979797979798,-0.4966732473871599
|
||||||
|
-4.696969696969697,1.5040575427157106,-4.696969696969697,0.27464978660531225,-4.696969696969697,-0.5045073579233731
|
||||||
|
-4.595959595959596,1.4061194142774731,-4.595959595959596,0.23293440418365288,-4.595959595959596,-0.5123589845230747
|
||||||
|
-4.494949494949495,1.3072651356075136,-4.494949494949495,0.19100397829173557,-4.494949494949495,-0.5202738824510786
|
||||||
|
-4.393939393939394,1.2078259346207492,-4.393939393939394,0.1488314515422353,-4.393939393939394,-0.5282281154332915
|
||||||
|
-4.292929292929293,1.1079271590765678,-4.292929292929293,0.10646618526238515,-4.292929292929293,-0.536250283913464
|
||||||
|
-4.191919191919192,1.0073183089866045,-4.191919191919192,0.0637511521454329,-4.191919191919192,-0.5443068679044686
|
||||||
|
-4.090909090909091,0.9064682044248323,-4.090909090909091,0.020965778107027506,-4.090909090909091,-0.5524049731989601
|
||||||
|
-3.9898989898989896,0.805095064694333,-3.9898989898989896,-0.02200882631350869,-3.9898989898989896,-0.5605562335116703
|
||||||
|
-3.888888888888889,0.7032463151196859,-3.888888888888889,-0.06548644224881082,-3.888888888888889,-0.5687680272492979
|
||||||
|
-3.787878787878788,0.6007843964001714,-3.787878787878788,-0.10914135786185346,-3.787878787878788,-0.5770307386196555
|
||||||
|
-3.686868686868687,0.4978572358270573,-3.686868686868687,-0.15292201515712506,-3.686868686868687,-0.5853131654059709
|
||||||
|
-3.5858585858585856,0.39465522349482535,-3.5858585858585856,-0.19694472820060063,-3.5858585858585856,-0.593636189078738
|
||||||
|
-3.484848484848485,0.29091175104318323,-3.484848484848485,-0.24139115547918963,-3.484848484848485,-0.6019914655156898
|
||||||
|
-3.383838383838384,0.1868284306918275,-3.383838383838384,-0.28617728400089926,-3.383838383838384,-0.6103823599700093
|
||||||
|
-3.282828282828283,0.0817944681090728,-3.282828282828283,-0.33119615483860937,-3.282828282828283,-0.6188088888423856
|
||||||
|
-3.1818181818181817,-0.023670753859105602,-3.1818181818181817,-0.3764480559542342,-3.1818181818181817,-0.6272515625106694
|
||||||
|
-3.080808080808081,-0.1299349094939808,-3.080808080808081,-0.42202262988259276,-3.080808080808081,-0.6357221532633648
|
||||||
|
-2.9797979797979797,-0.2360705715363967,-2.9797979797979797,-0.467584017465408,-2.9797979797979797,-0.6440454918766952
|
||||||
|
-2.878787878787879,-0.34125419448980393,-2.878787878787879,-0.5126079284225549,-2.878787878787879,-0.65203614244987
|
||||||
|
-2.7777777777777777,-0.443504036212927,-2.7777777777777777,-0.5569084060463078,-2.7777777777777777,-0.6594896031012563
|
||||||
|
-2.676767676767677,-0.5411482698953787,-2.676767676767677,-0.6002683604183435,-2.676767676767677,-0.6661215834468585
|
||||||
|
-2.5757575757575757,-0.6363089624800997,-2.5757575757575757,-0.6396725440402657,-2.5757575757575757,-0.6715398637661353
|
||||||
|
-2.474747474747475,-0.725241414197713,-2.474747474747475,-0.6753456416248385,-2.474747474747475,-0.674565545688341
|
||||||
|
-2.3737373737373737,-0.8010191169999671,-2.3737373737373737,-0.7066964605752718,-2.3737373737373737,-0.6765307025278043
|
||||||
|
-2.272727272727273,-0.8626605255789729,-2.272727272727273,-0.7348121862404637,-2.272727272727273,-0.6766187567521622
|
||||||
|
-2.1717171717171717,-0.911435840482434,-2.1717171717171717,-0.7592451818361001,-2.1717171717171717,-0.6747200340049733
|
||||||
|
-2.070707070707071,-0.9518228090965052,-2.070707070707071,-0.7755022118880182,-2.070707070707071,-0.6711535886166349
|
||||||
|
-1.9696969696969697,-0.9791642715505677,-1.9696969696969697,-0.7889078495544403,-1.9696969696969697,-0.6653309071624213
|
||||||
|
-1.868686868686869,-0.9959505678135467,-1.868686868686869,-0.7978655263590677,-1.868686868686869,-0.6574048849245917
|
||||||
|
-1.7676767676767677,-1.0042572630521163,-1.7676767676767677,-0.8024926242661324,-1.7676767676767677,-0.6465258005011485
|
||||||
|
-1.6666666666666665,-1.0031374573437621,-1.6666666666666665,-0.8024786300118695,-1.6666666666666665,-0.6326231142587367
|
||||||
|
-1.5656565656565657,-0.9924082586558415,-1.5656565656565657,-0.7967021619463882,-1.5656565656565657,-0.6166476676023103
|
||||||
|
-1.4646464646464645,-0.9734669180157094,-1.4646464646464645,-0.7849942222838879,-1.4646464646464645,-0.5979735104135664
|
||||||
|
-1.3636363636363638,-0.9509454078185711,-1.3636363636363638,-0.7662349774950723,-1.3636363636363638,-0.5774876452737464
|
||||||
|
-1.2626262626262625,-0.9231872651397443,-1.2626262626262625,-0.7433085627087517,-1.2626262626262625,-0.554712230754877
|
||||||
|
-1.1616161616161618,-0.8903321986477033,-1.1616161616161618,-0.7150493507052204,-1.1616161616161618,-0.5295933185437713
|
||||||
|
-1.0606060606060606,-0.8533989447900909,-1.0606060606060606,-0.6814643745239313,-1.0606060606060606,-0.5021785239088743
|
||||||
|
-0.9595959595959593,-0.8107636317978494,-0.9595959595959593,-0.6421615608115637,-0.9595959595959593,-0.472606158673678
|
||||||
|
-0.858585858585859,-0.7612745578549842,-0.858585858585859,-0.5973114244123007,-0.858585858585859,-0.4405007246413654
|
||||||
|
-0.7575757575757578,-0.7079734098301842,-0.7575757575757578,-0.5483264663676062,-0.7575757575757578,-0.4059991890198415
|
||||||
|
-0.6565656565656566,-0.6488963804386183,-0.6565656565656566,-0.49554278063844803,-0.6565656565656566,-0.3695525928005769
|
||||||
|
-0.5555555555555554,-0.5859222961089965,-0.5555555555555554,-0.4403758682478846,-0.5555555555555554,-0.33111757514282614
|
||||||
|
-0.45454545454545503,-0.5162955936688821,-0.45454545454545503,-0.38037108381900747,-0.45454545454545503,-0.28897806883385513
|
||||||
|
-0.3535353535353538,-0.4413321076045784,-0.3535353535353538,-0.31690399361617216,-0.3535353535353538,-0.24421776219711205
|
||||||
|
-0.2525252525252526,-0.3616414699818406,-0.2525252525252526,-0.25204481791119354,-0.2525252525252526,-0.19795939679257332
|
||||||
|
-0.15151515151515138,-0.2780916794094584,-0.15151515151515138,-0.18575713332565263,-0.15151515151515138,-0.15066195015784248
|
||||||
|
-0.050505050505050164,-0.18977454284683343,-0.050505050505050164,-0.11797643773197505,-0.050505050505050164,-0.10274021898431054
|
||||||
|
0.050505050505050164,-0.0969321739577506,0.050505050505050164,-0.049351343645831554,0.050505050505050164,-0.05414525935109969
|
||||||
|
0.15151515151515138,-4.4802289442360816E-4,0.15151515151515138,0.019464788799119597,0.15151515151515138,-0.005354051541524688
|
||||||
|
0.2525252525252526,0.09918485823776255,0.2525252525252526,0.08804193897553166,0.2525252525252526,0.0433816826222638
|
||||||
|
0.3535353535353538,0.1998735386668185,0.3535353535353538,0.15569793996298523,0.3535353535353538,0.09176342956997338
|
||||||
|
0.45454545454545414,0.2999169047201809,0.45454545454545414,0.2218157527002848,0.45454545454545414,0.13952481930457306
|
||||||
|
0.5555555555555554,0.3978204122760816,0.5555555555555554,0.2846069052305317,0.5555555555555554,0.18668380673527113
|
||||||
|
0.6565656565656566,0.49120659266814587,0.6565656565656566,0.34467300454040606,0.6565656565656566,0.23277011860523958
|
||||||
|
0.7575757575757578,0.5777980409414698,0.7575757575757578,0.40208229496894643,0.7575757575757578,0.27613740421328176
|
||||||
|
0.8585858585858581,0.6568213676446025,0.8585858585858581,0.45705882493784666,0.8585858585858581,0.316305372116494
|
||||||
|
0.9595959595959593,0.7305067401293432,0.9595959595959593,0.5066458373898202,0.9595959595959593,0.35343427932594923
|
||||||
|
1.0606060606060606,0.7966609096765547,1.0606060606060606,0.5516149744358979,1.0606060606060606,0.38717949746647334
|
||||||
|
1.1616161616161618,0.8521200140106753,1.1616161616161618,0.5878017101641295,1.1616161616161618,0.4170777567516486
|
||||||
|
1.262626262626262,0.8975259277901253,1.262626262626262,0.6168588441570951,1.262626262626262,0.4446516626376453
|
||||||
|
1.3636363636363633,0.9290861930067627,1.3636363636363633,0.6411836178298306,1.3636363636363633,0.46927636759559477
|
||||||
|
1.4646464646464645,0.9508521659740165,1.4646464646464645,0.6610795923876176,1.4646464646464645,0.4901812911280025
|
||||||
|
1.5656565656565657,0.9612143570080512,1.5656565656565657,0.6768219209716341,1.5656565656565657,0.5079918402617868
|
||||||
|
1.666666666666667,0.9590141254017294,1.666666666666667,0.6878304863477654,1.666666666666667,0.5233400296358803
|
||||||
|
1.7676767676767673,0.9434050911299104,1.7676767676767673,0.6925040592034013,1.7676767676767673,0.5351552186913862
|
||||||
|
1.8686868686868685,0.9166484175947194,1.8686868686868685,0.6900246131027935,1.8686868686868685,0.5441567759439713
|
||||||
|
1.9696969696969697,0.8762489440965586,1.9696969696969697,0.6764843940414706,1.9696969696969697,0.5496025817549586
|
||||||
|
2.070707070707071,0.821609113516158,2.070707070707071,0.6566284893291617,2.070707070707071,0.5536820874974513
|
||||||
|
2.1717171717171713,0.7581599898835192,2.1717171717171713,0.6308981649064993,2.1717171717171713,0.5533100035360206
|
||||||
|
2.2727272727272725,0.6877704486402438,2.2727272727272725,0.6016976467409065,2.2727272727272725,0.550251787575325
|
||||||
|
2.3737373737373737,0.610815603287697,2.3737373737373737,0.5704721438286479,2.3737373737373737,0.5445865851994449
|
||||||
|
2.474747474747475,0.5275282181728166,2.474747474747475,0.5362814307290142,2.474747474747475,0.537858723684707
|
||||||
|
2.5757575757575752,0.44098299617705367,2.5757575757575752,0.5007018478259194,2.5757575757575752,0.5301810557083476
|
||||||
|
2.6767676767676765,0.3535127269572474,2.6767676767676765,0.4635791072799046,2.6767676767676765,0.5214280506499815
|
||||||
|
2.7777777777777777,0.2669314340184933,2.7777777777777777,0.4252681214470508,2.7777777777777777,0.5119428002841875
|
||||||
|
2.878787878787879,0.18244774892195767,2.878787878787879,0.3860805361925665,2.878787878787879,0.5020280103571171
|
||||||
|
2.9797979797979792,0.10009287374461422,2.9797979797979792,0.34649978327862213,2.9797979797979792,0.4918997465440798
|
||||||
|
3.0808080808080813,0.01825358803182036,3.0808080808080813,0.3067456416075246,3.0808080808080813,0.48152164248236273
|
||||||
|
3.1818181818181817,-0.06257603867024951,3.1818181818181817,0.2670556605010131,3.1818181818181817,0.4710506406469346
|
||||||
|
3.282828282828282,-0.14256250037038515,3.282828282828282,0.22747478740583862,3.282828282828282,0.46061400021772264
|
||||||
|
3.383838383838384,-0.22183964093761221,3.383838383838384,0.18823442296238005,3.383838383838384,0.4502063176185161
|
||||||
|
3.4848484848484844,-0.3000530710681483,3.4848484848484844,0.14930923451816047,3.4848484848484844,0.43983195563012295
|
||||||
|
3.5858585858585865,-0.37715837046834677,3.5858585858585865,0.11064727810620513,3.5858585858585865,0.4294855408707603
|
||||||
|
3.686868686868687,-0.4535879015098929,3.686868686868687,0.0721761317620166,3.686868686868687,0.41918651120808587
|
||||||
|
3.787878787878787,-0.5295958753874862,3.787878787878787,0.03385158496402993,3.787878787878787,0.4089211108732785
|
||||||
|
3.8888888888888893,-0.605341954214415,3.8888888888888893,-0.004196426105451837,3.8888888888888893,0.3986849690078671
|
||||||
|
3.9898989898989896,-0.6805725256650321,3.9898989898989896,-0.04204424507819378,3.9898989898989896,0.3884698016669201
|
||||||
|
4.09090909090909,-0.7553382625080638,4.09090909090909,-0.0795288839270637,4.09090909090909,0.37826736472008937
|
||||||
|
4.191919191919192,-0.8294318073700058,4.191919191919192,-0.11675718948094181,4.191919191919192,0.36808861016948324
|
||||||
|
4.292929292929292,-0.9025671571505313,4.292929292929292,-0.15379169226972225,4.292929292929292,0.3579396881040081
|
||||||
|
4.3939393939393945,-0.9751233932017581,4.3939393939393945,-0.19069301489402432,4.3939393939393945,0.3478279422102407
|
||||||
|
4.494949494949495,-1.0471623188798242,4.494949494949495,-0.227426975503073,4.494949494949495,0.3377388026398381
|
||||||
|
4.595959595959595,-1.1187532876284094,4.595959595959595,-0.263878605240927,4.595959595959595,0.32767338817749475
|
||||||
|
4.696969696969697,-1.189660915888889,4.696969696969697,-0.3001960056492053,4.696969696969697,0.3176530967513947
|
||||||
|
4.797979797979798,-1.2601246569645388,4.797979797979798,-0.3363281464377301,4.797979797979798,0.3076778013243957
|
||||||
|
4.8989898989899,-1.3303637186847002,4.8989898989899,-0.37225330321499334,4.8989898989899,0.29772768053304777
|
||||||
|
5.0,-1.4004134094571867,5.0,-0.4080316669473787,5.0,0.2878184725593889
|
|
101
TeX/Plots/Data/scala_out_sin.csv
Executable file
@ -0,0 +1,101 @@
|
|||||||
|
x_n_50_tl_0.0,y_n_50_tl_0.0,x_n_500_tl_0.0,y_n_500_tl_0.0,x_n_5000_tl_0.0,y_n_5000_tl_0.0,x_n_50_tl_1.0,y_n_50_tl_1.0,x_n_500_tl_1.0,y_n_500_tl_1.0,x_n_5000_tl_1.0,y_n_5000_tl_1.0,x_n_50_tl_3.0,y_n_50_tl_3.0,x_n_500_tl_3.0,y_n_500_tl_3.0,x_n_5000_tl_3.0,y_n_5000_tl_3.0
|
||||||
|
-5.0,-0.8599583057554976,-5.0,1.6797068787192495,-5.0,1.7379689606223239,-5.0,-0.42741272499487776,-5.0,0.23661838590976328,-5.0,0.20399386816229978,-5.0,0.13095951218866275,-5.0,-0.46242184829078237,-5.0,-0.41058629664051305
|
||||||
|
-4.898989898989899,-0.8456047840536887,-4.898989898989899,1.5940442438460278,-4.898989898989899,1.6472202329485999,-4.898989898989899,-0.4276431031893983,-4.898989898989899,0.20862681459226723,-4.898989898989899,0.17824071850107404,-4.898989898989899,0.10539057470765349,-4.898989898989899,-0.4609018322257037,-4.898989898989899,-0.4110599614729015
|
||||||
|
-4.797979797979798,-0.8312512623518801,-4.797979797979798,1.5066655952530659,-4.797979797979798,1.5560370024912986,-4.797979797979798,-0.42787348138391906,-4.797979797979798,0.18056404254218186,-4.797979797979798,0.1523309553054011,-4.797979797979798,0.07982163722664384,-4.797979797979798,-0.4593800781031771,-4.797979797979798,-0.41155161184122596
|
||||||
|
-4.696969696969697,-0.8168977406500709,-4.696969696969697,1.4192486056640365,-4.696969696969697,1.4641612521550218,-4.696969696969697,-0.42810385957843955,-4.696969696969697,0.1524990189306639,-4.696969696969697,0.1262143553005724,-4.696969696969697,0.05464380509332076,-4.696969696969697,-0.4578583174084625,-4.696969696969697,-0.41205688060740875
|
||||||
|
-4.595959595959596,-0.8025442189482614,-4.595959595959596,1.3308076153149195,-4.595959595959596,1.3718747642404912,-4.595959595959596,-0.42833423777296026,-4.595959595959596,0.12443399531914556,-4.595959595959596,0.10000299804643913,-4.595959595959596,0.029720704709016,-4.595959595959596,-0.45633655338498746,-4.595959595959596,-0.4126005212950324
|
||||||
|
-4.494949494949495,-0.788190697246453,-4.494949494949495,1.2408764237610932,-4.494949494949495,1.2794547935729972,-4.494949494949495,-0.42856461596748074,-4.494949494949495,0.09628036393480953,-4.494949494949495,0.07370213597938947,-4.494949494949495,0.004797604324711557,-4.494949494949495,-0.45481454100468904,-4.494949494949495,-0.41317280828652125
|
||||||
|
-4.393939393939394,-0.7757194193374484,-4.393939393939394,1.150777108936673,-4.393939393939394,1.1865984175078124,-4.393939393939394,-0.4287949941620015,-4.393939393939394,0.06803799087458409,-4.393939393939394,0.047353868838267546,-4.393939393939394,-0.019952866294811474,-4.393939393939394,-0.4532902682540511,-4.393939393939394,-0.41378088791316736
|
||||||
|
-4.292929292929293,-0.7635428572249876,-4.292929292929293,1.0606777941122512,-4.292929292929293,1.0935156155193826,-4.292929292929293,-0.42902537235652216,-4.292929292929293,0.039745189354681264,-4.292929292929293,0.020863777423783696,-4.292929292929293,-0.04424719286600705,-4.292929292929293,-0.45176167641583376,-4.292929292929293,-0.41441903123033147
|
||||||
|
-4.191919191919192,-0.7514991436388702,-4.191919191919192,0.9705784792878309,-4.191919191919192,0.9999451479756023,-4.191919191919192,-0.42925575055104276,-4.191919191919192,0.01144626171509771,-4.191919191919192,-0.005903721047402898,-4.191919191919192,-0.06854151943720274,-4.191919191919192,-0.4502329821869361,-4.191919191919192,-0.415076548381381
|
||||||
|
-4.090909090909091,-0.7396941691045894,-4.090909090909091,0.8798554638230421,-4.090909090909091,0.9059203084364202,-4.090909090909091,-0.42948612874556336,-4.090909090909091,-0.016952280979816926,-4.090909090909091,-0.03298925765732338,-4.090909090909091,-0.09283584600839848,-4.090909090909091,-0.44869972853751156,-4.090909090909091,-0.4157629995846106
|
||||||
|
-3.9898989898989896,-0.7279252765177078,-3.9898989898989896,0.7884244803113447,-3.9898989898989896,0.811474387051809,-3.9898989898989896,-0.42971650694008423,-3.9898989898989896,-0.04548036359257723,-3.9898989898989896,-0.06017986522111469,-3.9898989898989896,-0.11713017257959416,-3.9898989898989896,-0.44715472797022665,-3.9898989898989896,-0.41647096691012625
|
||||||
|
-3.888888888888889,-0.7161580919866168,-3.888888888888889,0.6966140451148786,-3.888888888888889,0.7168906385054419,-3.888888888888889,-0.4299468851346048,-3.888888888888889,-0.07408610945271141,-3.888888888888889,-0.0874709084540591,-3.888888888888889,-0.14142449915078953,-3.888888888888889,-0.4456015995456161,-3.888888888888889,-0.4171930364234525
|
||||||
|
-3.787878787878788,-0.7043909074555256,-3.787878787878788,0.604803249010758,-3.787878787878788,0.6219712537736367,-3.787878787878788,-0.4301772633291252,-3.787878787878788,-0.10285723661640957,-3.787878787878788,-0.11503695886523099,-3.787878787878788,-0.16571882572198493,-3.787878787878788,-0.4440477592686527,-3.787878787878788,-0.41792735866227004
|
||||||
|
-3.686868686868687,-0.6926237229244344,-3.686868686868687,0.512070766385858,-3.686868686868687,0.5265347560169878,-3.686868686868687,-0.4304076415236461,-3.686868686868687,-0.13176620357773466,-3.686868686868687,-0.1429497539600965,-3.686868686868687,-0.19001315229318066,-3.686868686868687,-0.44249216926013074,-3.686868686868687,-0.4186788950692494
|
||||||
|
-3.5858585858585856,-0.680856538393343,-3.5858585858585856,0.418341406261733,-3.5858585858585856,0.43037422799158725,-3.5858585858585856,-0.43063801971816673,-3.5858585858585856,-0.16072772857488207,-3.5858585858585856,-0.17103810603915154,-3.5858585858585856,-0.21430747886437626,-3.5858585858585856,-0.44093657925160834,-3.5858585858585856,-0.41944890491602094
|
||||||
|
-3.484848484848485,-0.6690893538622519,-3.484848484848485,0.3230008626762439,-3.484848484848485,0.33347359833985296,-3.484848484848485,-0.43086839791268744,-3.484848484848485,-0.189786562504877,-3.484848484848485,-0.1992640699299042,-3.484848484848485,-0.238601805435572,-3.484848484848485,-0.4393809892430859,-3.484848484848485,-0.4202525693559286
|
||||||
|
-3.383838383838384,-0.6573221693311603,-3.383838383838384,0.22755806300474243,-3.383838383838384,0.23599152727957395,-3.383838383838384,-0.4310987761072079,-3.383838383838384,-0.21885301172451227,-3.383838383838384,-0.22770533404467666,-3.383838383838384,-0.2628961320067672,-3.383838383838384,-0.43781693796746485,-3.383838383838384,-0.4210766722370822
|
||||||
|
-3.282828282828283,-0.6455549848000697,-3.282828282828283,0.13172938749299176,-3.282828282828283,0.13785071540835,-3.282828282828283,-0.4313291543017285,-3.282828282828283,-0.24792012144222308,-3.282828282828283,-0.25633384693349226,-3.282828282828283,-0.28719045857796294,-3.282828282828283,-0.4362515901030497,-3.282828282828283,-0.42192705020460003
|
||||||
|
-3.1818181818181817,-0.6337878002689783,-3.1818181818181817,0.03583960513370717,-3.1818181818181817,0.03926297085619488,-3.1818181818181817,-0.43155953249624923,-3.1818181818181817,-0.2770868438988566,-3.1818181818181817,-0.28512064843139634,-3.1818181818181817,-0.3114847851491585,-3.1818181818181817,-0.4346861097486259,-3.1818181818181817,-0.42279043662854426
|
||||||
|
-3.080808080808081,-0.6219933944673289,-3.080808080808081,-0.06005017722557655,-3.080808080808081,-0.05953650043486377,-3.080808080808081,-0.4317899106907698,-3.080808080808081,-0.30634202732953336,-3.080808080808081,-0.3140197227479732,-3.080808080808081,-0.33577911172035446,-3.080808080808081,-0.4331124443470669,-3.080808080808081,-0.42366980349780375
|
||||||
|
-2.9797979797979797,-0.6084802589111126,-2.9797979797979797,-0.15590935392992944,-2.9797979797979797,-0.15810366579897028,-2.9797979797979797,-0.4320202888852905,-2.9797979797979797,-0.33549678779642544,-2.9797979797979797,-0.3430021282671825,-2.9797979797979797,-0.3600734382915496,-2.9797979797979797,-0.4315218307109141,-2.9797979797979797,-0.42449207343700956
|
||||||
|
-2.878787878787879,-0.5891232690738096,-2.878787878787879,-0.24713180817765498,-2.878787878787879,-0.2552003497036097,-2.878787878787879,-0.43225066707981114,-2.878787878787879,-0.36352866123332933,-2.878787878787879,-0.3716002292573769,-2.878787878787879,-0.38436776486274526,-2.878787878787879,-0.42982012082652077,-2.878787878787879,-0.4251380414134998
|
||||||
|
-2.7777777777777777,-0.5636588831509095,-2.7777777777777777,-0.33701300990207655,-2.7777777777777777,-0.35066910453142525,-2.7777777777777777,-0.4324810452743318,-2.7777777777777777,-0.3911342117000581,-2.7777777777777777,-0.39951657101606874,-2.7777777777777777,-0.4086620914339411,-2.7777777777777777,-0.42794280685642583,-2.7777777777777777,-0.4254095546530059
|
||||||
|
-2.676767676767677,-0.538194497228009,-2.676767676767677,-0.4265304961947721,-2.676767676767677,-0.4419057912445846,-2.676767676767677,-0.4295143886441945,-2.676767676767677,-0.41758811768544335,-2.676767676767677,-0.4264377612958712,-2.676767676767677,-0.4329564180051365,-2.676767676767677,-0.4251801800597513,-2.676767676767677,-0.42514350551302893
|
||||||
|
-2.5757575757575757,-0.5127301113051083,-2.5757575757575757,-0.5160338868263108,-2.5757575757575757,-0.530562896182845,-2.5757575757575757,-0.4209813938653777,-2.5757575757575757,-0.4421888684751682,-2.5757575757575757,-0.4521958194404763,-2.5757575757575757,-0.4572507445763323,-2.5757575757575757,-0.4220835438175992,-2.5757575757575757,-0.42424941235712643
|
||||||
|
-2.474747474747475,-0.48726572538220836,-2.474747474747475,-0.6045443334592155,-2.474747474747475,-0.615529859161848,-2.474747474747475,-0.4124483990865609,-2.474747474747475,-0.4657884717671948,-2.474747474747475,-0.4762840194362591,-2.474747474747475,-0.480179747245649,-2.474747474747475,-0.4184871960008546,-2.474747474747475,-0.4227211360179997
|
||||||
|
-2.3737373737373737,-0.4618013394593081,-2.3737373737373737,-0.6866461198443653,-2.3737373737373737,-0.6916556206405179,-2.3737373737373737,-0.4039154043077441,-2.3737373737373737,-0.4872175481179362,-2.3737373737373737,-0.49664688375599,-2.3737373737373737,-0.5021327343044837,-2.3737373737373737,-0.4148617786025484,-2.3737373737373737,-0.42058969704823307
|
||||||
|
-2.272727272727273,-0.4363369535364072,-2.272727272727273,-0.7664221699283893,-2.272727272727273,-0.76211944205629,-2.272727272727273,-0.3953824095289272,-2.272727272727273,-0.5066515567337302,-2.272727272727273,-0.5156479697413601,-2.272727272727273,-0.5240857213633179,-2.272727272727273,-0.4101489198915738,-2.272727272727273,-0.41773244666508813
|
||||||
|
-2.1717171717171717,-0.41087256761350716,-2.1717171717171717,-0.8294863656303931,-2.1717171717171717,-0.8275864122047706,-2.1717171717171717,-0.38684941475011053,-2.1717171717171717,-0.5248642081767847,-2.1717171717171717,-0.5320776321494358,-2.1717171717171717,-0.5460387084221523,-2.1717171717171717,-0.40386935734460455,-2.1717171717171717,-0.41386532161191136
|
||||||
|
-2.070707070707071,-0.38540818169060687,-2.070707070707071,-0.8777818560548117,-2.070707070707071,-0.8828614286116081,-2.070707070707071,-0.37790597680581006,-2.070707070707071,-0.5419305295559403,-2.070707070707071,-0.5450192204063132,-2.070707070707071,-0.5535021346303699,-2.070707070707071,-0.3970390682426877,-2.070707070707071,-0.40816135821642785
|
||||||
|
-1.9696969696969697,-0.3599437957677064,-1.9696969696969697,-0.9240065596308831,-1.9696969696969697,-0.9252381701217932,-1.9696969696969697,-0.3679210297690768,-1.9696969696969697,-0.5515520831674893,-1.9696969696969697,-0.5532507694312989,-1.9696969696969697,-0.5395642887779512,-1.9696969696969697,-0.3899536977126602,-1.9696969696969697,-0.4010221140801823
|
||||||
|
-1.868686868686869,-0.3344794098448062,-1.868686868686869,-0.9642081153190732,-1.868686868686869,-0.9553319880266173,-1.868686868686869,-0.3579360827323437,-1.868686868686869,-0.5596849243269256,-1.868686868686869,-0.556146459781286,-1.868686868686869,-0.5226399861377664,-1.868686868686869,-0.38238093755017905,-1.868686868686869,-0.3924834151653046
|
||||||
|
-1.7676767676767677,-0.3090150239219054,-1.7676767676767677,-1.0007396420666628,-1.7676767676767677,-0.9785388909278812,-1.7676767676767677,-0.34795113569561026,-1.7676767676767677,-0.5614467949548656,-1.7676767676767677,-0.556098671354368,-1.7676767676767677,-0.4982759643499402,-1.7676767676767677,-0.37323932215085087,-1.7676767676767677,-0.3822790688909727
|
||||||
|
-1.6666666666666665,-0.2835506379990052,-1.6666666666666665,-1.0187333297343348,-1.6666666666666665,-0.990642179129256,-1.6666666666666665,-0.3378404050890797,-1.6666666666666665,-0.5581030917440444,-1.6666666666666665,-0.5516597526410076,-1.6666666666666665,-0.47067804898067184,-1.6666666666666665,-0.3614402633008814,-1.6666666666666665,-0.37030436851426224
|
||||||
|
-1.5656565656565657,-0.2580862520761052,-1.5656565656565657,-1.0247628857811257,-1.5656565656565657,-0.9908786897501635,-1.5656565656565657,-0.32764529263529574,-1.5656565656565657,-0.5521110428952534,-1.5656565656565657,-0.543054168961121,-1.5656565656565657,-0.44308013361140386,-1.5656565656565657,-0.34868249075072216,-1.5656565656565657,-0.35699361568660476
|
||||||
|
-1.4646464646464645,-0.2326218661532044,-1.4646464646464645,-1.0034906902849632,-1.4646464646464645,-0.9791175953628313,-1.4646464646464645,-0.3174501801815117,-1.4646464646464645,-0.5459322825614802,-1.4646464646464645,-0.5306579767422843,-1.4646464646464645,-0.41548221824213516,-1.4646464646464645,-0.3311832422822113,-1.4646464646464645,-0.3422960409489238
|
||||||
|
-1.3636363636363638,-0.20715748023030392,-1.3636363636363638,-0.9673348570651019,-1.3636363636363638,-0.9595107779813504,-1.3636363636363638,-0.30725506772772765,-1.3636363636363638,-0.5358046337748493,-1.3636363636363638,-0.5149935986561597,-1.3636363636363638,-0.3878843028728669,-1.3636363636363638,-0.3132121589299601,-1.3636363636363638,-0.32640862478895577
|
||||||
|
-1.2626262626262625,-0.1816930943074038,-1.2626262626262625,-0.9225014127525308,-1.2626262626262625,-0.9337929369785798,-1.2626262626262625,-0.29705995527394363,-1.2626262626262625,-0.5219865374295057,-1.2626262626262625,-0.49551878203869837,-1.2626262626262625,-0.3602863875035988,-1.2626262626262625,-0.2946441284959401,-1.2626262626262625,-0.3093875165551468
|
||||||
|
-1.1616161616161618,-0.15622870838450328,-1.1616161616161618,-0.8751043056611054,-1.1616161616161618,-0.8989581380947891,-1.1616161616161618,-0.2868560938657385,-1.1616161616161618,-0.5034750880272445,-1.1616161616161618,-0.47203943335323734,-1.1616161616161618,-0.33268847213433056,-1.1616161616161618,-0.274883632364574,-1.1616161616161618,-0.290930041718859
|
||||||
|
-1.0606060606060606,-0.13076432246160322,-1.0606060606060606,-0.821606899074672,-1.0606060606060606,-0.8584249497008333,-1.0606060606060606,-0.27660353819390815,-1.0606060606060606,-0.48270847299437897,-1.0606060606060606,-0.44464074915622404,-1.0606060606060606,-0.3050905567650622,-1.0606060606060606,-0.25396600066040825,-1.0606060606060606,-0.27118022111102713
|
||||||
|
-0.9595959595959593,-0.1052999365387022,-0.9595959595959593,-0.7640740662013277,-0.9595959595959593,-0.8091349495541134,-0.9595959595959593,-0.2663509825220778,-0.9595959595959593,-0.4531496187924299,-0.9595959595959593,-0.4131252245857649,-0.9595959595959593,-0.2774926413957938,-0.9595959595959593,-0.2325608605277687,-0.9595959595959593,-0.24999263682664583
|
||||||
|
-0.858585858585859,-0.07983555061580246,-0.858585858585859,-0.6997648036121712,-0.858585858585859,-0.7481101580520273,-0.858585858585859,-0.24945014324598108,-0.858585858585859,-0.4128551081137216,-0.858585858585859,-0.3783375004573455,-0.858585858585859,-0.24988890615957382,-0.858585858585859,-0.20970608424200354,-0.858585858585859,-0.22760758480332924
|
||||||
|
-0.7575757575757578,-0.054371164692902076,-0.7575757575757578,-0.6349094271338603,-0.7575757575757578,-0.6820384544330558,-0.7575757575757578,-0.22976061598357173,-0.7575757575757578,-0.37194755761368214,-0.7575757575757578,-0.34125536540984164,-0.7575757575757578,-0.22211577202959193,-0.7575757575757578,-0.18612295967753525,-0.7575757575757578,-0.20435972492122192
|
||||||
|
-0.6565656565656566,-0.028906778770001355,-0.6565656565656566,-0.5675463340257147,-0.6565656565656566,-0.6095055279444694,-0.6565656565656566,-0.21007108872116223,-0.6565656565656566,-0.33089771921954814,-0.6565656565656566,-0.3018873155488892,-0.6565656565656566,-0.193901705770251,-0.6565656565656566,-0.16215648653127196,-0.6565656565656566,-0.17931671250996567
|
||||||
|
-0.5555555555555554,-0.003442392847101086,-0.5555555555555554,-0.4979737843441253,-0.5555555555555554,-0.5294156894319434,-0.5555555555555554,-0.17756203711819088,-0.5555555555555554,-0.28543993548509355,-0.5555555555555554,-0.26041062451302716,-0.5555555555555554,-0.1652647608815763,-0.5555555555555554,-0.13697108727984195,-0.5555555555555554,-0.15330854213602407
|
||||||
|
-0.45454545454545503,0.022021993075799252,-0.45454545454545503,-0.41446378537016554,-0.45454545454545503,-0.44063136513918405,-0.45454545454545503,-0.14370193132078618,-0.45454545454545503,-0.2395445410097954,-0.45454545454545503,-0.21652789115320525,-0.45454545454545503,-0.13529651419425484,-0.45454545454545503,-0.11162353028803523,-0.45454545454545503,-0.12623393965312618
|
||||||
|
-0.3535353535353538,0.047486378998699605,-0.3535353535353538,-0.32279891003383887,-0.3535353535353538,-0.3477046435373429,-0.3535353535353538,-0.10934683153775412,-0.3535353535353538,-0.19101529776271153,-0.3535353535353538,-0.17035416577174828,-0.3535353535353538,-0.10509845793132169,-0.3535353535353538,-0.08626013443382194,-0.3535353535353538,-0.0984136402387288
|
||||||
|
-0.2525252525252526,0.07295076492159988,-0.2525252525252526,-0.2310925448666578,-0.2525252525252526,-0.25069145628093464,-0.2525252525252526,-0.07491795886312486,-0.2525252525252526,-0.14150481827496786,-0.2525252525252526,-0.12255925867115473,-0.2525252525252526,-0.07490040166838845,-0.2525252525252526,-0.060434579838324495,-0.2525252525252526,-0.07006332009798681
|
||||||
|
-0.15151515151515138,0.09843047923373265,-0.15151515151515138,-0.13636354870852932,-0.15151515151515138,-0.15095910699954188,-0.15151515151515138,-0.040306119685216676,-0.15151515151515138,-0.08982558834407159,-0.15151515151515138,-0.07398207558396772,-0.15151515151515138,-0.044702345405455264,-0.15151515151515138,-0.033631412543263274,-0.15151515151515138,-0.04141233375856603
|
||||||
|
-0.050505050505050164,0.12391212075429944,-0.050505050505050164,-0.03941345742250633,-0.050505050505050164,-0.04947445191778734,-0.050505050505050164,-0.005694280507308445,-0.050505050505050164,-0.03797674651308919,-0.050505050505050164,-0.025080464074353173,-0.050505050505050164,-0.014504289142522105,-0.050505050505050164,-0.006446181090338347,-0.050505050505050164,-0.012381418678247798
|
||||||
|
0.050505050505050164,0.14939376227486617,0.050505050505050164,0.056551574802519614,0.050505050505050164,0.0525838784102356,0.050505050505050164,0.028911158365061536,0.050505050505050164,0.013973891774473416,0.050505050505050164,0.023794553267499748,0.050505050505050164,0.01583885016218507,0.050505050505050164,0.021038028372213642,0.050505050505050164,0.016846741994686543
|
||||||
|
0.15151515151515138,0.17487540379543332,0.15151515151515138,0.15017264202689645,0.15151515151515138,0.15408973105493792,0.15151515151515138,0.062183868537649845,0.15151515151515138,0.06589471730593952,0.15151515151515138,0.07245763138776953,0.15151515151515138,0.046508129166361926,0.15151515151515138,0.04842915541973139,0.15151515151515138,0.04601083462340586
|
||||||
|
0.2525252525252526,0.2003570453160002,0.2525252525252526,0.24151055338001104,0.2525252525252526,0.2530277286116801,0.2525252525252526,0.09533027991528796,0.2525252525252526,0.11633887943820748,0.2525252525252526,0.11992049316059605,0.2525252525252526,0.07717740817053882,0.2525252525252526,0.07538338916654858,0.2525252525252526,0.07493657104851133
|
||||||
|
0.3535353535353538,0.22583868683656727,0.3535353535353538,0.3245702345293225,0.3535353535353538,0.3487077570947679,0.3535353535353538,0.12847669129292608,0.3535353535353538,0.1661606781018032,0.3535353535353538,0.16626942811591283,0.3535353535353538,0.10784668717471575,0.3535353535353538,0.10205978943459323,0.3535353535353538,0.10356289911566637
|
||||||
|
0.45454545454545414,0.25132032835713397,0.45454545454545414,0.4042440047834261,0.45454545454545414,0.4412637068427958,0.45454545454545414,0.15705349698246504,0.45454545454545414,0.21489012650224273,0.45454545454545414,0.21055873443432177,0.45454545454545414,0.1385159661788923,0.45454545454545414,0.12849799626750344,0.45454545454545414,0.13171638145035697
|
||||||
|
0.5555555555555554,0.2768019698777009,0.5555555555555554,0.48386343064481413,0.5555555555555554,0.5292644209820558,0.5555555555555554,0.1822941322301175,0.5555555555555554,0.26332131026810235,0.5555555555555554,0.25282542280637477,0.5555555555555554,0.16918524518306918,0.5555555555555554,0.1537986605041808,0.5555555555555554,0.15885558014342485
|
||||||
|
0.6565656565656566,0.30228361139826787,0.6565656565656566,0.5582703975525269,0.6565656565656566,0.6095279265110211,0.6565656565656566,0.20753476747777022,0.6565656565656566,0.311752494033962,0.6565656565656566,0.2926908500466596,0.6565656565656566,0.1998545241872461,0.6565656565656566,0.17827793057103108,0.6565656565656566,0.18425901109338033
|
||||||
|
0.7575757575757578,0.3276630675001063,0.7575757575757578,0.6240165672599972,0.7575757575757578,0.6817170975194252,0.7575757575757578,0.23277540272542308,0.7575757575757578,0.35585725421977105,0.7575757575757578,0.32990973382338223,0.7575757575757578,0.23052380319142296,0.7575757575757578,0.19993717433313357,0.7575757575757578,0.20856541522380753
|
||||||
|
0.8585858585858581,0.35294340046326517,0.8585858585858581,0.6832251591090945,0.8585858585858581,0.7490023509530548,0.8585858585858581,0.2580160379730755,0.8585858585858581,0.39411574874383437,0.8585858585858581,0.3646086605463153,0.8585858585858581,0.2611930821955996,0.8585858585858581,0.21959645347898898,0.8585858585858581,0.2319021251050189
|
||||||
|
0.9595959595959593,0.3782237334264241,0.9595959595959593,0.7379264665053952,0.9595959595959593,0.8101967957597399,0.9595959595959593,0.28325667322072823,0.9595959595959593,0.4295026011065611,0.9595959595959593,0.39755496473819213,0.9595959595959593,0.2918623611997765,0.9595959595959593,0.23923560012200779,0.9595959595959593,0.25414437767202697
|
||||||
|
1.0606060606060606,0.40350406638958297,1.0606060606060606,0.7921580999576039,1.0606060606060606,0.865038072851208,1.0606060606060606,0.3013557830052828,1.0606060606060606,0.4622992830762259,1.0606060606060606,0.4264742505103137,1.0606060606060606,0.3225316402039533,1.0606060606060606,0.2583177367004956,1.0606060606060606,0.27509011865395333
|
||||||
|
1.1616161616161618,0.4287843993527419,1.1616161616161618,0.8463753861957045,1.1616161616161618,0.9101530745705552,1.1616161616161618,0.30890687222540525,1.1616161616161618,0.4901351365169132,1.1616161616161618,0.45319808589043276,1.1616161616161618,0.35276108409396234,1.1616161616161618,0.27725858987652097,1.1616161616161618,0.29560030900846
|
||||||
|
1.262626262626262,0.4540647323159006,1.262626262626262,0.897495878378595,1.262626262626262,0.9417185837581196,1.262626262626262,0.3164579614455276,1.262626262626262,0.5145846409490937,1.262626262626262,0.4780296455205537,1.262626262626262,0.3794404038170447,1.262626262626262,0.2943968389517317,1.262626262626262,0.3152556528081
|
||||||
|
1.3636363636363633,0.47934506527905946,1.3636363636363633,0.9371746663372353,1.3636363636363633,0.9683350572505884,1.3636363636363633,0.32400905066565,1.3636363636363633,0.5362370529858077,1.3636363636363633,0.49985847015098533,1.3636363636363633,0.4061135157391696,1.3636363636363633,0.31086438420332474,1.3636363636363633,0.33319398921001137
|
||||||
|
1.4646464646464645,0.5046253982422182,1.4646464646464645,0.9707358108138878,1.4646464646464645,0.9850292043911345,1.4646464646464645,0.3315601398857724,1.4646464646464645,0.5531019255981576,1.4646464646464645,0.5181848921010453,1.4646464646464645,0.43278662766129444,1.4646464646464645,0.32679862313827224,1.4646464646464645,0.34908318351734496
|
||||||
|
1.5656565656565657,0.519310758600954,1.5656565656565657,0.9906032176938914,1.5656565656565657,0.9918397190961462,1.5656565656565657,0.3391112291058948,1.5656565656565657,0.5659801950328859,1.5656565656565657,0.5323498791465002,1.5656565656565657,0.4511009412793216,1.5656565656565657,0.34162587661768695,1.5656565656565657,0.3628958484057042
|
||||||
|
1.666666666666667,0.51401635833774,1.666666666666667,1.005715077214144,1.666666666666667,0.9899656142606021,1.666666666666667,0.346662318326017,1.666666666666667,0.575829702298404,1.666666666666667,0.541260149475436,1.666666666666667,0.4674803110925756,1.666666666666667,0.35601920704359724,1.666666666666667,0.37572761649169056
|
||||||
|
1.7676767676767673,0.5060676795476615,1.7676767676767673,1.0131883048070176,1.7676767676767673,0.9786887428475383,1.7676767676767673,0.35421340754613934,1.7676767676767673,0.5828151703640635,1.7676767676767673,0.5455395874048847,1.7676767676767673,0.4838510153495891,1.7676767676767673,0.3703169330810678,1.7676767676767673,0.38781035405087
|
||||||
|
1.8686868686868685,0.495017206229559,1.8686868686868685,0.9802541539054102,1.8686868686868685,0.9559310588882513,1.8686868686868685,0.3617644967662619,1.8686868686868685,0.5839088497682434,1.8686868686868685,0.5467157898697311,1.8686868686868685,0.49835864007261943,1.8686868686868685,0.38410765063343066,1.8686868686868685,0.3977196343512365
|
||||||
|
1.9696969696969697,0.48396673291145637,1.9696969696969697,0.9263388630289161,1.9696969696969697,0.9221166683929235,1.9696969696969697,0.36931558598638414,1.9696969696969697,0.5804936028756624,1.9696969696969697,0.5450082343452209,1.9696969696969697,0.5115510651058692,1.9696969696969697,0.39647206872026003,1.9696969696969697,0.4057110985660076
|
||||||
|
2.070707070707071,0.4729162595933537,2.070707070707071,0.8698358861835761,2.070707070707071,0.8764481362001709,2.070707070707071,0.3768666752065065,2.070707070707071,0.574716686049867,2.070707070707071,0.5394474878302619,2.070707070707071,0.5097127295818997,2.070707070707071,0.4049032898801099,2.070707070707071,0.41126316053027995
|
||||||
|
2.1717171717171713,0.46186578627525116,2.1717171717171713,0.8081407617658106,2.1717171717171713,0.8224404974364862,2.1717171717171713,0.38441776442662906,2.1717171717171713,0.5655375705620478,2.1717171717171713,0.5300324428024472,2.1717171717171713,0.49554940844796147,2.1717171717171713,0.4101839304627971,2.1717171717171713,0.4155357725301964
|
||||||
|
2.2727272727272725,0.4491770446280175,2.2727272727272725,0.7442526428212628,2.2727272727272725,0.7592323649828391,2.2727272727272725,0.391968853646751,2.2727272727272725,0.552350323381661,2.2727272727272725,0.5163813504127768,2.2727272727272725,0.48094925798793925,2.2727272727272725,0.413936941837358,2.2727272727272725,0.41843071308941276
|
||||||
|
2.3737373737373737,0.43609986761848685,2.3737373737373737,0.675405575107383,2.3737373737373737,0.6874741372997285,2.3737373737373737,0.39951994286687353,2.3737373737373737,0.5335539998256553,2.3737373737373737,0.49865541506871236,2.3737373737373737,0.4655571015656922,2.3737373737373737,0.4173906236056948,2.3737373737373737,0.42027249977934045
|
||||||
|
2.474747474747475,0.4066895271847391,2.474747474747475,0.5978840366507735,2.474747474747475,0.6073682995880296,2.474747474747475,0.40692119452733155,2.474747474747475,0.5117177142842388,2.474747474747475,0.4784532511364369,2.474747474747475,0.4501649451434452,2.474747474747475,0.4206585025597512,2.474747474747475,0.4213399238172195
|
||||||
|
2.5757575757575752,0.3749622763477891,2.5757575757575752,0.5099585586540418,2.5757575757575752,0.5223271133442401,2.5757575757575752,0.41415264022012394,2.5757575757575752,0.4850415148130571,2.5757575757575752,0.4567094947730761,2.5757575757575752,0.43458555601387144,2.5757575757575752,0.42158324745022285,2.5757575757575752,0.42181632222498416
|
||||||
|
2.6767676767676765,0.3432350255108388,2.6767676767676765,0.4205365946887392,2.6767676767676765,0.432906236858961,2.6767676767676765,0.4199131836378292,2.6767676767676765,0.45218830888592937,2.6767676767676765,0.4332394825941561,2.6767676767676765,0.41774264448225407,2.6767676767676765,0.42145613907090707,2.6767676767676765,0.4215504924390677
|
||||||
|
2.7777777777777777,0.3115077746738885,2.7777777777777777,0.32930350370842715,2.7777777777777777,0.3412321347424227,2.7777777777777777,0.42274639662898705,2.7777777777777777,0.4163402713183856,2.7777777777777777,0.40851950219775013,2.7777777777777777,0.40089973295063663,2.7777777777777777,0.4209228617300304,2.7777777777777777,0.4203590184673923
|
||||||
|
2.878787878787879,0.27978052383693824,2.878787878787879,0.23807041272811588,2.878787878787879,0.24760314946640188,2.878787878787879,0.42557960962014507,2.878787878787879,0.3802049595409251,2.878787878787879,0.383057999391408,2.878787878787879,0.3840568214190192,2.878787878787879,0.41938009129458526,2.878787878787879,0.41854626446476473
|
||||||
|
2.9797979797979792,0.24805327299998842,2.9797979797979792,0.14646854757187647,2.9797979797979792,0.15264712621771054,2.9797979797979792,0.428104678899817,2.9797979797979792,0.3432577786602793,2.9797979797979792,0.35694448241628624,2.9797979797979792,0.367213909887402,2.9797979797979792,0.41773298189050795,2.9797979797979792,0.4163510447804036
|
||||||
|
3.0808080808080813,0.21632602216303798,3.0808080808080813,0.05456143993271787,3.0808080808080813,0.057336396951423035,3.0808080808080813,0.42910204221273207,3.0808080808080813,0.30602019255320434,3.0808080808080813,0.3305660520102483,3.0808080808080813,0.3503709983557844,3.0808080808080813,0.41593157838764133,3.0808080808080813,0.41396474245507225
|
||||||
|
3.1818181818181817,0.18459877132608776,3.1818181818181817,-0.03733538955626138,3.1818181818181817,-0.03779843888287274,3.1818181818181817,0.4300994055256468,3.1818181818181817,0.26873960102765904,3.1818181818181817,0.30419224859801247,3.1818181818181817,0.3335280868241671,3.1818181818181817,0.41409475876758717,3.1818181818181817,0.41152646064562604
|
||||||
|
3.282828282828282,0.15287152048913782,3.282828282828282,-0.12920906194738088,3.282828282828282,-0.13249853932321157,3.282828282828282,0.43099899837317435,3.282828282828282,0.2314874157056526,3.282828282828282,0.27788417508140784,3.282828282828282,0.3164995410780566,3.282828282828282,0.4122620364061852,3.282828282828282,0.40912247673587887
|
||||||
|
3.383838383838384,0.12114426965218736,3.383838383838384,-0.22108273433850145,3.383838383838384,-0.22672866959540386,3.383838383838384,0.4318917322435721,3.383838383838384,0.19424068277399548,3.383838383838384,0.25176947991950477,3.383838383838384,0.2992528546417876,3.383838383838384,0.41043205422405316,3.383838383838384,0.40674183306733336
|
||||||
|
3.4848484848484844,0.08941701881523752,3.4848484848484844,-0.3129564067296208,3.4848484848484844,-0.3204339220693533,3.4848484848484844,0.43278446611396965,3.4848484848484844,0.15713787053146627,3.4848484848484844,0.22587592408322044,3.4848484848484844,0.2820061682055188,3.4848484848484844,0.4086021011097265,3.4848484848484844,0.4043698847877142
|
||||||
|
3.5858585858585865,0.058162275193419995,3.5858585858585865,-0.40462815693660914,3.5858585858585865,-0.41324795154433747,3.5858585858585865,0.4336771999843675,3.5858585858585865,0.12019800234358827,3.5858585858585865,0.20009983185318994,3.5858585858585865,0.2647594817692496,3.5858585858585865,0.4067722514909233,3.5858585858585865,0.40203120630187705
|
||||||
|
3.686868686868687,0.027654025225499562,3.686868686868687,-0.49422269067564845,3.686868686868687,-0.505293720158625,3.686868686868687,0.43456993385476517,3.686868686868687,0.08338176166505175,3.686868686868687,0.17451220690194294,3.686868686868687,0.24694025624429472,3.686868686868687,0.40494401437700783,3.686868686868687,0.39972779600606
|
||||||
|
3.787878787878787,-0.0028542247424208616,3.787878787878787,-0.5825355853286744,3.787878787878787,-0.5971159649192432,3.787878787878787,0.4354626677251625,3.787878787878787,0.04665044899957155,3.787878787878787,0.14916273839002891,3.787878787878787,0.22899283485249716,3.787878787878787,0.40312179798093106,3.787878787878787,0.39746202764807126
|
||||||
|
3.8888888888888893,-0.03336247471034154,3.8888888888888893,-0.6703463394238872,3.8888888888888893,-0.68824406601414,3.8888888888888893,0.4363554015955604,3.8888888888888893,0.009919136334091362,3.8888888888888893,0.12414842115967273,3.8888888888888893,0.21104541346069938,3.8888888888888893,0.4013011021954902,3.8888888888888893,0.3952295870367829
|
||||||
|
3.9898989898989896,-0.06387072467826214,3.9898989898989896,-0.7575928168757736,3.9898989898989896,-0.7784133912470257,3.9898989898989896,0.437248135465958,3.9898989898989896,-0.026722390982327433,3.9898989898989896,0.09939234299162882,3.9898989898989896,0.19309799206890174,3.9898989898989896,0.399484052282032,3.9898989898989896,0.3930265651896393
|
||||||
|
4.09090909090909,-0.0943789746461824,4.09090909090909,-0.8443788481067765,4.09090909090909,-0.8681309126980375,4.09090909090909,0.43814086933635565,4.09090909090909,-0.06308596529257729,4.09090909090909,0.07491765400345742,4.09090909090909,0.1750164743635475,4.09090909090909,0.39766754707663343,4.09090909090909,0.3908577509521082
|
||||||
|
4.191919191919192,-0.12488722461410334,4.191919191919192,-0.9297917533069101,4.191919191919192,-0.9573364023412008,4.191919191919192,0.4390336032067535,4.191919191919192,-0.09929539509789244,4.191919191919192,0.05074971564267564,4.191919191919192,0.1568727764842795,4.191919191919192,0.3958543351530404,4.191919191919192,0.38872432233841003
|
||||||
|
4.292929292929292,-0.15539547458202363,4.292929292929292,-1.0140884125491687,4.292929292929292,-1.0459165238042567,4.292929292929292,0.4399263370771512,4.292929292929292,-0.1349334585206603,4.292929292929292,0.02675516616820918,4.292929292929292,0.13872907860501169,4.292929292929292,0.3940418892740997,4.292929292929292,0.38661923148208605
|
||||||
|
4.3939393939393945,-0.18590372454994458,4.3939393939393945,-1.0972974392893766,4.3939393939393945,-1.1342383379633272,4.3939393939393945,0.4408190709475487,4.3939393939393945,-0.16982980680843562,4.3939393939393945,0.002964652994963484,4.3939393939393945,0.11796054958424437,4.3939393939393945,0.3922298874756054,4.3939393939393945,0.3845302650106349
|
||||||
|
4.494949494949495,-0.216411974517865,4.494949494949495,-1.179182894055243,4.494949494949495,-1.2221355458185688,4.494949494949495,0.44032091498508585,4.494949494949495,-0.20469748939648835,4.494949494949495,-0.0206002794035424,4.494949494949495,0.09701325884395126,4.494949494949495,0.39041788567711144,4.494949494949495,0.38248614430609396
|
||||||
|
4.595959595959595,-0.24692022448578524,4.595959595959595,-1.2601894992373368,4.595959595959595,-1.3091379548259912,4.595959595959595,0.4390119198940737,4.595959595959595,-0.239564339118166,4.595959595959595,-0.044064215802437315,4.595959595959595,0.07606596810365834,4.595959595959595,0.38861853091288373,4.595959595959595,0.3804739406387159
|
||||||
|
4.696969696969697,-0.2774284744537062,4.696969696969697,-1.3408190143954206,4.696969696969697,-1.395667382198044,4.696969696969697,0.4377029248030613,4.696969696969697,-0.2744311888398445,4.696969696969697,-0.06739710896332894,4.696969696969697,0.05511867736336504,4.696969696969697,0.38683625018149875,4.696969696969697,0.37848669218529357
|
||||||
|
4.797979797979798,-0.3079367244216266,4.797979797979798,-1.4214485295534998,4.797979797979798,-1.4814148159277154,4.797979797979798,0.436393929712049,4.797979797979798,-0.3092980385615221,4.797979797979798,-0.09057526494106827,4.797979797979798,0.034171386623072064,4.797979797979798,0.3850542123238927,4.797979797979798,0.37652869146057905
|
||||||
|
4.8989898989899,-0.3384449743895474,4.8989898989899,-1.5019215376311323,4.8989898989899,-1.5662892316768398,4.8989898989899,0.4350560618496009,4.8989898989899,-0.34416306870335767,4.8989898989899,-0.11357143325279366,4.8989898989899,0.013224095882778591,4.8989898989899,0.383272237289863,4.8989898989899,0.37460430584833954
|
||||||
|
5.0,-0.3689532243574676,5.0,-1.5820215750973248,5.0,-1.6508596672714462,5.0,0.43307940950570034,5.0,-0.37879161071248096,5.0,-0.13636462992911846,5.0,-0.007723194857514326,5.0,0.38149127984729847,5.0,0.37272620912380855
|
|
7
TeX/Plots/Data/sin_6.csv
Normal file
@ -0,0 +1,7 @@
x,y
-3.14159265358979 , -1.22464679914735e-16
-1.88495559215388 , -0.951056516295154
-0.628318530717959 , -0.587785252292473
0.628318530717959 , 0.587785252292473
1.88495559215388 , 0.951056516295154
3.14159265358979 , 1.22464679914735e-16
64
TeX/Plots/Data/sin_conv.csv
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
,x_i,y_i,x_d,y_d,x,y
|
||||||
|
"1",0,0,-0.251688505259414,-0.109203329280437,-0.0838961684198045,-0.0364011097601456
|
||||||
|
"2",0.1,0.0998334166468282,0.216143831477992,0.112557051753147,0.00912581751114394,0.0102181849309398
|
||||||
|
"3",0.2,0.198669330795061,0.351879533708722,0.52138915851383,0.120991434720523,0.180094983253476
|
||||||
|
"4",0.3,0.29552020666134,-0.0169121548298757,0.0870956013269369,0.0836131805695847,0.163690012207993
|
||||||
|
"5",0.4,0.389418342308651,0.278503661037003,0.464752686490904,0.182421968363305,0.294268636359638
|
||||||
|
"6",0.5,0.479425538604203,0.241783494554983,0.521480762031938,0.216291763003623,0.399960258238722
|
||||||
|
"7",0.6,0.564642473395035,0.67288177436767,0.617435509386938,0.35521581484916,0.469717955748659
|
||||||
|
"8",0.7,0.644217687237691,0.692239292735764,0.395366561077235,0.492895242512842,0.472257444593698
|
||||||
|
"9",0.8,0.717356090899523,0.779946606884677,0.830045203984444,0.621840812496715,0.609161571471379
|
||||||
|
"10",0.9,0.783326909627483,0.796987424421658,0.801263132114778,0.723333122197902,0.682652280249237
|
||||||
|
"11",1,0.841470984807897,1.06821012817873,0.869642838589798,0.860323524382936,0.752971972337735
|
||||||
|
"12",1.1,0.891207360061435,1.50128637982775,0.899079529605641,1.09148187598916,0.835465707990221
|
||||||
|
"13",1.2,0.932039085967226,1.1194263347154,0.906626360727432,1.13393429991233,0.875953352580199
|
||||||
|
"14",1.3,0.963558185417193,1.24675170552299,1.07848030956084,1.2135821540696,0.950969562327306
|
||||||
|
"15",1.4,0.98544972998846,1.32784804980202,0.76685418220594,1.2818141129714,0.899892140468108
|
||||||
|
"16",1.5,0.997494986604054,1.23565831982523,1.07310713979952,1.2548338349408,0.961170357331681
|
||||||
|
"17",1.6,0.999573603041505,1.90289281875567,0.88003153305018,1.47254506382487,0.94006950203764
|
||||||
|
"18",1.7,0.991664810452469,1.68871194985252,1.01829329437246,1.56940444551462,0.955793455192302
|
||||||
|
"19",1.8,0.973847630878195,1.72179983981017,1.02268013575533,1.64902528694529,0.988666907865147
|
||||||
|
"20",1.9,0.946300087687414,2.0758716236832,0.805032560816536,1.83908127693465,0.928000158917177
|
||||||
|
"21",2,0.909297426825682,2.11118945422405,1.0134691646089,1.94365432453739,0.957334347939419
|
||||||
|
"22",2.1,0.863209366648874,2.00475777514698,0.86568986134637,1.9826265174693,0.924298444442167
|
||||||
|
"23",2.2,0.80849640381959,2.40773948766051,0.667018023975934,2.15807575978944,0.826761739840873
|
||||||
|
"24",2.3,0.74570521217672,2.14892522112975,0.872704236332415,2.17485332420928,0.839957045849706
|
||||||
|
"25",2.4,0.675463180551151,2.41696701330131,0.253955021611832,2.26412064248401,0.631186439537074
|
||||||
|
"26",2.5,0.598472144103957,2.4087686184711,0.49450592290142,2.33847747374241,0.557319074033222
|
||||||
|
"27",2.6,0.515501371821464,2.55312145187913,0.343944677655963,2.4151672191424,0.467867318187242
|
||||||
|
"28",2.7,0.42737988023383,2.6585492172135,0.528990826178838,2.51649125567521,0.447178678139147
|
||||||
|
"29",2.8,0.334988150155905,2.86281283456189,0.311400289332401,2.65184232661008,0.399952143417531
|
||||||
|
"30",2.9,0.239249329213982,2.74379162744449,0.501282616227342,2.70796893413474,0.432791852065713
|
||||||
|
"31",3,0.141120008059867,2.95951338295806,0.241385538727577,2.81576254355573,0.373424929745113
|
||||||
|
"32",3.1,0.0415806624332905,2.87268165585702,0.0764217470113609,2.85626015646841,0.264426413128825
|
||||||
|
"33",3.2,-0.0583741434275801,3.29898326143096,-0.272500742891131,3.0101734240017,0.0756660807058224
|
||||||
|
"34",3.3,-0.157745694143249,3.64473302259565,-0.24394459655987,3.24463496592626,-0.0688606479078372
|
||||||
|
"35",3.4,-0.255541102026832,3.46698556586598,-0.184272732807665,3.35339770834784,-0.15210430721581
|
||||||
|
"36",3.5,-0.35078322768962,3.67208160089566,-0.119933071489115,3.51318482264886,-0.176430496141549
|
||||||
|
"37",3.6,-0.442520443294852,3.73738883546162,-0.486197268315415,3.62961845872181,-0.283186040443485
|
||||||
|
"38",3.7,-0.529836140908493,3.77209072631297,-0.70275845349803,3.68619468325631,-0.422698101171958
|
||||||
|
"39",3.8,-0.611857890942719,3.66424718733509,-0.482410535792735,3.69727905622484,-0.462935060857071
|
||||||
|
"40",3.9,-0.687766159183974,3.72257849834575,-0.58477261395861,3.71784166083333,-0.543108060927685
|
||||||
|
"41",4,-0.756802495307928,3.85906293918747,-0.703015362823377,3.76539960460785,-0.618449987254768
|
||||||
|
"42",4.1,-0.818277111064411,4.0131961543859,-0.900410257326814,3.84632588679948,-0.708384794580195
|
||||||
|
"43",4.2,-0.871575772413588,4.0263131749378,-0.906044808231391,3.92085812717095,-0.789303202089581
|
||||||
|
"44",4.3,-0.916165936749455,4.77220075671212,-0.530827398816399,4.22925719163087,-0.729943577630504
|
||||||
|
"45",4.4,-0.951602073889516,4.4795636311648,-1.26672674728111,4.35331987391088,-0.921377204806384
|
||||||
|
"46",4.5,-0.977530117665097,4.5088210845027,-0.886168448505782,4.44898342417679,-0.914264630323723
|
||||||
|
"47",4.6,-0.993691003633465,4.70645816063034,-1.1082213336257,4.58861983576766,-0.97806804633887
|
||||||
|
"48",4.7,-0.999923257564101,4.48408312008838,-0.98352521226689,4.55827710678399,-1.01979325501755
|
||||||
|
"49",4.8,-0.996164608835841,4.97817348334347,-1.03043977928678,4.69715193557134,-1.02203657500247
|
||||||
|
"50",4.9,-0.982452612624332,5.09171179984929,-0.948912592308037,4.8484480091335,-0.999631162740658
|
||||||
|
"51",5,-0.958924274663138,4.87710566000798,-0.825224506141761,4.87693462801326,-0.937722874707385
|
||||||
|
"52",5.1,-0.925814682327732,5.04139294635392,-0.718936957124138,4.97198282698482,-0.856650521199568
|
||||||
|
"53",5.2,-0.883454655720153,4.94893136398377,-0.992753696742329,4.98294046406006,-0.885371127105841
|
||||||
|
"54",5.3,-0.832267442223901,5.38128555915899,-0.717434652733088,5.10670981664685,-0.816103747160468
|
||||||
|
"55",5.4,-0.772764487555987,5.46192736637355,-0.724060934669406,5.2398375587704,-0.780347098915984
|
||||||
|
"56",5.5,-0.705540325570392,5.30834840605735,-0.721772537926303,5.28807996342596,-0.766498807502665
|
||||||
|
"57",5.6,-0.631266637872321,5.53199687756185,-0.583133415115471,5.40779902870202,-0.688843253413245
|
||||||
|
"58",5.7,-0.550685542597638,5.9238064899769,-0.541063721566544,5.59865656961444,-0.627040990301198
|
||||||
|
"59",5.8,-0.464602179413757,5.8067999294844,-0.43156566524513,5.68077207716296,-0.552246304884294
|
||||||
|
"60",5.9,-0.373876664830236,5.93089453525347,-0.604056792592816,5.80084302534748,-0.550733954237757
|
||||||
|
"61",6,-0.279415498198926,6.02965160059402,-0.234452930170458,5.91786841211583,-0.434812265604247
|
||||||
|
"62",6.1,-0.182162504272095,5.88697419016579,-0.135764844759742,5.91990685000071,-0.323660336266941
|
||||||
|
"63",6.2,-0.0830894028174964,5.91445270773648,-0.0073552500992853,5.92798052258888,-0.205537962618181
|
|
141
TeX/Plots/RN_vs_RS.tex
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
\pgfplotsset{
|
||||||
|
compat=1.11,
|
||||||
|
legend image code/.code={
|
||||||
|
\draw[mark repeat=2,mark phase=2]
|
||||||
|
plot coordinates {
|
||||||
|
(0cm,0cm)
|
||||||
|
(0.075cm,0cm) %% default is (0.3cm,0cm)
|
||||||
|
(0.15cm,0cm) %% default is (0.6cm,0cm)
|
||||||
|
};%
|
||||||
|
}
|
||||||
|
}
|
||||||
|
\begin{figure}
|
||||||
|
\begin{subfigure}[b]{0.5\textwidth}
|
||||||
|
\begin{subfigure}[b]{\textwidth}
|
||||||
|
\begin{adjustbox}{width=\textwidth, height=0.25\textheight}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[
|
||||||
|
ytick = {-1, 0, 1, 2},
|
||||||
|
yticklabels = {$-1$, $\phantom{-0.}0$, $1$, $2$},]
|
||||||
|
\addplot table [x=x, y=y, col sep=comma, only marks,
|
||||||
|
forget plot] {Plots/Data/sin_6.csv};
|
||||||
|
\addplot [black, line width=2pt] table [x=x, y=y, col
|
||||||
|
sep=comma, mark=none] {Plots/Data/matlab_0.csv};
|
||||||
|
\addplot [red, line width = 1.5pt, dashed] table [x=x_n_5000_tl_0.0,
|
||||||
|
y=y_n_5000_tl_0.0, col sep=comma, mark=none] {Plots/Data/scala_out_sin.csv};
|
||||||
|
\addlegendentry{$f_1^{*, 0.1}$};
|
||||||
|
\addlegendentry{$\mathcal{RN}_w^{\tilde{\lambda}}$};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{adjustbox}
|
||||||
|
\caption{$\lambda = 0.1$}
|
||||||
|
\end{subfigure}\\
|
||||||
|
\begin{subfigure}[b]{\textwidth}
|
||||||
|
\begin{adjustbox}{width=\textwidth, height=0.25\textheight}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}
|
||||||
|
\addplot table [x=x, y=y, col sep=comma, only marks,
|
||||||
|
forget plot] {Plots/Data/sin_6.csv};
|
||||||
|
\addplot [black, line width=2pt] table [x=x, y=y, col sep=comma, mark=none] {Plots/Data/matlab_1.csv};
|
||||||
|
\addplot [red, line width = 1.5pt, dashed] table [x=x_n_5000_tl_1.0,
|
||||||
|
y=y_n_5000_tl_1.0, col sep=comma, mark=none] {Plots/Data/scala_out_sin.csv};
|
||||||
|
\addlegendentry{$f_1^{*, 1.0}$};
|
||||||
|
\addlegendentry{$\mathcal{RN}_w^{\tilde{\lambda}}$};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{adjustbox}
|
||||||
|
\caption{$\lambda = 1.0$}
|
||||||
|
\end{subfigure}\\
|
||||||
|
\begin{subfigure}[b]{\textwidth}
|
||||||
|
\begin{adjustbox}{width=\textwidth, height=0.25\textheight}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}
|
||||||
|
\addplot table [x=x, y=y, col sep=comma, only marks,
|
||||||
|
forget plot] {Plots/Data/sin_6.csv};
|
||||||
|
\addplot [black, line width=2pt] table [x=x, y=y, col sep=comma, mark=none] {Plots/Data/matlab_3.csv};
|
||||||
|
\addplot [red, line width = 1.5pt, dashed] table [x=x_n_5000_tl_3.0,
|
||||||
|
y=y_n_5000_tl_3.0, col sep=comma, mark=none] {Plots/Data/scala_out_sin.csv};
|
||||||
|
\addlegendentry{$f_1^{*, 3.0}$};
|
||||||
|
\addlegendentry{$\mathcal{RN}_w^{\tilde{\lambda}}$};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{adjustbox}
|
||||||
|
\caption{$\lambda = 3.0$}
|
||||||
|
\end{subfigure}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}[b]{0.5\textwidth}
|
||||||
|
\begin{subfigure}[b]{\textwidth}
|
||||||
|
\begin{adjustbox}{width=\textwidth, height=0.245\textheight}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[
|
||||||
|
ytick = {-2,-1, 0, 1, 2},
|
||||||
|
yticklabels = {$-2$,$-1$, $\phantom{-0.}0$, $1$, $2$},]
|
||||||
|
\addplot table [x=x, y=y, col sep=comma, only marks,
|
||||||
|
forget plot] {Plots/Data/data_sin_d_t.csv};
|
||||||
|
\addplot [black, line width=2pt] table [x=x, y=y, col sep=comma, mark=none] {Plots/Data/matlab_sin_d_01.csv};
|
||||||
|
\addplot [red, line width = 1.5pt, dashed] table [x=x_n_5000_tl_0.1,
|
||||||
|
y=y_n_5000_tl_0.1, col sep=comma, mark=none] {Plots/Data/scala_out_d_1_t.csv};
|
||||||
|
\addlegendentry{$f_1^{*, 0.1}$};
|
||||||
|
\addlegendentry{$\mathcal{RN}_w^{\tilde{\lambda}}$};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{adjustbox}
|
||||||
|
\caption{$\lambda = 0.1$}
|
||||||
|
\end{subfigure}\\
|
||||||
|
\begin{subfigure}[b]{\textwidth}
|
||||||
|
\begin{adjustbox}{width=\textwidth, height=0.25\textheight}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}
|
||||||
|
\addplot table [x=x, y=y, col sep=comma, only marks,
|
||||||
|
forget plot] {Plots/Data/data_sin_d_t.csv};
|
||||||
|
\addplot [black, line width=2pt] table [x=x, y=y, col sep=comma, mark=none] {Plots/Data/matlab_sin_d_1.csv};
|
||||||
|
\addplot [red, line width = 1.5pt, dashed] table [x=x_n_5000_tl_1.0,
|
||||||
|
y=y_n_5000_tl_1.0, col sep=comma, mark=none] {Plots/Data/scala_out_d_1_t.csv};
|
||||||
|
\addlegendentry{$f_1^{*, 1.0}$};
|
||||||
|
\addlegendentry{$\mathcal{RN}_w^{\tilde{\lambda},*}$};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{adjustbox}
|
||||||
|
\caption{$\lambda = 1.0$}
|
||||||
|
\end{subfigure}\\
|
||||||
|
\begin{subfigure}[b]{\textwidth}
|
||||||
|
\begin{adjustbox}{width=\textwidth, height=0.25\textheight}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}
|
||||||
|
\addplot table [x=x, y=y, col sep=comma, only marks,
|
||||||
|
forget plot] {Plots/Data/data_sin_d_t.csv};
|
||||||
|
\addplot [black, line width=2pt] table [x=x, y=y, col sep=comma, mark=none] {Plots/Data/matlab_sin_d_3.csv};
|
||||||
|
\addplot [red, line width = 1.5pt, dashed] table [x=x_n_5000_tl_3.0,
|
||||||
|
y=y_n_5000_tl_3.0, col sep=comma, mark=none] {Plots/Data/scala_out_d_1_t.csv};
|
||||||
|
\addlegendentry{$f_1^{*, 3.0}$};
|
||||||
|
\addlegendentry{$\mathcal{RN}_w^{\tilde{\lambda}}$};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{adjustbox}
|
||||||
|
\caption{$\lambda = 3.0$}
|
||||||
|
\end{subfigure}
|
||||||
|
\end{subfigure}
|
||||||
|
\caption[Comparison of shallow neural networks and regression
splines]{% In these figures the behaviour stated in ... is
% visualized in two examples. For $(a), (b), (c)$ six values of the sine function,
% equidistantly spaced on $[-\pi, \pi]$, have been used as training data. For
% $(d),(e),(f)$ 15 equidistant values have been used, where
% $y_i^{train} = \sin(x_i^{train}) + \varepsilon_i$ and
% $\varepsilon_i \sim \mathcal{N}(0, 0.3)$. For
% $\mathcal{RN}_w^{\tilde{\lambda},*}$ the random weights are
% distributed as follows
% \begin{align*}
%   \xi_k &\sim
% \end{align*}
Ridge Penalized Neural Network compared to Regression Spline,
trained on $\text{data}_A$ in a), b), c) and on
$\text{data}_B$ in d), e), f).
The parameters of each are given above.
}
|
||||||
|
\label{fig:rn_vs_rs}
|
||||||
|
\end{figure}
|
||||||
|
%%% Local Variables:
|
||||||
|
%%% mode: latex
|
||||||
|
%%% TeX-master:
|
||||||
|
%%% End:
|
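A note on the notation in the figure above: $f_1^{*,\lambda}$ denotes the regression spline (solid) and $\mathcal{RN}_w^{\tilde{\lambda}}$ the ridge penalized shallow network (dashed), where the ridge penalty acts on the output weights $w$. A hedged sketch of the objective, using the training-data notation from the appendix (the exact scaling of $\tilde{\lambda}$ used in the thesis may differ):

\[
  w^{*,\tilde{\lambda}} \in \operatorname*{arg\,min}_{w \in \mathbb{R}^n} \;
  \sum_{i=1}^{N} \Big( \mathcal{RN}_w\big(x_i^{\text{train}}\big) - y_i^{\text{train}} \Big)^2
  + \tilde{\lambda} \, \lVert w \rVert_2^2 .
\]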
93
TeX/Plots/SGD_vs_GD.tex
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
\pgfplotsset{
|
||||||
|
compat=1.11,
|
||||||
|
legend image code/.code={
|
||||||
|
\draw[mark repeat=2,mark phase=2]
|
||||||
|
plot coordinates {
|
||||||
|
(0cm,0cm)
|
||||||
|
(0.0cm,0cm) %% default is (0.3cm,0cm)
|
||||||
|
(0.0cm,0cm) %% default is (0.6cm,0cm)
|
||||||
|
};%
|
||||||
|
}
|
||||||
|
}
|
||||||
|
\begin{figure}
|
||||||
|
\begin{subfigure}[h!]{\textwidth}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[tick style = {draw = none}, width = \textwidth,
|
||||||
|
height = 0.6\textwidth,
|
||||||
|
xtick = {1, 3, 5,7,9,11,13,15,17,19},
|
||||||
|
xticklabels = {$2$, $4$, $6$, $8$,
|
||||||
|
$10$,$12$,$14$,$16$,$18$,$20$},
|
||||||
|
xlabel = {training epoch}, ylabel = {classification accuracy}]
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma] {Plots/Data/GD_01.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma] {Plots/Data/GD_05.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma] {Plots/Data/GD_1.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma]
|
||||||
|
{Plots/Data/SGD_01_b32.log};
|
||||||
|
|
||||||
|
\addlegendentry{GD$_{0.01}$}
|
||||||
|
\addlegendentry{GD$_{0.05}$}
|
||||||
|
\addlegendentry{GD$_{0.1}$}
|
||||||
|
\addlegendentry{SGD$_{0.01}$}
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
%\caption{Classification accuracy}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}[b]{\textwidth}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[tick style = {draw = none}, width = \textwidth,
|
||||||
|
height = 0.6\textwidth,
|
||||||
|
ytick = {0, 1, 2, 3, 4},
|
||||||
|
yticklabels = {$0$, $1$, $\phantom{0.}2$, $3$, $4$},
|
||||||
|
xtick = {1, 3, 5,7,9,11,13,15,17,19},
|
||||||
|
xticklabels = {$2$, $4$, $6$, $8$,
|
||||||
|
$10$,$12$,$14$,$16$,$18$,$20$},
|
||||||
|
xlabel = {training epoch}, ylabel = {error measure\vphantom{fy}}]
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_loss, col sep=comma] {Plots/Data/GD_01.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_loss, col sep=comma] {Plots/Data/GD_05.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_loss, col sep=comma] {Plots/Data/GD_1.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_loss, col sep=comma] {Plots/Data/SGD_01_b32.log};
|
||||||
|
|
||||||
|
\addlegendentry{GD$_{0.01}$}
|
||||||
|
\addlegendentry{GD$_{0.05}$}
|
||||||
|
\addlegendentry{GD$_{0.1}$}
|
||||||
|
\addlegendentry{SGD$_{0.01}$}
|
||||||
|
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\caption{Performance metrics during training}
|
||||||
|
\end{subfigure}
|
||||||
|
% \\~\\
|
||||||
|
\caption[Performance comparison of SGD and GD]{The neural network given in ?? trained with different
algorithms on the MNIST handwritten digits data set. For gradient
descent the learning rates 0.01, 0.05 and 0.1 are used (GD$_{\cdot}$). For
stochastic gradient descent a batch size of 32 and a learning rate
of 0.01 are used (SGD$_{0.01}$).}
|
||||||
|
\label{fig:sgd_vs_gd}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{table}[h]
|
||||||
|
\begin{tabu} to \textwidth {@{} *4{X[c]}c*4{X[c]} @{}}
|
||||||
|
\multicolumn{4}{c}{Classification Accuracy}
|
||||||
|
&~&\multicolumn{4}{c}{Error Measure}
|
||||||
|
\\\cline{1-4}\cline{6-9}
|
||||||
|
GD$_{0.01}$&GD$_{0.05}$&GD$_{0.1}$&SGD$_{0.01}$&&GD$_{0.01}$&GD$_{0.05}$&GD$_{0.1}$&SGD$_{0.01}$
|
||||||
|
\\\cline{1-4}\cline{6-9}
|
||||||
|
\multicolumn{9}{c}{test}\\
|
||||||
|
0.265&0.633&0.203&0.989&&2.267&1.947&3.91&0.032
|
||||||
|
\end{tabu}
|
||||||
|
\caption{Performance metrics of the networks trained in
|
||||||
|
Figure~\ref{fig:sgd_vs_gd} after 20 training epochs.}
|
||||||
|
\label{table:sgd_vs_gd}
|
||||||
|
\end{table}
|
||||||
|
%%% Local Variables:
|
||||||
|
%%% mode: latex
|
||||||
|
%%% TeX-master: "../main"
|
||||||
|
%%% End:
|
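For readers comparing the GD$_{\cdot}$ and SGD$_{0.01}$ curves produced by the file above, the difference between the two update schemes can be sketched as follows. This is a minimal Scala sketch on a toy least-squares problem, not the framework code used for the MNIST runs; all names and data are illustrative.

import scala.util.Random

object GdVsSgd {
  // Mean gradient of the squared error of a scalar linear model y ~ w * x
  // over a batch of (x, y) samples.
  def grad(w: Double, batch: Seq[(Double, Double)]): Double =
    batch.map { case (x, y) => 2 * (w * x - y) * x }.sum / batch.length

  // Gradient descent: one update per epoch, computed on the full training set.
  def gd(data: Seq[(Double, Double)], lr: Double, epochs: Int): Double =
    (1 to epochs).foldLeft(0.0)((w, _) => w - lr * grad(w, data))

  // Stochastic (mini-batch) gradient descent: many updates per epoch,
  // each computed on a small shuffled batch (batch size 32 as in SGD_0.01).
  def sgd(data: Seq[(Double, Double)], lr: Double, epochs: Int, batchSize: Int = 32): Double =
    (1 to epochs).foldLeft(0.0) { (w, _) =>
      Random.shuffle(data).grouped(batchSize).foldLeft(w)((wb, batch) => wb - lr * grad(wb, batch))
    }

  def main(args: Array[String]): Unit = {
    // Toy data with true slope 3; both methods should approach w = 3.
    val data = (1 to 1000).map(_ => { val x = Random.nextDouble(); (x, 3.0 * x) })
    println(f"GD  (lr=0.1):  w = ${gd(data, 0.1, 20)}%.4f")
    println(f"SGD (lr=0.01): w = ${sgd(data, 0.01, 20)}%.4f")
  }
}

With the same number of epochs, SGD performs roughly data.length / batchSize times more parameter updates than full-batch GD, which is the effect the accuracy curves above illustrate.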
71
TeX/Plots/_region_.tex
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
\message{ !name(pfg_test.tex)}\documentclass{article}
|
||||||
|
\usepackage{pgfplots}
|
||||||
|
\usepackage{filecontents}
|
||||||
|
\usepackage{subcaption}
|
||||||
|
\usepackage{adjustbox}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usetikzlibrary{calc, 3d}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\message{ !name(pfg_test.tex) !offset(6) }
|
||||||
|
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{adjustbox}
|
||||||
|
\caption{True position (\textcolor{red}{red}), distorted data (black)}
|
||||||
|
\end{figure}
|
||||||
|
\begin{center}
|
||||||
|
\begin{figure}[h]
|
||||||
|
\begin{subfigure}{0.49\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/klammern.jpg}
|
||||||
|
\caption{Original Picture}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.49\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/image_conv4.png}
|
||||||
|
\caption{test}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.49\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/image_conv5.png}
|
||||||
|
\caption{test}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.49\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/image_conv6.png}
|
||||||
|
\caption{test}
|
||||||
|
\end{subfigure}
|
||||||
|
\end{figure}
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\begin{adjustbox}{width=\textwidth}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{scope}[x = (0:1cm), y=(90:1cm), z=(15:-0.5cm)]
|
||||||
|
\node[canvas is xy plane at z=0, transform shape] at (0,0)
|
||||||
|
{\includegraphics[width=5cm]{Data/klammern_r.jpg}};
|
||||||
|
\node[canvas is xy plane at z=2, transform shape] at (0,-0.2)
|
||||||
|
{\includegraphics[width=5cm]{Data/klammern_g.jpg}};
|
||||||
|
\node[canvas is xy plane at z=4, transform shape] at (0,-0.4)
|
||||||
|
{\includegraphics[width=5cm]{Data/klammern_b.jpg}};
|
||||||
|
\node[canvas is xy plane at z=4, transform shape] at (-8,-0.2)
|
||||||
|
{\includegraphics[width=5.3cm]{Data/klammern_rgb.jpg}};
|
||||||
|
\end{scope}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{adjustbox}
|
||||||
|
\caption{On the right the red, green and blue channels of the picture
are displayed. In order to better visualize the color channels the
black and white picture of each channel has been colored in the
respective color. Combining the layers results in the image on the
left.}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\message{ !name(pfg_test.tex) !offset(3) }
|
||||||
|
|
||||||
|
\end{document}
|
||||||
|
|
||||||
|
%%% Local Variables:
|
||||||
|
%%% mode: latex
|
||||||
|
%%% TeX-master: t
|
||||||
|
%%% End:
|
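The figure set up above illustrates splitting an RGB image into its colour channels and recombining them. A minimal Scala sketch of that split using java.awt.image.BufferedImage; the file paths are illustrative and this is not the code used to prepare the pre-rendered JPEGs referenced above.

import java.awt.image.BufferedImage
import java.io.File
import javax.imageio.ImageIO

object SplitChannels {
  // Returns three images, each keeping only one colour channel of the input.
  def split(img: BufferedImage): (BufferedImage, BufferedImage, BufferedImage) = {
    def keep(shift: Int): BufferedImage = {
      val out = new BufferedImage(img.getWidth, img.getHeight, BufferedImage.TYPE_INT_RGB)
      for (x <- 0 until img.getWidth; y <- 0 until img.getHeight) {
        val value = (img.getRGB(x, y) >> shift) & 0xFF // extract one channel
        out.setRGB(x, y, value << shift)               // write it back alone
      }
      out
    }
    (keep(16), keep(8), keep(0)) // red, green, blue
  }

  def main(args: Array[String]): Unit = {
    val img = ImageIO.read(new File("Data/klammern.jpg")) // hypothetical input path
    val (r, g, b) = split(img)
    ImageIO.write(r, "jpg", new File("Data/klammern_r.jpg"))
    ImageIO.write(g, "jpg", new File("Data/klammern_g.jpg"))
    ImageIO.write(b, "jpg", new File("Data/klammern_b.jpg"))
  }
}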
53
TeX/Plots/fashion_mnist.tex
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist0.pdf}
|
||||||
|
\caption{T-shirt/top}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist1.pdf}
|
||||||
|
\caption{Trousers}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist2.pdf}
|
||||||
|
\caption{Pullover}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist3.pdf}
|
||||||
|
\caption{Dress}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist4.pdf}
|
||||||
|
\caption{Coat}
|
||||||
|
\end{subfigure}\\
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist5.pdf}
|
||||||
|
\caption{Sandal}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist6.pdf}
|
||||||
|
\caption{Shirt}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist7.pdf}
|
||||||
|
\caption{Sneaker}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist8.pdf}
|
||||||
|
\caption{Bag}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist9.pdf}
|
||||||
|
\caption{Ankle boot}
|
||||||
|
\end{subfigure}
|
||||||
|
\caption[Fashion MNIST data set]{The fashion MNIST data set contains 70.000
preprocessed product images from Zalando, which are categorized as
T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt,
Sneaker, Bag, Ankle boot. Of these images 60.000 are used as training images, while
the rest are used to validate the models trained.}
|
||||||
|
\label{fig:MNIST}
|
||||||
|
\end{figure}
|
||||||
|
%%% Local Variables:
|
||||||
|
%%% mode: latex
|
||||||
|
%%% TeX-master: "../main"
|
||||||
|
%%% End:
|
82
TeX/Plots/gen_dropout.tex
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
\pgfplotsset{
|
||||||
|
compat=1.11,
|
||||||
|
legend image code/.code={
|
||||||
|
\draw[mark repeat=2,mark phase=2]
|
||||||
|
plot coordinates {
|
||||||
|
(0cm,0cm)
|
||||||
|
(0.15cm,0cm) %% default is (0.3cm,0cm)
|
||||||
|
(0.3cm,0cm) %% default is (0.6cm,0cm)
|
||||||
|
};%
|
||||||
|
}
|
||||||
|
}
|
||||||
|
\begin{figure}
|
||||||
|
\begin{subfigure}[h]{\textwidth}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
||||||
|
/pgf/number format/precision=3},tick style = {draw = none}, width = \textwidth,
|
||||||
|
height = 0.6\textwidth, ymin = 0.988, legend style={at={(0.9825,0.0175)},anchor=south east},
|
||||||
|
xlabel = {epoch}, ylabel = {Classification Accuracy}, cycle
|
||||||
|
list/Dark2, every axis plot/.append style={line width =1.25pt}]
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Plots/Data/adam_datagen_full_mean.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Plots/Data/adam_datagen_dropout_02_full_mean.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Plots/Data/adam_datagen_dropout_04_full_mean.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Plots/Data/adam_dropout_02_full_mean.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Plots/Data/adam_dropout_04_full_mean.log};
|
||||||
|
\addplot [dashed] table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Plots/Data/adam_full_mean.log};
|
||||||
|
|
||||||
|
\addlegendentry{\footnotesize{G.}}
|
||||||
|
\addlegendentry{\footnotesize{G. + D. 0.2}}
|
||||||
|
\addlegendentry{\footnotesize{G. + D. 0.4}}
|
||||||
|
\addlegendentry{\footnotesize{D. 0.2}}
|
||||||
|
\addlegendentry{\footnotesize{D. 0.4}}
|
||||||
|
\addlegendentry{\footnotesize{Default}}
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\caption{Classification accuracy}
|
||||||
|
\vspace{.25cm}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}[h]{1.0\linewidth}
|
||||||
|
\begin{tabu} to \textwidth {@{}lc*5{X[c]}@{}}
|
||||||
|
\Tstrut \Bstrut & \textsc{\,Adam\,} & D. 0.2 & D. 0.4 & G. &G.+D.\,0.2 & G.+D.\,0.4 \\
|
||||||
|
\hline
|
||||||
|
\multicolumn{7}{c}{Test Accuracy}\Bstrut \\
|
||||||
|
\cline{2-7}
|
||||||
|
mean \Tstrut & 0.9914 & 0.9923 & 0.9930 & 0.9937 & 0.9938 & 0.9943 \\
|
||||||
|
max & 0.9926 & 0.9930 & 0.9934 & 0.9946 & 0.9955 & 0.9956 \\
|
||||||
|
min & 0.9887 & 0.9909 & 0.9922 & 0.9929 & 0.9929 & 0.9934 \\
|
||||||
|
\hline
|
||||||
|
\multicolumn{7}{c}{Training Accuracy}\Bstrut \\
|
||||||
|
\cline{2-7}
|
||||||
|
mean \Tstrut & 0.9994 & 0.9991 & 0.9989 & 0.9967 & 0.9954 & 0.9926 \\
|
||||||
|
max & 0.9996 & 0.9996 & 0.9992 & 0.9979 & 0.9971 & 0.9937 \\
|
||||||
|
min & 0.9992 & 0.9990 & 0.9984 & 0.9947 & 0.9926 & 0.9908 \\
|
||||||
|
\end{tabu}
|
||||||
|
\caption{Mean, maximum and minimum accuracy after 48 epochs of training.}
|
||||||
|
\label{fig:gen_dropout_b}
|
||||||
|
\end{subfigure}
|
||||||
|
\caption[Performance comparison of overfitting measures]{Accuracy for the net given in ... with Dropout (D.),
data generation (G.), a combination of both, or neither (Default) implemented and trained
with \textsc{Adam}. For each epoch either the 60.000 training samples
were used or, for data generation, 10.000 steps, each using a
batch of 60 generated data points. For each configuration the
model was trained 5 times and the average accuracies at each epoch
are given in (a). Mean, maximum and minimum values of the accuracy on
the test and training set are given in (b).}
|
||||||
|
\label{fig:gen_dropout}
|
||||||
|
\end{figure}
|
||||||
|
%%% Local Variables:
|
||||||
|
%%% mode: latex
|
||||||
|
%%% TeX-master: "../main"
|
||||||
|
%%% End:
|
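As a reminder of what the dropout rates 0.2 and 0.4 ("D. 0.2", "D. 0.4") in the comparison above refer to, here is a minimal Scala sketch of inverted dropout. It is not the layer implementation used for the runs, only an illustration of the mechanism.

import scala.util.Random

object Dropout {
  // Inverted dropout: during training each activation is dropped with
  // probability `rate` and the survivors are rescaled by 1 / (1 - rate),
  // so the expected activation is unchanged; at test time the input
  // passes through untouched.
  def apply(activations: Seq[Double], rate: Double, training: Boolean): Seq[Double] =
    if (!training) activations
    else activations.map(a => if (Random.nextDouble() < rate) 0.0 else a / (1 - rate))

  def main(args: Array[String]): Unit =
    println(Dropout(Seq(0.5, 1.0, 1.5, 2.0), rate = 0.4, training = true))
}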
41
TeX/Plots/mnist.tex
Normal file
@@ -0,0 +1,41 @@
\begin{figure}[h]
\centering
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist0.pdf}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist1.pdf}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist2.pdf}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist3.pdf}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist4.pdf}
\end{subfigure}\\
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist5.pdf}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist6.pdf}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist7.pdf}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist8.pdf}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist9.pdf}
\end{subfigure}
\caption[MNIST data set]{The MNIST data set contains 70.000 images of preprocessed handwritten
digits. Of these images 60.000 are used as training images, while
the rest are used to validate the models trained.}
\label{fig:MNIST}
\end{figure}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
%%% End:
301
TeX/Plots/pfg_test.tex
Normal file
@@ -0,0 +1,301 @@
|
|||||||
|
\documentclass[a4paper, 12pt, draft=true]{article}
|
||||||
|
\usepackage{pgfplots}
|
||||||
|
\usepackage{filecontents}
|
||||||
|
\usepackage{subcaption}
|
||||||
|
\usepackage{adjustbox}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\usepackage{tabu}
|
||||||
|
\usepackage{showframe}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{titlecaps}
|
||||||
|
\usetikzlibrary{calc, 3d}
|
||||||
|
\usepgfplotslibrary{colorbrewer}
|
||||||
|
|
||||||
|
\newcommand\Tstrut{\rule{0pt}{2.6ex}} % = `top' strut
|
||||||
|
\newcommand\Bstrut{\rule[-0.9ex]{0pt}{0pt}} % = `bottom' strut
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
\pgfplotsset{
|
||||||
|
compat=1.11,
|
||||||
|
legend image code/.code={
|
||||||
|
\draw[mark repeat=2,mark phase=2]
|
||||||
|
plot coordinates {
|
||||||
|
(0cm,0cm)
|
||||||
|
(0.3cm,0cm) %% default is (0.3cm,0cm)
|
||||||
|
(0.6cm,0cm) %% default is (0.6cm,0cm)
|
||||||
|
};%
|
||||||
|
}
|
||||||
|
}
|
||||||
|
\begin{figure}
|
||||||
|
\begin{subfigure}[h]{\textwidth}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
||||||
|
/pgf/number format/precision=3},tick style = {draw = none}, width = \textwidth,
|
||||||
|
height = 0.35\textwidth, legend style={at={(0.9825,0.0175)},anchor=south east},
|
||||||
|
ylabel = {Test Accuracy}, cycle
|
||||||
|
list/Dark2, every axis plot/.append style={line width
|
||||||
|
=1.25pt}]
|
||||||
|
% \addplot [dashed] table
|
||||||
|
% [x=epoch, y=accuracy, col sep=comma, mark = none]
|
||||||
|
% {Data/adam_datagen_full.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_1.mean};
|
||||||
|
% \addplot [dashed] table
|
||||||
|
% [x=epoch, y=accuracy, col sep=comma, mark = none]
|
||||||
|
% {Data/adam_datagen_dropout_02_full.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_datagen_1.mean};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_datagen_dropout_02_1.mean};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_dropout_02_1.mean};
|
||||||
|
|
||||||
|
|
||||||
|
\addlegendentry{\footnotesize{G.}}
|
||||||
|
\addlegendentry{\footnotesize{G. + D. 0.2}}
|
||||||
|
\addlegendentry{\footnotesize{G. + D. 0.4}}
|
||||||
|
\addlegendentry{\footnotesize{D. 0.2}}
|
||||||
|
\addlegendentry{\footnotesize{D. 0.4}}
|
||||||
|
\addlegendentry{\footnotesize{Default}}
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\caption{1 sample per class}
|
||||||
|
\vspace{0.25cm}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}[h]{\textwidth}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
||||||
|
/pgf/number format/precision=3},tick style = {draw = none}, width = \textwidth,
|
||||||
|
height = 0.35\textwidth, legend style={at={(0.9825,0.0175)},anchor=south east},
|
||||||
|
ylabel = {Test Accuracy}, cycle
|
||||||
|
list/Dark2, every axis plot/.append style={line width
|
||||||
|
=1.25pt}]
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_dropout_00_10.mean};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_dropout_02_10.mean};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_datagen_dropout_00_10.mean};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_datagen_dropout_02_10.mean};
|
||||||
|
|
||||||
|
|
||||||
|
\addlegendentry{\footnotesize{G.}}
|
||||||
|
\addlegendentry{\footnotesize{G. + D. 0.2}}
|
||||||
|
\addlegendentry{\footnotesize{G. + D. 0.4}}
|
||||||
|
\addlegendentry{\footnotesize{D. 0.2}}
|
||||||
|
\addlegendentry{\footnotesize{D. 0.4}}
|
||||||
|
\addlegendentry{\footnotesize{Default}}
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\caption{10 samples per class}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}[h]{\textwidth}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
|
||||||
|
/pgf/number format/precision=3},tick style = {draw = none}, width = 0.9875\textwidth,
|
||||||
|
height = 0.35\textwidth, legend style={at={(0.9825,0.0175)},anchor=south east},
|
||||||
|
xlabel = {epoch}, ylabel = {Test Accuracy}, cycle
|
||||||
|
list/Dark2, every axis plot/.append style={line width
|
||||||
|
=1.25pt}, ymin = {0.92}]
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_dropout_00_100.mean};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_dropout_02_100.mean};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_datagen_dropout_00_100.mean};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Data/adam_datagen_dropout_02_100.mean};
|
||||||
|
|
||||||
|
\addlegendentry{\footnotesize{G.}}
|
||||||
|
\addlegendentry{\footnotesize{G. + D. 0.2}}
|
||||||
|
\addlegendentry{\footnotesize{G. + D. 0.4}}
|
||||||
|
\addlegendentry{\footnotesize{D. 0.2}}
|
||||||
|
\addlegendentry{\footnotesize{D. 0.4}}
|
||||||
|
\addlegendentry{\footnotesize{Default}}
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\caption{100 samples per class}
|
||||||
|
\vspace{.25cm}
|
||||||
|
\end{subfigure}
|
||||||
|
\caption{Accuracy for the net given in ... with Dropout (D.),
|
||||||
|
data generation (G.), a combination, or neither (Default) implemented and trained
|
||||||
|
with \textsc{Adam}. For each epoch the 60.000 training samples
|
||||||
|
were used, or for data generation 10.000 steps with each using
|
||||||
|
batches of 60 generated data points. For each configuration the
|
||||||
|
model was trained 5 times and the average accuracies at each epoch
|
||||||
|
are given in (a). Mean, maximum and minimum values of accuracy on
|
||||||
|
the test and training set are given in (b).}
|
||||||
|
\end{figure}
|
||||||
|
\begin{table}
|
||||||
|
\centering
|
||||||
|
\begin{tabu} to \textwidth {@{}l*4{X[c]}@{}}
|
||||||
|
\Tstrut \Bstrut & \textsc{Adam} & D. 0.2 & Gen & Gen.+D. 0.2 \\
|
||||||
|
\hline
|
||||||
|
&
|
||||||
|
\multicolumn{4}{c}{\titlecap{test accuracy for 1 sample}}\Bstrut \\
|
||||||
|
\cline{2-5}
|
||||||
|
max \Tstrut & 0.5633 & 0.5312 & 0.6704 & 0.6604 \\
|
||||||
|
min & 0.3230 & 0.4224 & 0.4878 & 0.5175 \\
|
||||||
|
mean & 0.4570 & 0.4714 & 0.5862 & 0.6014 \\
|
||||||
|
var & 0.0040 & 0.0012 & 0.0036 & 0.0023 \\
|
||||||
|
\hline
|
||||||
|
&
|
||||||
|
\multicolumn{4}{c}{\titlecap{test accuracy for 10 samples}}\Bstrut \\
|
||||||
|
\cline{2-5}
|
||||||
|
max \Tstrut & 0.8585 & 0.9423 & 0.9310 & 0.9441 \\
|
||||||
|
min & 0.8148 & 0.9081 & 0.9018 & 0.9061 \\
|
||||||
|
mean & 0.8377 & 0.9270 & 0.9185 & 0.9232 \\
|
||||||
|
var & 2.7e-4 & 1.3e-4 & 6e-05 & 1.5e-4 \\
|
||||||
|
\hline
|
||||||
|
&
|
||||||
|
\multicolumn{4}{c}{\titlecap{test accuracy for 100 samples}}\Bstrut \\
|
||||||
|
\cline{2-5}
|
||||||
|
max & 0.9637 & 0.9796 & 0.9810 & 0.9805 \\
|
||||||
|
min & 0.9506 & 0.9719 & 0.9702 & 0.9727 \\
|
||||||
|
mean & 0.9582 & 0.9770 & 0.9769 & 0.9783 \\
|
||||||
|
var & 2e-05 & 1e-05 & 1e-05 & 0 \\
|
||||||
|
\hline
|
||||||
|
\end{tabu}
|
||||||
|
\caption{Values of the test accuracy of the model trained 10 times
|
||||||
|
of random training sets containing 1, 10 and 100 data points per
|
||||||
|
class.}
|
||||||
|
\end{table}
|
||||||
|
|
||||||
|
\begin{center}
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/mnist0.pdf}
|
||||||
|
\caption{original\\image}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/mnist_gen_zoom.pdf}
|
||||||
|
\caption{random\\zoom}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/mnist_gen_shear.pdf}
|
||||||
|
\caption{random\\shear}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/mnist_gen_rotation.pdf}
|
||||||
|
\caption{random\\rotation}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/mnist_gen_shift.pdf}
|
||||||
|
\caption{random\\positional shift}
|
||||||
|
\end{subfigure}\\
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/mnist5.pdf}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/mnist6.pdf}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/mnist7.pdf}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/mnist8.pdf}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.19\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{Data/mnist9.pdf}
|
||||||
|
\end{subfigure}
|
||||||
|
\caption{The MNIST data set contains 70.000 images of preprocessed handwritten
|
||||||
|
digits. Of these images 60.000 are used as training images, while
|
||||||
|
the rest are used to validate the models trained.}
|
||||||
|
\end{figure}
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\begin{adjustbox}{width=\textwidth}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{scope}[x = (0:1cm), y=(90:1cm), z=(15:-0.5cm)]
|
||||||
|
\node[canvas is xy plane at z=0, transform shape] at (0,0)
|
||||||
|
{\includegraphics[width=5cm]{Data/klammern_r.jpg}};
|
||||||
|
\node[canvas is xy plane at z=2, transform shape] at (0,-0.2)
|
||||||
|
{\includegraphics[width=5cm]{Data/klammern_g.jpg}};
|
||||||
|
\node[canvas is xy plane at z=4, transform shape] at (0,-0.4)
|
||||||
|
{\includegraphics[width=5cm]{Data/klammern_b.jpg}};
|
||||||
|
\node[canvas is xy plane at z=4, transform shape] at (-8,-0.2)
|
||||||
|
{\includegraphics[width=5.3cm]{Data/klammern_rgb.jpg}};
|
||||||
|
\end{scope}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{adjustbox}
|
||||||
|
\caption{On the right the red, green and blue channels of the picture
are displayed. In order to better visualize the color channels the
black and white picture of each channel has been colored in the
respective color. Combining the layers results in the image on the
left.}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\begin{subfigure}{.45\linewidth}
|
||||||
|
\centering
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[enlargelimits=false, ymin=0, ymax = 1, width=\textwidth]
|
||||||
|
\addplot [domain=-5:5, samples=101,unbounded coords=jump]{1/(1+exp(-x))};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{.45\linewidth}
|
||||||
|
\centering
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[enlargelimits=false, width=\textwidth]
|
||||||
|
\addplot[domain=-5:5, samples=100]{tanh(x)};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{.45\linewidth}
|
||||||
|
\centering
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[enlargelimits=false, width=\textwidth,
|
||||||
|
ytick={0,2,4},yticklabels={\hphantom{4.}0,2,4}, ymin=-1]
|
||||||
|
\addplot[domain=-5:5, samples=100]{max(0,x)};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{.45\linewidth}
|
||||||
|
\centering
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[enlargelimits=false, width=\textwidth, ymin=-1,
|
||||||
|
ytick={0,2,4},yticklabels={$\hphantom{-5.}0$,2,4}]
|
||||||
|
\addplot[domain=-5:5, samples=100]{max(0,x)+ 0.1*min(0,x)};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{subfigure}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[enlargelimits=false]
|
||||||
|
\addplot [domain=-5:5, samples=101,unbounded coords=jump]{1/(1+exp(-x))};
|
||||||
|
\addplot[domain=-5:5, samples=100]{tanh(x)};
|
||||||
|
\addplot[domain=-5:5, samples=100]{max(0,x)};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[enlargelimits=false]
|
||||||
|
\addplot[domain=-2*pi:2*pi, samples=100]{cos(deg(x))};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
|
|
||||||
|
%%% Local Variables:
|
||||||
|
%%% mode: latex
|
||||||
|
%%% TeX-master: t
|
||||||
|
%%% End:
|
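The four activation functions plotted at the end of pfg_test.tex above correspond to the following definitions. A small Scala sketch for reference; the 0.1 slope of the leaky ReLU is taken from the plot expression max(0,x) + 0.1*min(0,x).

object Activations {
  def sigmoid(x: Double): Double = 1.0 / (1.0 + math.exp(-x))
  def tanh(x: Double): Double = math.tanh(x)
  def relu(x: Double): Double = math.max(0.0, x)
  // Leaky ReLU with slope 0.1 for negative inputs.
  def leakyRelu(x: Double): Double = math.max(0.0, x) + 0.1 * math.min(0.0, x)

  def main(args: Array[String]): Unit =
    Seq(-2.0, -0.5, 0.0, 0.5, 2.0).foreach { x =>
      println(f"x=$x%5.2f  sigmoid=${sigmoid(x)}%6.3f  tanh=${tanh(x)}%6.3f  relu=${relu(x)}%4.2f  leaky=${leakyRelu(x)}%5.2f")
    }
}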
78
TeX/Plots/sdg_comparison.tex
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
\pgfplotsset{
|
||||||
|
compat=1.11,
|
||||||
|
legend image code/.code={
|
||||||
|
\draw[mark repeat=2,mark phase=2]
|
||||||
|
plot coordinates {
|
||||||
|
(0cm,0cm)
|
||||||
|
(0.0cm,0cm) %% default is (0.3cm,0cm)
|
||||||
|
(0.0cm,0cm) %% default is (0.6cm,0cm)
|
||||||
|
};%
|
||||||
|
}
|
||||||
|
}
|
||||||
|
\begin{figure}
|
||||||
|
\begin{subfigure}[h]{\textwidth}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[tick style = {draw = none}, width = \textwidth,
|
||||||
|
height = 0.6\textwidth, ymin = 0.92, legend style={at={(0.9825,0.75)},anchor=north east},
|
||||||
|
xlabel = {epoch}, ylabel = {Classification Accuracy}]
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Plots/Data/adagrad.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Plots/Data/adadelta.log};
|
||||||
|
\addplot table
|
||||||
|
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
|
||||||
|
{Plots/Data/adam.log};
|
||||||
|
|
||||||
|
\addlegendentry{\footnotesize{ADAGRAD}}
|
||||||
|
\addlegendentry{\footnotesize{ADADELTA}}
|
||||||
|
\addlegendentry{\footnotesize{ADAM}}
|
||||||
|
\addlegendentry{SGD$_{0.01}$}
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
%\caption{Classification accuracy}
|
||||||
|
\vspace{.25cm}
|
||||||
|
\end{subfigure}
|
||||||
|
% \begin{subfigure}[b]{\textwidth}
|
||||||
|
% \begin{tikzpicture}
|
||||||
|
% \begin{axis}[tick style = {draw = none}, width = \textwidth,
|
||||||
|
% height = 0.6\textwidth, ymax = 0.5,
|
||||||
|
% xlabel = {epoch}, ylabel = {Error Measure\vphantom{y}},ytick ={0,0.1,0.2,0.3,0.4,0.45,0.5}, yticklabels =
|
||||||
|
% {0,0.1,0.2,0.3,0.4,\phantom{0.94},0.5}]
|
||||||
|
% \addplot table
|
||||||
|
% [x=epoch, y=val_loss, col sep=comma, mark = none] {Plots/Data/adagrad.log};
|
||||||
|
% \addplot table
|
||||||
|
% [x=epoch, y=val_loss, col sep=comma, mark = none] {Plots/Data/adadelta.log};
|
||||||
|
% \addplot table
|
||||||
|
% [x=epoch, y=val_loss, col sep=comma, mark = none] {Plots/Data/adam.log};
|
||||||
|
|
||||||
|
% \addlegendentry{\footnotesize{ADAGRAD}}
|
||||||
|
% \addlegendentry{\footnotesize{ADADELTA}}
|
||||||
|
% \addlegendentry{\footnotesize{ADAM}}
|
||||||
|
% \addlegendentry{SGD$_{0.01}$}
|
||||||
|
|
||||||
|
% \end{axis}
|
||||||
|
% \end{tikzpicture}
|
||||||
|
% \caption{Performance metrics during training}
|
||||||
|
% \vspace{.25cm}
|
||||||
|
% \end{subfigure}
|
||||||
|
\begin{subfigure}[b]{1.0\linewidth}
|
||||||
|
\begin{tabu} to \textwidth {@{} *3{X[c]}c*3{X[c]} @{}}
|
||||||
|
\multicolumn{3}{c}{Classification Accuracy}
|
||||||
|
&~&\multicolumn{3}{c}{Error Measure}
|
||||||
|
\\\cline{1-3}\cline{5-7}
|
||||||
|
ADAGRAD&ADADELTA&ADAM&&ADAGRAD&ADADELTA&ADAM
|
||||||
|
\\\cline{1-3}\cline{5-7}
|
||||||
|
1&1&1&&1&1&1
|
||||||
|
\end{tabu}
|
||||||
|
\caption{Performance metrics after 20 epochs}
|
||||||
|
\end{subfigure}
|
||||||
|
\caption[Performance comparison of training algorithms]{Classification accuracy on the test set and ... performance metrics of the network given in ... trained
with different optimization algorithms.}
|
||||||
|
\label{fig:comp_alg}
|
||||||
|
\end{figure}
|
||||||
|
%%% Local Variables:
|
||||||
|
%%% mode: latex
|
||||||
|
%%% TeX-master: "../main"
|
||||||
|
%%% End:
|
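For orientation on what the optimisers compared above do, here is a hedged sketch of the \textsc{Adam} update in its standard formulation (defaults $\beta_1 = 0.9$, $\beta_2 = 0.999$); ADAGRAD and ADADELTA differ mainly in how the squared-gradient statistics are accumulated. The Scala below is illustrative and not the implementation used for the runs.

object AdamSketch {
  // One-dimensional Adam update, applied repeatedly to a gradient function.
  def adam(grad: Double => Double, w0: Double, lr: Double = 0.001,
           beta1: Double = 0.9, beta2: Double = 0.999, eps: Double = 1e-8,
           steps: Int = 1000): Double = {
    var (w, m, v) = (w0, 0.0, 0.0)
    for (t <- 1 to steps) {
      val g = grad(w)
      m = beta1 * m + (1 - beta1) * g            // first-moment estimate
      v = beta2 * v + (1 - beta2) * g * g        // second-moment estimate
      val mHat = m / (1 - math.pow(beta1, t))    // bias correction
      val vHat = v / (1 - math.pow(beta2, t))
      w -= lr * mHat / (math.sqrt(vHat) + eps)
    }
    w
  }

  def main(args: Array[String]): Unit =
    // Minimise (w - 3)^2; the gradient is 2 * (w - 3).
    println(adam(w => 2 * (w - 3), w0 = 0.0, steps = 5000))
}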
64
TeX/Plots/sin_conv.csv
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
"","x_i","y_i","x_d","y_d","x","y"
|
||||||
|
"1",0,0,0.0815633019993375,0.095134925029757,0.0815633019993375,0.095134925029757
|
||||||
|
"2",0.1,0.0998334166468282,-0.137539012603596,0.503920419784276,-0.137539012603596,0.503920419784276
|
||||||
|
"3",0.2,0.198669330795061,0.219868163218743,0.32022289024623,0.219868163218743,0.32022289024623
|
||||||
|
"4",0.3,0.29552020666134,0.378332723534869,0.474906286765401,0.378332723534869,0.474906286765401
|
||||||
|
"5",0.4,0.389418342308651,0.286034335293811,0.422891394375764,0.215056588291437,0.412478430748051
|
||||||
|
"6",0.5,0.479425538604203,-0.109871707385461,0.229661026779107,0.122574532557623,0.353221043330047
|
||||||
|
"7",0.6,0.564642473395035,0.91036951450573,0.56079130435097,0.451160317716352,0.452893574072324
|
||||||
|
"8",0.7,0.644217687237691,0.899001194675409,0.714355793051917,0.491731451724399,0.514477919331008
|
||||||
|
"9",0.8,0.717356090899523,0.733791390723896,0.694085383523086,0.488943974889845,0.530054084580656
|
||||||
|
"10",0.9,0.783326909627483,0.893642943873427,0.739792642916928,0.599785378272423,0.575149967162231
|
||||||
|
"11",1,0.841470984807897,0.895913227983752,0.658288213778898,0.650886140047209,0.577618711891772
|
||||||
|
"12",1.1,0.891207360061435,1.01252219752013,0.808981437684505,0.726263244907525,0.643161394030218
|
||||||
|
"13",1.2,0.932039085967226,1.30930912337975,1.04111824066026,0.872590842152803,0.745714536528734
|
||||||
|
"14",1.3,0.963558185417193,1.0448292335495,0.741250429230841,0.850147062957694,0.687171673021914
|
||||||
|
"15",1.4,0.98544972998846,1.57369086195552,1.17277927321094,1.06520673597544,0.847936751231165
|
||||||
|
"16",1.5,0.997494986604054,1.61427415976939,1.3908361301708,1.15616745244604,0.969474391592075
|
||||||
|
"17",1.6,0.999573603041505,1.34409615749122,0.976992098566069,1.13543598207093,0.889434319996364
|
||||||
|
"18",1.7,0.991664810452469,1.79278028030419,1.02939764179765,1.33272772191879,0.935067381106346
|
||||||
|
"19",1.8,0.973847630878195,1.50721559744085,0.903076361857071,1.30862923824728,0.91665506605512
|
||||||
|
"20",1.9,0.946300087687414,1.835014641556,0.830477479204284,1.45242210409837,0.889715842048808
|
||||||
|
"21",2,0.909297426825682,1.98589997236352,0.887302138185342,1.56569111721857,0.901843632635883
|
||||||
|
"22",2.1,0.863209366648874,2.31436634488224,0.890096618924313,1.73810390755555,0.899632162941341
|
||||||
|
"23",2.2,0.80849640381959,2.14663445612581,0.697012453130415,1.77071083163663,0.831732978616874
|
||||||
|
"24",2.3,0.74570521217672,2.17162372560288,0.614243640399509,1.84774268936257,0.787400621584077
|
||||||
|
"25",2.4,0.675463180551151,2.2488591417345,0.447664288915269,1.93366609303299,0.707449056213168
|
||||||
|
"26",2.5,0.598472144103957,2.56271588872389,0.553368843490625,2.08922735802261,0.702402440783529
|
||||||
|
"27",2.6,0.515501371821464,2.60986205081511,0.503762006272682,2.17548673152621,0.657831176057599
|
||||||
|
"28",2.7,0.42737988023383,2.47840649766003,0.215060732402894,2.20251747034638,0.533903400086802
|
||||||
|
"29",2.8,0.334988150155905,2.99861119922542,0.28503285049582,2.43015164462239,0.512492561673074
|
||||||
|
"30",2.9,0.239249329213982,3.09513467852082,0.245355736487949,2.54679545455398,0.461447717313721
|
||||||
|
"31",3,0.141120008059867,2.86247369846558,0.0960140633436418,2.55274767368554,0.371740588261606
|
||||||
|
"32",3.1,0.0415806624332905,2.79458017090243,-0.187923650913249,2.59422388058738,0.234694070506915
|
||||||
|
"33",3.2,-0.0583741434275801,3.6498183243501,-0.186738431858275,2.9216851043241,0.173308072295566
|
||||||
|
"34",3.3,-0.157745694143249,3.19424275971809,-0.221908035274934,2.86681135711315,0.101325637659584
|
||||||
|
"35",3.4,-0.255541102026832,3.53166785156005,-0.295496842654793,3.03827050777863,0.0191967841533109
|
||||||
|
"36",3.5,-0.35078322768962,3.53250700922714,-0.364585027403596,3.12709094619305,-0.0558446366563474
|
||||||
|
"37",3.6,-0.442520443294852,3.52114271616751,-0.363845774016092,3.18702722489489,-0.10585071711408
|
||||||
|
"38",3.7,-0.529836140908493,3.72033580551176,-0.386489608468821,3.31200591645168,-0.158195730190865
|
||||||
|
"39",3.8,-0.611857890942719,4.0803717995796,-0.64779795182054,3.49862620703954,-0.284999326812438
|
||||||
|
"40",3.9,-0.687766159183974,3.88351729419721,-0.604406622894426,3.51908925124143,-0.324791870057922
|
||||||
|
"41",4,-0.756802495307928,3.9941257036697,-0.8061112437715,3.62222513609486,-0.438560071688316
|
||||||
|
"42",4.1,-0.818277111064411,3.81674488816054,-0.548538951165239,3.63032709398802,-0.41285438330036
|
||||||
|
"43",4.2,-0.871575772413588,4.47703348424544,-0.998992385231986,3.88581748102334,-0.592305016590357
|
||||||
|
"44",4.3,-0.916165936749455,4.46179199544059,-0.969288921090897,3.96444243944485,-0.643076376622242
|
||||||
|
"45",4.4,-0.951602073889516,4.15184730382548,-1.11987501275525,3.93838897981045,-0.743258835859858
|
||||||
|
"46",4.5,-0.977530117665097,4.64522916494355,-0.772872365801468,4.15504805602606,-0.691414328153313
|
||||||
|
"47",4.6,-0.993691003633465,4.68087925098283,-0.650422764094352,4.24176417425486,-0.675107584174976
|
||||||
|
"48",4.7,-0.999923257564101,5.00475403211142,-0.922605880059771,4.41432228408005,-0.770625346502085
|
||||||
|
"49",4.8,-0.996164608835841,4.71428836112322,-1.14280193223997,4.41279031790692,-0.861010494025717
|
||||||
|
"50",4.9,-0.982452612624332,5.02115518218406,-0.9819618243158,4.57449352886454,-0.843786948015608
|
||||||
|
"51",5,-0.958924274663138,4.92057344952522,-0.872931430146499,4.61418118503201,-0.836318916150308
|
||||||
|
"52",5.1,-0.925814682327732,5.37277893732831,-0.91444926304078,4.81555148166217,-0.864686555983682
|
||||||
|
"53",5.2,-0.883454655720153,5.19524942845082,-1.41169784739596,4.84152902094499,-1.03768305406186
|
||||||
|
"54",5.3,-0.832267442223901,5.4432222181271,-0.726481337519931,4.98565483155961,-0.856094353978009
|
||||||
|
"55",5.4,-0.772764487555987,4.98285013865449,-0.692803346852181,4.90897053115903,-0.838425020062396
|
||||||
|
"56",5.5,-0.705540325570392,5.33298025214155,-0.343702005257262,5.0497327607228,-0.711573964373115
|
||||||
|
"57",5.6,-0.631266637872321,5.49935694796791,-0.828968673188174,5.15036520204232,-0.816467931201244
|
||||||
|
"58",5.7,-0.550685542597638,5.69204187550805,-0.481580461165225,5.26232964126231,-0.689500817105975
|
||||||
|
"59",5.8,-0.464602179413757,5.84391772412888,-0.20453899468884,5.38069867877875,-0.564365367144995
|
||||||
|
"60",5.9,-0.373876664830236,5.48166674139637,-0.597796931577294,5.3357436834558,-0.649913835818738
|
||||||
|
"61",6,-0.279415498198926,5.77474590863769,-0.280234463056808,5.46956415981143,-0.524503219480344
|
||||||
|
"62",6.1,-0.182162504272095,6.36764321572312,-0.0996286988755344,5.7169871104113,-0.422854073705143
|
||||||
|
"63",6.2,-0.0830894028174964,6.46175133910451,-0.025702847911482,5.83540227044819,-0.355719019286555
|
|
45
TeX/Plots/sin_conv.tex
Normal file
@@ -0,0 +1,45 @@
\begin{figure}
\centering
\begin{subfigure}[b]{0.49\textwidth}
\centering
\begin{adjustbox}{width=\textwidth, height=0.25\textheight}
\begin{tikzpicture}
\begin{axis}[tick style = {draw = none}, xticklabel = \empty,
yticklabel=\empty]
\addplot [mark options={scale = 0.7}, mark = o] table
[x=x_d,y=y_d, col sep = comma] {Plots/Data/sin_conv.csv};
\addplot [red, mark=x] table [x=x_i, y=y_i, col sep=comma, color ='black'] {Plots/Data/sin_conv.csv};
\end{axis}
\end{tikzpicture}
\end{adjustbox}
\caption{True position (\textcolor{red}{red}), distorted position data (black)}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\centering
\begin{adjustbox}{width=\textwidth, height=0.25\textheight}
\begin{tikzpicture}
\begin{axis}[tick style = {draw = none}, xticklabel = \empty,
yticklabel=\empty]
\addplot [mark options={scale = 0.7}, mark = o] table [x=x,y=y, col
sep = comma] {Plots/Data/sin_conv.csv};
\addplot [red, mark=x] table [x=x_i, y=y_i, col sep=comma, color ='black'] {Plots/Data/sin_conv.csv};
\end{axis}
\end{tikzpicture}
\end{adjustbox}
\caption{True position (\textcolor{red}{red}), filtered position data (black)}
\end{subfigure}
\caption[Signal smoothing using convolution]{Example for noise reduction using convolution with simulated
positional data. The filter
$g(i)=\left(\nicefrac{1}{3},\nicefrac{1}{4},\nicefrac{1}{5},\nicefrac{1}{6},\nicefrac{1}{20}\right)_{(i-1)}$
is chosen and applied to the $x$ and $y$ coordinate
data separately. The convolution of both signals with $g$
improves the MSE of the positions from 0.196 to 0.170 and
visibly smoothes the data.
}
\label{fig:sin_conv}
\end{figure}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
%%% End:
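The caption of fig:sin_conv above describes the smoothing as a discrete convolution of each coordinate signal with the length-5 filter $g$, whose weights sum to 1. A minimal Scala sketch of that operation, using illustrative synthetic data rather than the values stored in sin_conv.csv:

object ConvSmoothing {
  // Discrete convolution of a signal with a short filter g; the first
  // g.length - 1 samples are left unchanged because the full window
  // does not fit there (one possible boundary convention).
  def smooth(signal: Seq[Double], g: Seq[Double]): Seq[Double] =
    signal.indices.map { i =>
      if (i < g.length - 1) signal(i)
      else g.indices.map(j => g(j) * signal(i - j)).sum
    }

  // Mean squared error between two position sequences.
  def mse(a: Seq[Double], b: Seq[Double]): Double =
    (a zip b).map { case (x, y) => (x - y) * (x - y) }.sum / a.length

  def main(args: Array[String]): Unit = {
    val g = Seq(1.0 / 3, 1.0 / 4, 1.0 / 5, 1.0 / 6, 1.0 / 20) // weights sum to 1
    val rng = new scala.util.Random(0)
    val t = (0 until 63).map(_ * 0.1)
    val truth = t.map(math.sin)
    val noisy = truth.map(_ + 0.3 * rng.nextGaussian())        // distorted y-signal
    val filtered = smooth(noisy, g)
    println(f"MSE noisy:    ${mse(noisy, truth)}%.3f")
    println(f"MSE filtered: ${mse(filtered, truth)}%.3f")
  }
}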
5
TeX/Plots/y.tex
Normal file
@@ -0,0 +1,5 @@

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
%%% End:
@@ -1,333 +1,24 @@
|
|||||||
|
|
||||||
\newpage
|
\newpage
|
||||||
\begin{appendices}
|
\begin{appendices}
|
||||||
\counterwithin{lstfloat}{section}
|
\section{Proofs for sone Lemmata in ...}
|
||||||
\section{Notes on Proofs of Lemmata in Section~\ref{sec:conv}}
|
In the following there will be proofs for some important Lemmata in
|
||||||
\label{appendix:proofs}
|
Section~\ref{sec:theo38}. Further proofs not discussed here can be
|
||||||
Contrary to \textcite{heiss2019} we do not make the distinction between $f_+$ and
|
found in \textcite{heiss2019}
|
||||||
$f_-$.
|
\begin{Theorem}[Proof of Lemma~\ref{theo38}]
|
||||||
This results in some alterations in the proofs being necessary. In
|
\end{Theorem}
|
||||||
the following the affected proofs and the required changes are given.
|
|
||||||
% Because of that slight alterations are needed in the proofs of
|
|
||||||
% .. auxiliary lemmata.
|
|
||||||
% Alterations that go beyond substituting $F_{+-}^{}$
|
|
||||||
% As the proofs are ... for the most part only
|
|
||||||
% the alterations needed are specified.
|
|
||||||
|
|
||||||
|
\begin{Lemma}[$\frac{w^{*,\tilde{\lambda}}_k}{v_k}\approx\mathcal{O}(\frac{1}{n})$]
|
||||||
|
For any $\lambda > 0$ and training data $(x_i^{\text{train}},
|
||||||
|
y_i^{\text{train}}) \in \mathbb{R}^2, \, i \in
|
||||||
% In the following there will be proofs for some important Lemmata in
|
\left\{1,\dots,N\right\}$, we have
|
||||||
% Section~\ref{sec:theo38}. Further proofs not discussed here can be
|
|
||||||
% found in \textcite{heiss2019}
|
|
||||||
% The proves in this section are based on \textcite{heiss2019}. Slight
|
|
||||||
% alterations have been made to accommodate for not splitting $f$ into
|
|
||||||
% $f_+$ and $f_-$.
|
|
||||||
% \begin{Theorem}[Proof of Lemma~\ref{theo38}]
|
|
||||||
% \end{Theorem}
|
|
||||||
|
|
||||||
% \begin{Lemma}[$\frac{w^{*,\tilde{\lambda}}_k}{v_k}\approx\mathcal{O}(\frac{1}{n})$]
|
|
||||||
% For any $\lambda > 0$ and training data $(x_i^{\text{train}},
|
|
||||||
% y_i^{\text{train}}) \in \mathbb{R}^2, \, i \in
|
|
||||||
% \left\{1,\dots,N\right\}$, we have
|
|
||||||
% \[
|
|
||||||
% \max_{k \in \left\{1,\dots,n\right\}} \frac{w^{*,
|
|
||||||
% \tilde{\lambda}}_k}{v_k} = \po_{n\to\infty}
|
|
||||||
% \]
|
|
||||||
|
|
||||||
|
|
||||||
% \end{Lemma}
|
|
||||||
|
|
||||||
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.9)]~\\\noindent
\label{proof:lem9}
With $\tilde{\lambda} \coloneqq \lambda n g(0)$ Lemma~\ref{lem:cnvh} follows
analogously when considering $\tilde{w}$, $f_g^{*, \lambda}$, and $h_k$
instead of $\tilde{w}^+$, $f_{g,+}^{*, \lambda}$, and $\bar{h}_k$.
Consider $\kappa = \left\{1, \dots, n \right\}$ for $n$ nodes
instead of $\kappa^+$. With $h_k = \frac{1}{n g_\xi(\xi_n)}$
instead of $\bar{h}_k$
and
\[
  \mathbb{E} \left[\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
  \delta(l+1))\right\}}\right] = n \int_{\delta
  l}^{\delta(l+1)}g_\xi (x) dx \approx n (\delta g_\xi(\delta l)
  \pm \delta \tilde{\varepsilon}).
\]
|
|
||||||
% \[
|
|
||||||
% \sum_{k \in \kappa : \xi_k < T} \varphi(\xi_k, v_k)
|
|
||||||
% h_{k,n} = \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
||||||
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
|
|
||||||
% \left(\sum_{\substack{k \in \kappa \\ \xi_k \in
|
|
||||||
% [\delta l , \delta(l+1))}} \varphi(\xi_k, v_k)
|
|
||||||
% h_{k,n}\right) \approx
|
|
||||||
% \]
|
|
||||||
% \[
|
|
||||||
% \approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
||||||
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
|
|
||||||
% \left(\sum_{\substack{k \in \kappa \\ \xi_k \in
|
|
||||||
% [\delta l , \delta(l+1))}} \left(\varphi(\delta l, v_k)
|
|
||||||
% \frac{1}{n g_\xi (\delta l)} \pm \frac{\varepsilon}{n}\right)
|
|
||||||
% \frac{\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
|
|
||||||
% \delta(l+1))\right\}}}{\abs{\left\{m \in \kappa : \xi_m
|
|
||||||
% \in [\delta l, \delta(l+1))\right\}}}\right)
|
|
||||||
% \]
|
|
||||||
% \[
|
|
||||||
% \approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
||||||
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
|
|
||||||
% \left(\frac{\sum_{\substack{k \in \kappa \\ \xi_k \in
|
|
||||||
% [\delta l , \delta(l+1))}}\varphi(\delta l,
|
|
||||||
% v_k)}{\abs{\left\{m \in \kappa : \xi_m
|
|
||||||
% \in [\delta l, \delta(l+1))\right\}}}
|
|
||||||
% \frac{\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
|
|
||||||
% \delta(l+1))\right\}}}{n g_\xi (\delta l)}\right) \pm \varepsilon
|
|
||||||
% \]
|
|
||||||
% The amount of kinks in a given interval of length $\delta$ follows a
|
|
||||||
% binomial distribution,
|
|
||||||
% \[
|
|
||||||
% \mathbb{E} \left[\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
|
|
||||||
% \delta(l+1))\right\}}\right] = n \int_{\delta
|
|
||||||
% l}^{\delta(l+1)}g_\xi (x) dx \approx n (\delta g_\xi(\delta l)
|
|
||||||
% \pm \delta \tilde{\varepsilon}),
|
|
||||||
% \]
|
|
||||||
% for any $\delta \leq \delta(\varepsilon, \tilde{\varepsilon})$, since $g_\xi$ is uniformly continuous on its
|
|
||||||
% support by Assumption..
|
|
||||||
% As the distribution of $v$ is continuous as well we get that
|
|
||||||
% $\mathcal{L}(v_k) = \mathcal{L} v| \xi = \delta l) \forall k \in
|
|
||||||
% \kappa : \xi_k \in [\delta l, \delta(l+1))$ for $\delta \leq
|
|
||||||
% \delta(\varepsilon, \tilde{\varepsilon})$. Thus we get with the law of
|
|
||||||
% large numbers
|
|
||||||
% \begin{align*}
|
|
||||||
% &\sum_{k \in \kappa : \xi_k < T} \varphi(\xi_k, v_k)
|
|
||||||
% h_{k,n} \approx\\
|
|
||||||
% &\approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
||||||
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T
|
|
||||||
% \}]}}\left(\mathbb{E}[\phi(\xi, v)|\xi=\delta l]
|
|
||||||
% \stackrel{\mathbb{P}}{\pm}\right) \delta \left(1 \pm
|
|
||||||
% \frac{\tilde{\varepsilon}}{g_\xi(\delta l)}\right) \pm \varepsilon
|
|
||||||
% \\
|
|
||||||
% &\approx \left(\sum_{\substack{l \in \mathbb{Z} \\ [\delta
|
|
||||||
% l, \delta
|
|
||||||
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T
|
|
||||||
% \}]}}\mathbb{E}[\phi(\xi, v)|\xi=\delta l] \delta
|
|
||||||
% \stackrel{\mathbb{P}}{\pm}\tilde{\tilde{\varepsilon}}
|
|
||||||
% \abs{C_{g_\xi}^u - C_{g_\xi}^l}
|
|
||||||
% \right)\\
|
|
||||||
% &\phantom{\approx}\cdot \left(1 \pm
|
|
||||||
% \frac{\tilde{\varepsilon}}{g_\xi(\delta l)}\right) \pm \varepsilon
|
|
||||||
% \end{align*}
|
|
||||||
\end{Proof}
|
|
||||||
|
|
||||||
% \begin{Lemma}[($L(f_n) \to L(f)$), Heiss, Teichmann, and
|
|
||||||
% Wutte (2019, Lemma A.11)]
|
|
||||||
% For any data $(x_i^{\text{train}}, y_i^{\text{train}}) \in
|
|
||||||
% \mathbb{R}^2, i \in \left\{1,\dots,N\right\}$, let $(f_n)_{n \in
|
|
||||||
% \mathbb{N}}$ be a sequence of functions that converges point-wise
|
|
||||||
% in probability to a function $f : \mathbb{R}\to\mathbb{R}$, then the
|
|
||||||
% loss $L$ of $f_n$ converges is probability to $L(f)$ as $n$ tends to
|
|
||||||
% infinity,
|
|
||||||
% \[
|
|
||||||
% \plimn L(f_n) = L(f).
|
|
||||||
% \]
|
|
||||||
% \proof Vgl. ...
|
|
||||||
% \end{Lemma}
|
|
||||||
|
|
||||||
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.12)]~\\\noindent
\label{proof:lem12}
With $\tilde{\lambda} \coloneqq \lambda n g(0)$ Lemma~\ref{lem:s2} follows
analogously when considering $\tilde{w}$, $f_g^{*, \lambda}$, and $h_k$
instead of $\tilde{w}^+$, $f_{g,+}^{*, \lambda}$, and $\bar{h}_k$.
|
|
||||||
% We start by showing that
|
|
||||||
% \[
|
|
||||||
% \plimn \tilde{\lambda} \norm{\tilde{w}}_2^2 = \lambda g(0)
|
|
||||||
% \left(\int \frac{\left(f_g^{*,\lambda''}\right)^2}{g(x)} dx\right)
|
|
||||||
% \]
|
|
||||||
% With the definitions of $\tilde{w}$, $\tilde{\lambda}$ and
|
|
||||||
% $h$ we have
|
|
||||||
% \begin{align*}
|
|
||||||
% \tilde{\lambda} \norm{\tilde{w}}_2^2
|
|
||||||
% &= \tilde{\lambda} \sum_{k \in
|
|
||||||
% \kappa}\left(f_g^{*,\lambda''}(\xi_k) \frac{h_k
|
|
||||||
% v_k}{\mathbb{E}v^2|\xi = \xi_k]}\right)^2\\
|
|
||||||
% &= \tilde{\lambda} \sum_{k \in
|
|
||||||
% \kappa}\left(\left(f_g^{*,\lambda''}\right)^2(\xi_k) \frac{h_k
|
|
||||||
% v_k^2}{\mathbb{E}v^2|\xi = \xi_k]}\right) h_k\\
|
|
||||||
% & = \lambda g(0) \sum_{k \in
|
|
||||||
% \kappa}\left(\left(f_g^{*,\lambda''}\right)^2(\xi_k)\frac{v_k^2}{g_\xi(\xi_k)\mathbb{E}
|
|
||||||
% [v^2|\xi=\xi_k]}\right)h_k.
|
|
||||||
% \end{align*}
|
|
||||||
% By using Lemma~\ref{lem} with $\phi(x,y) =
|
|
||||||
% \left(f_g^{*,\lambda''}\right)^2(x)\frac{y^2}{g_\xi(\xi)\mathbb{E}[v^2|\xi=y]}$
|
|
||||||
% this converges to
|
|
||||||
% \begin{align*}
|
|
||||||
% &\plimn \tilde{\lambda}\norm{\tilde{w}}_2^2 = \\
|
|
||||||
% &=\lambda
|
|
||||||
% g_\xi(0)\mathbb{E}[v^2|\xi=0]\int_{\supp{g_\xi}}\mathbb{E}\left[
|
|
||||||
% \left(f_g^{*,\lambda''}\right)^2(\xi)\frac{v^2}{
|
|
||||||
% g_\xi(\xi)\mathbb{E}[v^2|\xi=x]^2}\Big{|} \xi = x\right]dx\\
|
|
||||||
% &=\lambda g_\xi(0) \mathbb{E}[v^2|\xi=0] \int_{\supp{g_xi}}
|
|
||||||
% \frac{\left(f_g^{*,\lambda''}\right)^2 (x)}{g_\xi(x)
|
|
||||||
% \mathbb{E}[v^2|\xi=x]} dx \\
|
|
||||||
% &=\lambda g(0) \int_{\supp{g_\xi}} \frac{\left(f_g^{*,\lambda''}\right)^2}{g(x)}dx.
|
|
||||||
% \end{align*}
|
|
||||||
\end{Proof}
|
|
||||||
|
|
||||||
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.14)]~\\\noindent
\label{proof:lem14}
Substitute $F_{+-}^{\lambda, g}\left(f_{g,+}^{*,\lambda},
  f_{g,-}^{*,\lambda}\right)$ with $F^{\lambda,g}\left(f_g^{*,\lambda}\right)$.
\end{Proof}
|
|
||||||
% \begin{Lemma}[Heiss, Teichmann, and
|
|
||||||
% Wutte (2019, Lemma A.13)]
|
|
||||||
% Using the notation of Definition .. and ... the following statement
|
|
||||||
% holds:
|
|
||||||
% $\forall \varepsilon \in \mathbb{R}_{>0} : \exists \delta \in
|
|
||||||
% \mathbb{R}_{>0} : \forall \omega \in \Omega : \forall l, l' \in
|
|
||||||
% \left\{1,\dots,N\right\} : \forall n \in \mathbb{N}$
|
|
||||||
% \[
|
|
||||||
% \left(\abs{\xi_l(\omega) - \xi_{l'}(\omega)} < \delta \wedge
|
|
||||||
% \text{sign}(v_l(\omega)) = \text{sign}(v_{l'}(\omega))\right)
|
|
||||||
% \implies \abs{\frac{w_l^{*, \tilde{\lambda}}(\omega)}{v_l(\omega)}
|
|
||||||
% - \frac{w_{l'}^{*, \tilde{\lambda}}(\omega)}{v_{l'}(\omega)}} <
|
|
||||||
% \frac{\varepsilon}{n},
|
|
||||||
% \]
|
|
||||||
% if we assume that $v_k$ is never zero.
|
|
||||||
% \proof given in ..
|
|
||||||
% \end{Lemma}
|
|
||||||
|
|
||||||
% \begin{Lemma}[$\frac{w^{*,\tilde{\lambda}}}{v} \approx
|
|
||||||
% \mathcal{O}(\frac{1}{n})$, Heiss, Teichmann, and
|
|
||||||
% Wutte (2019, Lemma A.14)]
|
|
||||||
% For any $\lambda > 0$ and data $(x_i^{\text{train}},
|
|
||||||
% y_i^{\text{train}}) \in \mathbb{R}^2, i\in
|
|
||||||
% \left\{1,\dots,\right\}$, we have
|
|
||||||
% \[
|
|
||||||
% \forall P \in (0,1) : \exists C \in \mathbb{R}_{>0} : \exists
|
|
||||||
% n_0 \in \mathbb{N} : \forall n > n_0 : \mathbb{P}
|
|
||||||
% \left[\max_{k\in \left\{1,\dots,n\right\}}
|
|
||||||
% \frac{w_k^{*,\tilde{\lambda}}}{v_k} < C
|
|
||||||
% \frac{1}{n}\right] > P
|
|
||||||
% % \max_{k\in \left\{1,\dots,n\right\}}
|
|
||||||
% % \frac{w_k^{*,\tilde{\lambda}}}{v_k} = \plimn
|
|
||||||
% \]
|
|
||||||
% \proof
|
|
||||||
|
|
||||||
|
|
||||||
% Let $k^*_+ \in \argmax_{k\in
|
|
||||||
% \left\{1,\dots,n\right\}}\frac{w^{*,\tilde{\lambda}}}{v_k} : v_k
|
|
||||||
% > 0$ and $k^*_- \in \argmax_{k\in
|
|
||||||
% \left\{1,\dots,n\right\}}\frac{w^{*,\tilde{\lambda}}}{v_k} : v_k
|
|
||||||
% < 0$. W.l.o.g. assume $\frac{w_{k_+^*}^2}{v_{k_+^*}^2} \geq
|
|
||||||
% \frac{w_{k_-^*}^2}{v_{k_-^*}^2}$
|
|
||||||
% \begin{align*}
|
|
||||||
% \frac{F^{\lambda,
|
|
||||||
% g}\left(f^{*,\lambda}_g\right)}{\tilde{\lambda}}
|
|
||||||
% \makebox[2cm][c]{$\stackrel{\mathbb{P}}{\geq}$}
|
|
||||||
% & \frac{1}{2 \tilde{\lambda}}
|
|
||||||
% F_n^{\tilde{\lambda}}\left(\mathcal{RN}^{*,\tilde{\lambda}}\right)
|
|
||||||
% = \frac{1}{2 \tilde{\lambda}}\left[\sum ... + \tilde{\lambda} \norm{w}_2^2\right]
|
|
||||||
% \\
|
|
||||||
% \makebox[2cm][c]{$\geq$}
|
|
||||||
% & \frac{1}{2}\left( \sum_{\substack{k: v_k
|
|
||||||
% > 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*}
|
|
||||||
% + \delta)}} \left(w_k^{*,\tilde{\lambda}}\right)^2 +
|
|
||||||
% \sum_{\substack{k: v_k < 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*}
|
|
||||||
% + \delta)}} \left(w_k^{*,\tilde{\lambda}}\right)^2\right) \\
|
|
||||||
% \makebox[2cm][c]{$\overset{\text{Lem. A.6}}{\underset{\delta \text{
|
|
||||||
% small enough}}{\geq}} $}
|
|
||||||
% &
|
|
||||||
% \frac{1}{4}\left(\left(\frac{w_{k_+^*}^{*,\tilde{\lambda}}}
|
|
||||||
% {v_{k_+^*}}\right)^2\sum_{\substack{k:
|
|
||||||
% v_k > 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*} + \delta)}}v_k^2 +
|
|
||||||
% \left(\frac{w_{k_-^*}^{*,\tilde{\lambda}}}{v_{k_-^*}}\right)^2
|
|
||||||
% \sum_{\substack{k:
|
|
||||||
% v_k < 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*} +
|
|
||||||
% \delta)}}v_k^2\right)\\
|
|
||||||
% \makebox[2cm][c]{$\stackrel{\mathbb{P}}{\geq}$}
|
|
||||||
% & \frac{1}{8}
|
|
||||||
% \left(\frac{w_{k_+^*}^{*,\tilde{\lambda}}}{v_{k^*}}\right)^2
|
|
||||||
% n \delta g_\xi(\xi_{k_+^*}) \mathbb{P}(v_k
|
|
||||||
% >0)\mathbb{E}[v_k^2|\xi_k = \xi_{k^*_+}]
|
|
||||||
% \end{align*}
|
|
||||||
|
|
||||||
% \end{Lemma}
|
|
||||||
|
|
||||||
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.15)]~\\\noindent
\label{proof:lem15}
Consider $\mathcal{RN}^{*,\tilde{\lambda}}$,
$f^{w^{*,\tilde{\lambda}}}$, and $\kappa = \left\{1, \dots, n
\right\}$ instead of $\mathcal{RN}_+^{*,\tilde{\lambda}}$,
$f_+^{w^{*,\tilde{\lambda}}}$, and $\kappa^+$.
Assuming w.l.o.g. $\max_{k \in
  \kappa^+}\abs{\frac{w_k^{*,\tilde{\lambda}}}{v_k}} \geq \max_{k \in
  \kappa^-}\abs{\frac{w_k^{*,\tilde{\lambda}}}{v_k}}$,
Lemma~\ref{lem:s3} follows analogously by multiplying (58b) by two.
\end{Proof}
|
|
||||||
|
|
||||||
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.16)]~\\\noindent
\label{proof:lem16}
As we are considering $F^{\lambda,g}$ instead of
$F^{\lambda,g}_{+-}$ we need to substitute $2\lambda g(0)$ with
$\lambda g(0)$
and thus get
\[
  \left(f^{w^{*,\tilde{\lambda}}}\right)''(x) \approx
  \frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}} n g_\xi(x)
  \mathbb{E}\left[v_k^2|\xi_k = x\right] \stackrel{\mathbb{P}}{\pm} \varepsilon_3
\]
and use this to conclude
\[
  \lambda g(0)
  \int_{\supp(g)}\hspace{-0.15cm}\frac{\left(\left(f^{w^{*,\tilde{\lambda}}}\right)''(x)\right)^2}{g(0)}dx
  \approx \tilde{\lambda} n
  \int_{\supp(g)}\left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2 \hspace{-0.1cm}
  g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k=x\right]dx
\]
Analogous to the proof of \textcite{heiss2019} we get
\begin{align*}
  \tilde{\lambda} \sum_{k \in \kappa}
  \left(w_k^{*,\tilde{\lambda}}\right)^2
  &= \tilde{\lambda} \sum_{k \in \kappa^+}
  \left(w_k^{*,\tilde{\lambda}}\right)^2 + \tilde{\lambda} \sum_{k \in \kappa^-}
  \left(w_k^{*,\tilde{\lambda}}\right)^2 \\
  &\approx \left(\mathbb{P}[v_k <0] + \mathbb{P}[v_k >0]\right)\\
  &\phantom{=}
  \int_{\supp(g_\xi)}
  \left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2
  g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k = x\right] dx
  \stackrel{\mathbb{P}}{\pm} \varepsilon_9 \\
  &= \int_{\supp{g_\xi}}
  \left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2
  g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k = x\right] dx
  \stackrel{\mathbb{P}}{\pm} \varepsilon_9.
\end{align*}
With these transformations Lemma~\ref{lem:s4} follows analogously.
\end{Proof}
|
|
||||||
|
|
||||||
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.19)]~\\\noindent
\label{proof:lem19}
The proof works analogously if $F_{+-}^{\lambda,g}$ is substituted
by
\begin{align*}
  F_{+-}^{\lambda,g '}(f_+, f_-) =
  & \sum_{i = 1}^N \left(f(x_i^{\text{train}}) -
    y_i^{\text{train}}\right)^2 \\
  & + \lambda g(0) \left(\int_{\supp(g)}\frac{\left(f_+''(x)\right)^2}{g(x)}
    dx + \int_{\supp(g)}\frac{\left(f''_-(x)\right)^2}{g(x)}
    dx\right).
\end{align*}
As for $f^n = f_+^n + f_-^n$ such that $\supp(f_+^n) \cap \supp(f_-^n) =
\emptyset$ and $h = h_+ + h_-$ such that $\supp(h_+) \cap \supp(h_-) =
\emptyset$ it holds that
\[
  \plimn F^{\lambda, g}(f^n) = F^{\lambda, g}(h) \implies
  \plimn F_{+-}^{\lambda,g '}(f_+,f_-) = F_{+-}^{\lambda,g '}(h_+,h_-),
\]
and all functions can be split into two functions with disjoint support,
Lemma~\ref{lem:s7} follows.
\end{Proof}

\input{Appendix_code.tex}

\end{appendices}
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,12 +2,15 @@
  series = {arXiv},
  author = {Heiss, Jakob and Teichmann, Josef and Wutte, Hanna},
  publisher = {Cornell University},
  year = {2019},
  language = {en},
  copyright = {In Copyright - Non-Commercial Use Permitted},
  keywords = {early stopping; implicit regularization; machine learning; neural networks; spline; regression; gradient descent; artificial intelligence},
  size = {53 p.},
  DOI = {10.3929/ethz-b-000402003},
  title = {How Implicit Regularization of Neural Networks Affects the Learned Function – Part I},
  PAGES = {1911.02903}
}
|
||||||
|
|
||||||
@article{Dropout,
|
@article{Dropout,
|
||||||
@ -18,7 +21,7 @@
|
|||||||
volume = 15,
|
volume = 15,
|
||||||
number = 56,
|
number = 56,
|
||||||
pages = {1929-1958},
|
pages = {1929-1958},
|
||||||
Comment url = {http://jmlr.org/papers/v15/srivastava14a.html}
|
url = {http://jmlr.org/papers/v15/srivastava14a.html}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{ADADELTA,
|
@article{ADADELTA,
|
||||||
@ -27,10 +30,12 @@
|
|||||||
journal = {CoRR},
|
journal = {CoRR},
|
||||||
volume = {abs/1212.5701},
|
volume = {abs/1212.5701},
|
||||||
year = 2012,
|
year = 2012,
|
||||||
Comment url = {http://arxiv.org/abs/1212.5701},
|
url = {http://arxiv.org/abs/1212.5701},
|
||||||
archivePrefix = {arXiv},
|
archivePrefix = {arXiv},
|
||||||
eprint = {1212.5701},
|
eprint = {1212.5701},
|
||||||
timestamp = {Mon, 13 Aug 2018 16:45:57 +0200},
|
timestamp = {Mon, 13 Aug 2018 16:45:57 +0200},
|
||||||
|
biburl = {https://dblp.org/rec/journals/corr/abs-1212-5701.bib},
|
||||||
|
bibsource = {dblp computer science bibliography, https://dblp.org}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{backprop,
|
@article{backprop,
|
||||||
@ -45,31 +50,45 @@ day={01},
|
|||||||
volume={323},
|
volume={323},
|
||||||
number={6088},
|
number={6088},
|
||||||
pages={533-536},
|
pages={533-536},
|
||||||
|
abstract={We describe a new learning procedure, back-propagation, for networks of neurone-like units. The procedure repeatedly adjusts the weights of the connections in the network so as to minimize a measure of the difference between the actual output vector of the net and the desired output vector. As a result of the weight adjustments, internal `hidden' units which are not part of the input or output come to represent important features of the task domain, and the regularities in the task are captured by the interactions of these units. The ability to create useful new features distinguishes back-propagation from earlier, simpler methods such as the perceptron-convergence procedure1.},
|
||||||
issn={1476-4687},
|
issn={1476-4687},
|
||||||
doi={10.1038/323533a0},
|
doi={10.1038/323533a0},
|
||||||
Comment url={https://doi.org/10.1038/323533a0}
|
url={https://doi.org/10.1038/323533a0}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{MNIST,
|
@article{MNIST,
|
||||||
added-at = {2010-06-28T21:16:30.000+0200},
|
added-at = {2010-06-28T21:16:30.000+0200},
|
||||||
author = {LeCun, Yann and Cortes, Corinna},
|
author = {LeCun, Yann and Cortes, Corinna},
|
||||||
|
biburl = {https://www.bibsonomy.org/bibtex/2935bad99fa1f65e03c25b315aa3c1032/mhwombat},
|
||||||
groups = {public},
|
groups = {public},
|
||||||
howpublished = {http://yann.lecun.com/exdb/mnist/},
|
howpublished = {http://yann.lecun.com/exdb/mnist/},
|
||||||
|
interhash = {21b9d0558bd66279df9452562df6e6f3},
|
||||||
|
intrahash = {935bad99fa1f65e03c25b315aa3c1032},
|
||||||
keywords = {MSc _checked character_recognition mnist network neural},
|
keywords = {MSc _checked character_recognition mnist network neural},
|
||||||
lastchecked = {2016-01-14 14:24:11},
|
lastchecked = {2016-01-14 14:24:11},
|
||||||
timestamp = {2016-07-12T19:25:30.000+0200},
|
timestamp = {2016-07-12T19:25:30.000+0200},
|
||||||
title = {{MNIST} handwritten digit database},
|
title = {{MNIST} handwritten digit database},
|
||||||
Comment url = {http://yann.lecun.com/exdb/mnist/},
|
url = {http://yann.lecun.com/exdb/mnist/},
|
||||||
|
username = {mhwombat},
|
||||||
year = 2010
|
year = 2010
|
||||||
}
|
}
|
||||||
@INPROCEEDINGS{resnet,
  author = {Kaiming {He} and Xiangyu {Zhang} and Shaoqing {Ren} and Jian {Sun}},
  booktitle = {2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  title = {Deep Residual Learning for Image Recognition},
  year = {2016},
  pages = {770-778},
}
|
||||||
|
|
||||||
@book{PRML,
|
@book{PRML,
|
||||||
title = {Pattern Recognition and Machine Learning},
|
title = {Pattern Recognition and Machine Learning},
|
||||||
@ -98,15 +117,6 @@ numpages = {39}
|
|||||||
}
|
}
|
||||||
|
|
||||||
@article{DBLP:journals/corr/DauphinPGCGB14,
|
@article{DBLP:journals/corr/DauphinPGCGB14,
|
||||||
author = {Dauphin, Yann and Pascanu, Razvan and Gulcehre, Caglar and Cho, Kyunghyun and Ganguli, Surya and Bengio, Y.},
|
|
||||||
year = {2014},
|
|
||||||
month = {06},
|
|
||||||
pages = {},
|
|
||||||
title = {Identifying and attacking the saddle point problem in high-dimensional non-convex optimization},
|
|
||||||
volume = {27},
|
|
||||||
journal = {NIPS}
|
|
||||||
}
|
|
||||||
@article{saddle_point,
|
|
||||||
author = {Yann N. Dauphin and
|
author = {Yann N. Dauphin and
|
||||||
Razvan Pascanu and
|
Razvan Pascanu and
|
||||||
{\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
|
{\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
|
||||||
@ -118,10 +128,11 @@ journal = {NIPS}
|
|||||||
journal = {CoRR},
|
journal = {CoRR},
|
||||||
volume = {abs/1406.2572},
|
volume = {abs/1406.2572},
|
||||||
year = {2014},
|
year = {2014},
|
||||||
Comment url = {http://arxiv.org/abs/1406.2572},
|
url = {http://arxiv.org/abs/1406.2572},
|
||||||
archivePrefix = {arXiv},
|
archivePrefix = {arXiv},
|
||||||
eprint = {1406.2572},
|
eprint = {1406.2572},
|
||||||
timestamp = {Mon, 22 Jul 2019 13:15:46 +0200},
|
timestamp = {Mon, 22 Jul 2019 13:15:46 +0200},
|
||||||
|
biburl = {https://dblp.org/rec/journals/corr/DauphinPGCGB14.bib},
|
||||||
bibsource = {dblp computer science bibliography, https://dblp.org}
|
bibsource = {dblp computer science bibliography, https://dblp.org}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,10 +146,12 @@ journal = {NIPS}
|
|||||||
journal = {CoRR},
|
journal = {CoRR},
|
||||||
volume = {abs/1207.0580},
|
volume = {abs/1207.0580},
|
||||||
year = {2012},
|
year = {2012},
|
||||||
Comment url = {http://arxiv.org/abs/1207.0580},
|
url = {http://arxiv.org/abs/1207.0580},
|
||||||
archivePrefix = {arXiv},
|
archivePrefix = {arXiv},
|
||||||
eprint = {1207.0580},
|
eprint = {1207.0580},
|
||||||
timestamp = {Mon, 13 Aug 2018 16:46:10 +0200},
|
timestamp = {Mon, 13 Aug 2018 16:46:10 +0200},
|
||||||
|
biburl = {https://dblp.org/rec/journals/corr/abs-1207-0580.bib},
|
||||||
|
bibsource = {dblp computer science bibliography, https://dblp.org}
|
||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{
|
@inproceedings{
|
||||||
@ -147,20 +160,22 @@ title={On the Variance of the Adaptive Learning Rate and Beyond},
|
|||||||
author={Liyuan Liu and Haoming Jiang and Pengcheng He and Weizhu Chen and Xiaodong Liu and Jianfeng Gao and Jiawei Han},
|
author={Liyuan Liu and Haoming Jiang and Pengcheng He and Weizhu Chen and Xiaodong Liu and Jianfeng Gao and Jiawei Han},
|
||||||
booktitle={International Conference on Learning Representations},
|
booktitle={International Conference on Learning Representations},
|
||||||
year={2020},
|
year={2020},
|
||||||
Comment url={https://openreview.net/forum?id=rkgz2aEKDr}
|
url={https://openreview.net/forum?id=rkgz2aEKDr}
|
||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{ADAM,
|
@inproceedings{ADAM,
|
||||||
author = {Diederik P. Kingma and
|
author = {Diederik P. Kingma and
|
||||||
Jimmy Ba},
|
Jimmy Ba},
|
||||||
@Comment editor = {Yoshua Bengio and
|
editor = {Yoshua Bengio and
|
||||||
@Comment Yann LeCun},
|
Yann LeCun},
|
||||||
title = {Adam: {A} Method for Stochastic Optimization},
|
title = {Adam: {A} Method for Stochastic Optimization},
|
||||||
booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015,
|
booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015,
|
||||||
San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
|
San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
|
||||||
year = {2015},
|
year = {2015},
|
||||||
Comment url = {http://arxiv.org/abs/1412.6980},
|
url = {http://arxiv.org/abs/1412.6980},
|
||||||
timestamp = {Thu, 25 Jul 2019 14:25:37 +0200},
|
timestamp = {Thu, 25 Jul 2019 14:25:37 +0200},
|
||||||
|
biburl = {https://dblp.org/rec/journals/corr/KingmaB14.bib},
|
||||||
|
bibsource = {dblp computer science bibliography, https://dblp.org}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{transfer_learning,
|
@article{transfer_learning,
|
||||||
@ -172,11 +187,11 @@ Comment url={https://openreview.net/forum?id=rkgz2aEKDr}
|
|||||||
pages = {020018},
|
pages = {020018},
|
||||||
year = {2017},
|
year = {2017},
|
||||||
doi = {10.1063/1.4992835},
|
doi = {10.1063/1.4992835},
|
||||||
|
URL = {https://aip.scitation.org/doi/abs/10.1063/1.4992835},
|
||||||
eprint = {https://aip.scitation.org/doi/pdf/10.1063/1.4992835}
|
eprint = {https://aip.scitation.org/doi/pdf/10.1063/1.4992835}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{gan,
  author = "Maayan Frid-Adar and Idit Diamant and Eyal Klang and Michal Amitai and Jacob Goldberger and Hayit Greenspan",
  title = "GAN-based synthetic medical image augmentation for increased CNN performance in liver lesion classification",
  journal = "Neurocomputing",
  volume = 321,
@ -184,112 +199,6 @@ Comment url={https://openreview.net/forum?id=rkgz2aEKDr}
  year = 2018,
  issn = "0925-2312",
  doi = "https://doi.org/10.1016/j.neucom.2018.09.013",
  Comment url = "http://www.sciencedirect.com/science/article/pii/S0925231218310749",
}
|
||||||
|
|
||||||
@online{fashionMNIST,
|
|
||||||
author = {Han Xiao and Kashif Rasul and Roland Vollgraf},
|
|
||||||
title = {Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms},
|
|
||||||
date = {2017-08-28},
|
|
||||||
year = {2017},
|
|
||||||
eprintclass = {cs.LG},
|
|
||||||
eprinttype = {arXiv},
|
|
||||||
eprint = {cs.LG/1708.07747},
|
|
||||||
}
|
|
||||||
|
|
||||||
@inproceedings{10.1145/3206098.3206111,
|
|
||||||
author = {Kowsari, Kamran and Heidarysafa, Mojtaba and Brown, Donald E. and Meimandi, Kiana Jafari and Barnes, Laura E.},
|
|
||||||
title = {RMDL: Random Multimodel Deep Learning for Classification},
|
|
||||||
year = {2018},
|
|
||||||
isbn = {9781450363549},
|
|
||||||
publisher = {Association for Computing Machinery},
|
|
||||||
address = {New York, NY, USA},
|
|
||||||
Comment url = {https://doi.org/10.1145/3206098.3206111},
|
|
||||||
doi = {10.1145/3206098.3206111},
|
|
||||||
booktitle = {Proceedings of the 2nd International Conference on Information System and Data Mining},
|
|
||||||
pages = {19–28},
|
|
||||||
numpages = {10},
|
|
||||||
keywords = {Supervised Learning, Deep Learning, Data Mining, Text Classification, Deep Neural Networks, Image Classification},
|
|
||||||
location = {Lakeland, FL, USA},
|
|
||||||
series = {ICISDM '18}
|
|
||||||
}
|
|
||||||
|
|
||||||
@article{random_erasing,
|
|
||||||
author = {Zhun Zhong and
|
|
||||||
Liang Zheng and
|
|
||||||
Guoliang Kang and
|
|
||||||
Shaozi Li and
|
|
||||||
Yi Yang},
|
|
||||||
title = {Random Erasing Data Augmentation},
|
|
||||||
journal = {CoRR},
|
|
||||||
volume = {abs/1708.04896},
|
|
||||||
year = 2017,
|
|
||||||
Comment url = {http://arxiv.org/abs/1708.04896},
|
|
||||||
archivePrefix = {arXiv},
|
|
||||||
eprint = {1708.04896},
|
|
||||||
timestamp = {Mon, 13 Aug 2018 16:47:52 +0200},
|
|
||||||
}
|
|
||||||
|
|
||||||
@misc{draw_convnet,
|
|
||||||
title = {Python script for illustrating Convolutional Neural Network (ConvNet)},
|
|
||||||
howpublished = {\url{https://github.com/gwding/draw_convnet}},
|
|
||||||
note = {Accessed: 30.08.2020},
|
|
||||||
author = {Gavin Weiguang Ding},
|
|
||||||
year = 2018
|
|
||||||
}
|
|
||||||
|
|
||||||
@book{Haykin,
|
|
||||||
added-at = {2009-06-26T15:25:19.000+0200},
|
|
||||||
author = {Haykin, Simon},
|
|
||||||
note = {2nd edition},
|
|
||||||
publisher = {Prentice Hall},
|
|
||||||
title = {Neural Networks: {A} Comprehensive Foundation},
|
|
||||||
year = 1999
|
|
||||||
}
|
|
||||||
|
|
||||||
@book{Goodfellow,
|
|
||||||
title={Deep Learning},
|
|
||||||
author={Ian Goodfellow and Yoshua Bengio and Aaron Courville},
|
|
||||||
publisher={MIT Press},
|
|
||||||
note={\url{http://www.deeplearningbook.org}},
|
|
||||||
year=2016
|
|
||||||
}
|
|
||||||
|
|
||||||
@article{ruder,
|
|
||||||
author = {Sebastian Ruder},
|
|
||||||
title = {An overview of gradient descent optimization algorithms},
|
|
||||||
journal = {CoRR},
|
|
||||||
volume = {abs/1609.04747},
|
|
||||||
year = {2016},
|
|
||||||
url = {http://arxiv.org/abs/1609.04747},
|
|
||||||
archivePrefix = {arXiv},
|
|
||||||
eprint = {1609.04747},
|
|
||||||
timestamp = {Mon, 13 Aug 2018 16:48:10 +0200},
|
|
||||||
biburl = {https://dblp.org/rec/journals/corr/Ruder16.bib},
|
|
||||||
bibsource = {dblp computer science bibliography, https://dblp.org}
|
|
||||||
}
|
|
||||||
|
|
||||||
@incollection{goodfellow_gan,
|
|
||||||
title = {Generative Adversarial Nets},
|
|
||||||
author = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
|
|
||||||
booktitle = {Advances in Neural Information Processing Systems 27},
|
|
||||||
pages = {2672--2680},
|
|
||||||
year = {2014},
|
|
||||||
publisher = {Curran Associates, Inc.},
|
|
||||||
url = {http://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf}
|
|
||||||
}
|
|
||||||
|
|
||||||
@book{hastie01statisticallearning,
|
|
||||||
added-at = {2008-05-16T16:17:42.000+0200},
|
|
||||||
address = {New York, NY, USA},
|
|
||||||
author = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
|
|
||||||
biburl = {https://www.bibsonomy.org/bibtex/2f58afc5c9793fcc8ad8389824e57984c/sb3000},
|
|
||||||
interhash = {d585aea274f2b9b228fc1629bc273644},
|
|
||||||
intrahash = {f58afc5c9793fcc8ad8389824e57984c},
|
|
||||||
keywords = {ml statistics},
|
|
||||||
publisher = {Springer New York Inc.},
|
|
||||||
series = {Springer Series in Statistics},
|
|
||||||
timestamp = {2008-05-16T16:17:43.000+0200},
|
|
||||||
title = {The Elements of Statistical Learning},
|
|
||||||
year = 2001
|
|
||||||
}
|
}
|
File diff suppressed because it is too large
@ -1,74 +1,6 @@

\section{Introduction}

Neural networks have become a widely used model for a plethora of
applications.
They are an attractive choice as they are able to
model complex data while requiring relatively little input beyond the
training data.
Additionally, as the price of parallelized computing
power in the form of graphics processing units has decreased drastically over the last
years, it has become far more accessible to train and use large
neural networks.
Furthermore, highly optimized and parallelized frameworks for tensor
operations have been developed.
With these frameworks, such as TensorFlow and PyTorch, building neural
networks has become a much more straightforward process.
% Furthermore, with the development of highly optimized and
% parallelized implementations of mathematical operations needed for
% neural networks, such as TensorFlow or PyTorch, building neural network
% models has become a much more straightforward process.
% For example the flagship consumer GPU GeForce RTX 3080 of NVIDIA's current
% generation has 5.888 CUDA cores at a ... price of 799 Euro compared
% to the last generations flagship GeForce RTX 2080 Ti with 4352 CUDA
% cores at a ... price of 1259 Euro. These CUDA cores are computing
% cores specialized for tensor operations, which are necessary in
% fitting and using neural networks.

In this thesis we want to get an understanding of the behavior of neural %
networks and
how we can use them for problems with a complex relationship between
in- and output.
In Section 2 we introduce the mathematical construct of neural
networks and how to fit them to training data.

To gain some insight about the learned function,
we examine a simple class of neural networks that contain only one
hidden layer.
In Section~\ref{sec:shallownn} we prove a relation between such networks and
functions that minimize the distance to the training data
with respect to their second derivative.

An interesting application of neural networks is the task of
classifying images.
However, for such complex problems the number of parameters in fully
connected neural networks can exceed what is
feasible for training.
In Section~\ref{sec:cnn} we explore the addition of convolution to neural
networks to reduce the number of parameters.

As these large networks are commonly trained using gradient descent
algorithms, we compare the performance of different algorithms based on
gradient descent in Section~4.4.
% and
% show that it is beneficial to only use small subsets of the training
% data in each iteration rather than using the whole data set to update
% the parameters.
Most statistical models, especially those with large numbers of
trainable parameters, can struggle with overfitting the data.
In Section 4.5 we examine the impact of two measures designed to combat
overfitting.

In some applications, such as working with medical images, the data
available for training can be scarce, which results in the networks
being prone to overfitting.
As these are interesting applications of neural networks, we examine
the benefit of the measures to combat overfitting for
scenarios with limited amounts of training data.

% As in some applications such as medical imaging one might be limited
% to very small training data we study the impact of two measures in
% improving the accuracy in such a case by trying to ... the model from
% overfitting the data.

@ -1,26 +1,19 @@

\section{Introduction to Neural Networks}

This chapter is based on \textcite[Chapter~6]{Goodfellow} and \textcite{Haykin}.

Neural networks are a mathematical construct inspired by the
structure of brains in mammals. They consist of an array of neurons that
receive inputs and compute an accumulated output. These neurons are
arranged in layers, with one input and output layer
and an arbitrary
number of hidden layers between them.
The number of neurons in the in- and output layers corresponds to the
desired dimensions of the in- and outputs of the model.
In conventional neural networks, the information is fed forward from the
input layer towards the output layer, hence they are often called
feed forward networks. Each neuron in a layer has the outputs of all
neurons in the preceding layer as input and computes an accumulated
value from these (fully connected).
% An illustration of an example neural network is given in
% Figure~\ref{fig:nn} and one of a neuron in Figure~\ref{fig:neuron}.
Illustrations of a neural network and the structure of a neuron are given
in Figure~\ref{fig:nn} and Figure~\ref{fig:neuron}.

\tikzset{%
  every neuron/.style={
@ -46,16 +39,16 @@ in Figure~\ref{fig:nn} and Figure~\ref{fig:neuron}.
      {\arrow[scale=1.5,>=stealth]{>}}},postaction={decorate}}}

\foreach \m/\l [count=\y] in {1,2,3,missing,4}
  \node [every neuron/.try, neuron \m/.try] (input-\m) at (0,2.55-\y*0.85) {};

\foreach \m [count=\y] in {1,missing,2}
  \node [every neuron/.try, neuron \m/.try ] (hidden1-\m) at (2.5,2.5-\y*1.25) {};

\foreach \m [count=\y] in {1,missing,2}
  \node [every neuron/.try, neuron \m/.try ] (hidden2-\m) at (5,2.5-\y*1.25) {};

\foreach \m [count=\y] in {1,missing,2}
  \node [every neuron/.try, neuron \m/.try ] (output-\m) at (7,1.5-\y*0.75) {};

\foreach \l [count=\i] in {1,2,3,d_i}
  \draw [myptr] (input-\i)+(-1,0) -- (input-\i)
@ -91,7 +84,7 @@ in Figure~\ref{fig:nn} and Figure~\ref{fig:neuron}.
\node[fill=white,scale=1.5,inner xsep=10pt,inner ysep=10mm] at ($(hidden1-1)!.5!(hidden2-2)$) {$\dots$};

\end{tikzpicture}}%}
\caption[Illustration of a Neural Network]{Illustration of a neural network with $d_i$ inputs, $l$
  hidden layers with $n_{\cdot}$ nodes in each layer, as well as
  $d_o$ outputs.
}
@ -100,62 +93,102 @@ in Figure~\ref{fig:nn} and Figure~\ref{fig:neuron}.

\subsection{Nonlinearity of Neural Networks}

The arguably most important feature of neural networks which sets them
apart from linear models is the activation function implemented in the
neurons. As illustrated in Figure~\ref{fig:neuron}, an activation function
$\sigma$ is applied to the weighted sum of the inputs, so that the
output of the $k$-th neuron in a layer $l$ with $m$ nodes in layer $l-1$
is given by
\begin{align*}
  o_{l,k} = \sigma\left(b_{l,k} + \sum_{j=1}^{m} w_{l,k,j}
  o_{l-1,j}\right),
\end{align*}
for weights $w_{l,k,j}$ and biases $b_{l,k}$. For a network with $L$
hidden layers and inputs $o_{0}$ the final outputs of the network
are thus given by $o_{L+1}$.
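
The following Scala sketch makes this layer-wise computation concrete. It is only an illustration: the names, the nested-sequence representation of the weights, and the choice of ReLU as activation are assumptions made for the example, not the implementation used elsewhere in this thesis.

\begin{lstlisting}[language=Scala]
object LayerForward {
  def relu(x: Double): Double = math.max(0.0, x)

  // o_{l,k} = sigma(b_{l,k} + sum_j w_{l,k,j} * o_{l-1,j}) for every neuron k of layer l.
  // (In practice the last layer often uses a different activation, see below.)
  def layer(weights: Seq[Seq[Double]], biases: Seq[Double],
            input: Seq[Double]): Seq[Double] =
    (weights zip biases).map { case (row, b) =>
      relu(b + (row zip input).map { case (w, o) => w * o }.sum)
    }

  // The network output o_{L+1} is the composition of all layers applied to o_0.
  def forward(layers: Seq[(Seq[Seq[Double]], Seq[Double])],
              o0: Seq[Double]): Seq[Double] =
    layers.foldLeft(o0) { case (o, (w, b)) => layer(w, b, o) }

  def main(args: Array[String]): Unit = {
    val w1 = Seq(Seq(0.5, -0.2), Seq(0.1, 0.3), Seq(-0.4, 0.8)) // 3 neurons, 2 inputs
    val b1 = Seq(0.0, 0.1, -0.1)
    val w2 = Seq(Seq(1.0, -1.0, 0.5))                           // 1 output neuron
    val b2 = Seq(0.2)
    println(forward(Seq((w1, b1), (w2, b2)), Seq(1.0, 2.0)))
  }
}
\end{lstlisting}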

The activation function is usually chosen nonlinear (a linear one
would result in the entire network collapsing into a linear model) which
allows it to better model data where the relation of in- and output is
of nonlinear nature.
There are two types of activation functions, saturating and
non-saturating ones. Popular examples for the former are sigmoid
functions, where most commonly the standard logistic function or tangens
hyperbolicus are used,
as they have easy to compute derivatives, which is desirable for
gradient-based optimization algorithms. The standard logistic function
(often simply referred to as sigmoid function) is given by
\[
  f(x) = \frac{1}{1+e^{-x}}
\]
and has a range of $[0,1]$. The tangens hyperbolicus is given by
\[
  \tanh(x) = \frac{e^{2x}-1}{e^{2x}+1}
\]
and has a range of $[-1,1]$. Both functions result in neurons that are
close to inactive until a certain threshold is reached, where they grow
until saturation.
The downside of these saturating activation functions is that their
derivatives are close to zero on most of their domain, only assuming
larger values in proximity to zero.
This can hinder the progress of gradient-based methods.

The non-saturating activation functions commonly used are the rectified
linear unit (ReLU) or the leaky ReLU. The ReLU is given by
\begin{equation}
  r(x) = \max\left\{0, x\right\}.
  \label{eq:relu}
\end{equation}
This has the benefit of having a constant derivative for values larger
than zero. However, the derivative being zero for negative values has
the same downside for
fitting the model with gradient-based methods. The leaky ReLU is
an attempt to counteract this problem by assigning a small constant
derivative to all values smaller than zero and for a scalar $\alpha$ is given by
\[
  l(x) = \max\left\{0, x\right\} + \alpha \min \left\{0, x\right\}.
\]
In Figure~\ref{fig:activation} visualizations of these functions are given.
%In order to illustrate these functions plots of them are given in Figure~\ref{fig:activation}.
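
The following Scala sketch collects the four activation functions and their derivatives; it is purely illustrative (the value of $\alpha$ and the printed comparison are example choices). It also shows numerically why the saturating functions can slow gradient-based training: their derivatives vanish away from zero, while the (leaky) ReLU keeps a constant slope.

\begin{lstlisting}[language=Scala]
object Activations {
  def sigmoid(x: Double): Double      = 1.0 / (1.0 + math.exp(-x))
  def sigmoidPrime(x: Double): Double = sigmoid(x) * (1.0 - sigmoid(x))

  def tanh(x: Double): Double      = math.tanh(x)
  def tanhPrime(x: Double): Double = 1.0 - math.pow(math.tanh(x), 2)

  def relu(x: Double): Double      = math.max(0.0, x)
  def reluPrime(x: Double): Double = if (x > 0) 1.0 else 0.0

  def leakyRelu(alpha: Double)(x: Double): Double =
    math.max(0.0, x) + alpha * math.min(0.0, x)
  def leakyReluPrime(alpha: Double)(x: Double): Double =
    if (x > 0) 1.0 else alpha

  def main(args: Array[String]): Unit = {
    // Saturating: derivative almost zero far from the origin.
    println(sigmoidPrime(10.0)) // ~ 4.5e-5
    println(tanhPrime(10.0))    // ~ 8.2e-9
    // Non-saturating: constant slope for positive inputs.
    println(reluPrime(10.0))               // 1.0
    println(leakyReluPrime(0.1)(-10.0))    // 0.1
  }
}
\end{lstlisting}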
|
|
||||||
\begin{figure}
@ -241,62 +274,17 @@ In Figure~\ref{fig:activation} visualizations of these functions are given.
    % \draw [->] (hidden-\i) -- (output-\j);

\end{tikzpicture}
\caption[Structure of a Single Neuron]{Structure of a single neuron.}
\label{fig:neuron}
\end{figure}
|
||||||
|
|
||||||
\begin{figure}
  \centering
  \begin{subfigure}{.45\linewidth}
    \centering
    \begin{tikzpicture}
      \begin{axis}[enlargelimits=false, ymin=0, ymax = 1, width=\textwidth]
        \addplot [domain=-5:5, samples=101,unbounded coords=jump]{1/(1+exp(-x))};
      \end{axis}
    \end{tikzpicture}
    \caption{Standard Logistic Function}
  \end{subfigure}
  \begin{subfigure}{.45\linewidth}
    \centering
    \begin{tikzpicture}
      \begin{axis}[enlargelimits=false, width=\textwidth]
        \addplot[domain=-5:5, samples=100]{tanh(x)};
      \end{axis}
    \end{tikzpicture}
    \caption{Tangens Hyperbolicus}
  \end{subfigure}
  \begin{subfigure}{.45\linewidth}
    \centering
    \begin{tikzpicture}
      \begin{axis}[enlargelimits=false, width=\textwidth,
        ytick={0,2,4},yticklabels={\hphantom{4.}0,2,4}, ymin=-1]
        \addplot[domain=-5:5, samples=100]{max(0,x)};
      \end{axis}
    \end{tikzpicture}
    \caption{ReLU}
  \end{subfigure}
  \begin{subfigure}{.45\linewidth}
    \centering
    \begin{tikzpicture}
      \begin{axis}[enlargelimits=false, width=\textwidth, ymin=-1,
        ytick={0,2,4},yticklabels={$\hphantom{-5.}0$,2,4}]
        \addplot[domain=-5:5, samples=100]{max(0,x)+ 0.1*min(0,x)};
      \end{axis}
    \end{tikzpicture}
    \caption{Leaky ReLU, $\alpha = 0.1$}
  \end{subfigure}
  \caption[Plots of the Activation Functions]{Plots of the activation functions.}
  \label{fig:activation}
\end{figure}
|
|
||||||
|
|
||||||
\clearpage
\subsection{Training Neural Networks}

As neural networks are parametric models we need to fit the
parameters to the input
data to get meaningful predictions from the network. In order
to accomplish this we need to discuss how we interpret the output of the
neural network and assess the quality of predictions.

% After a neural network model is designed, like most statistical models
% it has to be fit to the data. In the machine learning context this is
|
||||||
@ -314,20 +302,20 @@ neural network and assess the quality of predictions.
% data-point in fitting the model, where usually some distance between
% the model output and the labels is minimized.

\subsubsection{Nonlinearity in the Last Layer}

Given the nature of the neural net, the outputs of the last layer are
real numbers. For regression tasks, this is desirable; for
classification problems, however, some transformations might be
necessary.
As the goal in the latter is to predict a certain class or classes for
an object, the output needs to be of a form that allows this
interpretation.
Commonly the nodes in the output layer each correspond to a class and
the class chosen as prediction is the one with the highest value at
the corresponding output node.
This can be modeled as a transformation of the output
vector $o \in \mathbb{R}^n$ into a one-hot vector
\[
  \text{pred}_i =
  \begin{cases}
@ -335,20 +323,19 @@ vector $o \in \mathbb{R}^n$ into a one-hot vector
    0,& \text{else}.
  \end{cases}
\]
This however makes training the model with gradient-based methods impossible, as the derivative of
the transformation is either zero or undefined.
A continuous transformation that is close to the argmax transformation is given by the
softmax function
\begin{equation}
  \text{softmax}(o)_i = \frac{e^{o_i}}{\sum_j e^{o_j}}.
  \label{eq:softmax}
\end{equation}
The softmax function transforms the range of the output to the interval $[0,1]$
and the individual values sum to one, thus the output can be interpreted as
a probability for each class conditioned on the input.
In addition to being differentiable, this allows evaluating the
certainty of a prediction, rather than just whether it is accurate.
A similar effect is obtained when for a binary or two-class problem the
sigmoid function
\[
  f(x) = \frac{1}{1 + e^{-x}}
@ -356,6 +343,7 @@ sigmoid function
is used and the output $f(x)$ is interpreted as the probability for
the first class and $1-f(x)$ for the second class.
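
The Scala sketch below illustrates these output transformations (the hard argmax one-hot prediction, softmax as in Equation~\eqref{eq:softmax}, and the two-class sigmoid). It is purely illustrative; the names and the max-subtraction used for numerical stability are choices made for this example.

\begin{lstlisting}[language=Scala]
object OutputTransformations {
  // Softmax of the raw outputs o; subtracting the maximum does not change
  // the result (softmax is invariant to adding a constant) but avoids overflow.
  def softmax(o: Seq[Double]): Seq[Double] = {
    val shifted = o.map(_ - o.max)
    val exps    = shifted.map(math.exp)
    val total   = exps.sum
    exps.map(_ / total)
  }

  // Hard argmax prediction as a one-hot vector; not differentiable.
  def oneHot(o: Seq[Double]): Seq[Int] = {
    val best = o.indexOf(o.max)
    o.indices.map(i => if (i == best) 1 else 0)
  }

  // Two-class case: sigmoid output interpreted as the probability of class 1.
  def sigmoid(x: Double): Double = 1.0 / (1.0 + math.exp(-x))

  def main(args: Array[String]): Unit = {
    val o = Seq(1.0, 3.0, 0.5)
    println(softmax(o)) // probabilities summing to one
    println(oneHot(o))  // Vector(0, 1, 0)
  }
}
\end{lstlisting}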
% Another property that makes softmax attractive is the invariance to addition
% \[
% \text{sofmax}(o) = \text{softmax}(o + c
@ -391,62 +379,57 @@ the first class and $1-f(x)$ for the second class.
% way to circumvent this problem is to normalize the output vector is
% such a way that the entries add up to one, this allows for the
% interpretation of probabilities assigned to each class.
|
||||||
\clearpage
|
|
||||||
\subsubsection{Error Measurement}
|
\subsubsection{Error Measurement}
|
||||||
|
|
||||||
In order to train the network we need to be able to assess the quality
|
In order to make assessment about the quality of a network $\mathcal{NN}$ and train
|
||||||
of predictions using some error measure.
|
it we need to discuss how we measure error. The choice of the error
|
||||||
The choice of the error
|
function is highly dependent on the type of the problem. For
|
||||||
function is highly dependent on the type of problem. For
|
regression problems a commonly used error measure is the mean squared
|
||||||
regression problems, a commonly used error measure is the mean squared
|
|
||||||
error (MSE)
|
error (MSE)
|
||||||
which for a function $f$ and data $(x_i,y_i), i \in \left\{1,\dots,n\right\}$ is given by
|
which for a function $f$ and data $(x_i,y_i), i=1,\dots,n$ is given by
|
||||||
\[
|
\[
|
||||||
MSE(f) = \frac{1}{n} \sum_i^n \left(f(x_i) - y_i\right)^2.
|
MSE(f) = \frac{1}{n} \sum_i^n \left(f(x_i) - y_i\right)^2.
|
||||||
\]
|
\]
|
||||||
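As a minimal sketch, the MSE of an arbitrary predictor can be computed as follows (numpy assumed; $f$ is any callable predictor):

\begin{lstlisting}[language=iPython]
import numpy as np

def mse(f, x, y):
    # mean squared error of predictor f on the data (x_i, y_i)
    return np.mean((f(x) - y) ** 2)
\end{lstlisting}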
However, depending on the problem error measures with different
|
However depending on the problem error measures with differnt
|
||||||
properties might be needed. For example in some contexts it is
|
properties might be needed, for example in some contexts it is
|
||||||
required to consider a proportional rather than absolute error.
|
required to consider a proportional rather than absolute error as is
|
||||||
|
common in time series models. \todo{odd}
|
||||||
|
|
||||||
As discussed above the output of a neural network for a classification
|
As discussed above the output of a neural network for a classification
|
||||||
problem can be interpreted as a probability distribution over the classes
|
problem can be interpreted as a probability distribution over the classes
|
||||||
conditioned on the input. In this case, it is desirable to
|
conditioned on the input. In this case it is desirable to
|
||||||
use error functions designed to compare probability distributions. A
|
use error functions designed to compare probability distributions. A
|
||||||
widespread error function for this use case is the categorical cross entropy (\textcite{PRML}),
|
widespread error function for this use case is the cross entropy (\textcite{PRML}),
|
||||||
which for two discrete distributions $p, q$ on the same support $C$ is given by
|
which for two discrete distributions $p, q$ with the same realm $C$ is given by
|
||||||
\[
|
\[
|
||||||
H(p, q) = \sum_{c \in C} p(c) \ln\left(\frac{1}{q(c)}\right),
|
H(p, q) = \sum_{c \in C} p(c) \ln\left(\frac{1}{q(c)}\right),
|
||||||
\]
|
\]
|
||||||
comparing $q$ to a target density $p$.
|
which compares a $q$ to a true underlying distribution $p$.
|
||||||
For a data set $(x_i,y_i), i \in \left\{1,\dots,n\right\}$ where each $y_{i,c}$
|
For a data set $(x_i,y_i), i = 1,\dots,n$ where each $y_{i,c}$
|
||||||
corresponds to the probability of class $c$ given $x_i$ and a predictor
|
corresponds to the probability of class $c$ given $x_i$ and predictor
|
||||||
$f$ we get the loss function
|
$f$ we get the loss function
|
||||||
\begin{equation}
|
\[
|
||||||
CE(f) = \sum_{i=1}^n H(y_i, f(x_i)).
|
Bla = \sum_{i=1}^n H(y_i, f(x_i)).
|
||||||
\label{eq:cross_entropy}
|
\]
|
||||||
\end{equation}
|
|
||||||
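Assuming the network outputs class probabilities (e.g. via softmax), the loss above could be computed roughly as follows; the small constant only guards against $\ln(0)$ and the names are illustrative:

\begin{lstlisting}[language=iPython]
import numpy as np

def cross_entropy(y_true, y_pred, eps=1e-12):
    # y_true, y_pred: arrays of shape (n, C) whose rows sum to one;
    # sum over i of H(y_i, f(x_i)) with H(p, q) = sum_c p(c) ln(1/q(c))
    y_pred = np.clip(y_pred, eps, 1.0)
    return np.sum(y_true * np.log(1.0 / y_pred))
\end{lstlisting}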
|
|
||||||
% \todo{Den satz einbauen}
|
-Maximum Likelihood
|
||||||
% -Maximum Likelihood
|
-Ableitung mit softmax pseudo linear -> fast improvemtns possible
|
||||||
% -Ableitung mit softmax pseudo linear -> fast improvemtns possible
|
|
||||||
|
|
||||||
\subsubsection{Gradient Descent Algorithm}
|
\subsubsection{Gradient Descent Algorithm}
|
||||||
|
|
||||||
Trying to find the optimal parameter for fitting the model to the data
|
Trying to find the optimal parameter for fitting the model to the data
|
||||||
can be a hard problem. Given the complex nature of a neural network
|
can be a hard problem. Given the complex nature of a neural network
|
||||||
with many layers and neurons, it is hard to predict the impact of
|
with many layers and neurons it is hard to predict the impact of
|
||||||
single parameters on the accuracy of the output.
|
single parameters on the accuracy of the output.
|
||||||
Thus using numeric optimization algorithms is the only
|
Thus applying numeric optimization algorithms is the only
|
||||||
feasible way to fit the model.
|
feasible way to fit the model. A attractive algorithm for training
|
||||||
|
neural networks is gradient descent where each parameter $\theta_i$ is
|
||||||
An attractive algorithm for training
|
iterative changed according to the gradient regarding the error
|
||||||
neural networks is gradient descent. Here all parameters are
|
measure and a step size $\gamma$. For this all parameters are
|
||||||
initialized with certain values (often random or close to zero) and
|
initialized (often random or close to zero) and then iteratively
|
||||||
then iteratively updated. The updates are made in the direction of the
|
updated until a certain stopping criterion is hit, mostly either being a fixed
|
||||||
negative gradient of the error, scaled by a step size $\gamma$, until a
|
number of iterations or a desired upper limit for the error measure.
|
||||||
specified stopping criterion is hit.
|
|
||||||
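A bare-bones version of this update rule is sketched below (numpy assumed; the toy loss in the usage example is made up):

\begin{lstlisting}[language=iPython]
import numpy as np

def gradient_descent(loss_grad, theta0, gamma=0.01, iterations=1000):
    # loss_grad(theta) returns the gradient of the error measure at theta
    theta = np.array(theta0, dtype=float)
    for _ in range(iterations):
        theta -= gamma * loss_grad(theta)   # step against the gradient
    return theta

# usage on the toy loss L(theta) = ||theta - 3||^2 with gradient 2(theta - 3)
theta_opt = gradient_descent(lambda t: 2.0 * (t - 3.0), theta0=[0.0, 0.0])
\end{lstlisting}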
% This mostly either being a fixed
|
|
||||||
% number of iterations or a desired upper limit for the error measure.
|
|
||||||
% For a function $f_\theta$ with parameters $\theta \in \mathbb{R}^n$
|
% For a function $f_\theta$ with parameters $\theta \in \mathbb{R}^n$
|
||||||
% and a error function $L(f_\theta)$ the gradient descent algorithm is
|
% and a error function $L(f_\theta)$ the gradient descent algorithm is
|
||||||
% given in \ref{alg:gd}.
|
% given in \ref{alg:gd}.
|
||||||
@ -469,21 +452,16 @@ specified stopping criterion is hit.
|
|||||||
|
|
||||||
The algorithm for gradient descent is given in
|
The algorithm for gradient descent is given in
|
||||||
Algorithm~\ref{alg:gd}. In the context of fitting a neural network
|
Algorithm~\ref{alg:gd}. In the context of fitting a neural network
|
||||||
$f_\theta$ corresponds to an error measure of the network
|
$f_\theta$ corresponds to the error measurement of the network
|
||||||
$\mathcal{NN}_{\theta}$ where $\theta$ is a vector
|
$L\left(\mathcal{NN}_{\theta}\right)$ where $\theta$ is a vector
|
||||||
containing all the weights and biases of the network.
|
containing all the weights and biases of the network.
|
||||||
As can be seen, this requires computing the derivative of the network
|
As ca be seen this requires computing the derivative of the network
|
||||||
with regard to each variable. With the number of variables getting
|
with regard to each variable. With the number of variables getting
|
||||||
large in networks with multiple layers of high neuron count, naively
|
large in networks with multiple layers of high neuron count naively
|
||||||
computing the derivatives can become quite memory- and computationally
|
computing these can get quite memory and computational expensive. But
|
||||||
expensive.
|
by using the chain rule and exploiting the layered structure we can
|
||||||
By using the chain rule and exploiting the layered structure we can
|
compute the gradient much more efficiently by using backpropagation
|
||||||
compute the parameter update much more efficiently. This practice is
|
introduced by \textcite{backprop}.
|
||||||
called backpropagation and was introduced for use in neural networks by
|
|
||||||
\textcite{backprop}. The algorithm
|
|
||||||
for one data point is given in Algorithm~\ref{alg:backprop}, but for all error
|
|
||||||
functions that are sums of errors for single data points (MSE, cross
|
|
||||||
entropy) backpropagation extends analogously to the whole training set.
|
|
||||||
|
|
||||||
% \subsubsection{Backpropagation}
|
% \subsubsection{Backpropagation}
|
||||||
|
|
||||||
@ -497,34 +475,10 @@ entropy) backpropagation works analogously for larger training data.
|
|||||||
% for each layer from output layer towards the first layer while only
|
% for each layer from output layer towards the first layer while only
|
||||||
% needing to ....
|
% needing to ....
|
||||||
|
|
||||||
\begin{algorithm}[H]
|
\[
|
||||||
\SetAlgoLined
|
\frac{\partial L(...)}{}
|
||||||
\KwInput{Inputs $o_0$, neural network
|
\]
|
||||||
with $L$ hidden layers, weights $w$, and biases $b$ for $n_l$
|
\todo{Backprop richtig aufschreiben}
|
||||||
nodes as well as an activation function $\sigma_l$ in layer $l$
|
|
||||||
and loss function $\tilde{L}$.}
|
|
||||||
Forward Propagation:
|
|
||||||
\For{$l \in \left\{1, \dots, L+1\right\}$}{
|
|
||||||
Compute values for layer $l$:
|
|
||||||
$z_{l,k} \leftarrow b_{l,k} + w_{l,k}^{\mathrm{T}} o_{l-1}, k \in \left\{1,\dots,n_l\right\}$\;
|
|
||||||
$o_{l,k} \leftarrow \sigma_l(z_{l,k}), k \in \left\{1,\dots,n_l\right\}$ \;
|
|
||||||
}
|
|
||||||
Calculate derivative for output layer: $\delta_{L+1, k} \leftarrow
|
|
||||||
\frac{\partial\tilde{L}(o_{L+1})}{\partial o_{L+1,k}} \sigma_{L+1}'(z_{L+1,k})$\;
|
|
||||||
Back propagate the error:
|
|
||||||
\For{$l \in \left\{L,\dots,1\right\}$}{
|
|
||||||
$\delta_{l,k} \leftarrow w_{l+1,k}^{\mathrm{T}} \delta_{l+1}
|
|
||||||
\sigma_{l}'(z_{l,k}), k=1,\dots,n_k$
|
|
||||||
}
|
|
||||||
Calculate gradients:
|
|
||||||
$\frac{\partial\tilde{L}}{\partial w_{l,k,j}} =
|
|
||||||
\delta_{l,k}o_{l-1,j}$,
|
|
||||||
$\frac{\partial\tilde{L}}{\partial b_{l,k}} =
|
|
||||||
\delta_{l,k}$\;
|
|
||||||
|
|
||||||
\caption{Backpropagation for one data point}
|
|
||||||
\label{alg:backprop}
|
|
||||||
\end{algorithm}
|
|
||||||
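A compact Python sketch of Algorithm~\ref{alg:backprop} for a single data point is given below; purely for brevity every layer uses a ReLU activation and the loss is half the squared error, and all names are illustrative:

\begin{lstlisting}[language=iPython]
import numpy as np

def relu(z):   return np.maximum(0.0, z)
def relu_d(z): return (z > 0).astype(float)

def backprop_single(x, y, weights, biases):
    # weights[l-1], biases[l-1] parameterize layer l; o_0 = x
    os, zs = [x], []
    for W, b in zip(weights, biases):            # forward propagation
        z = b + W @ os[-1]
        zs.append(z)
        os.append(relu(z))
    grads_W = [None] * len(weights)
    grads_b = [None] * len(weights)
    delta = (os[-1] - y) * relu_d(zs[-1])        # delta of the output layer
    for l in range(len(weights) - 1, -1, -1):    # back propagate the error
        grads_W[l] = np.outer(delta, os[l])      # gradient w.r.t. this layer's weights
        grads_b[l] = delta                       # gradient w.r.t. this layer's biases
        if l > 0:
            delta = (weights[l].T @ delta) * relu_d(zs[l - 1])
    return grads_W, grads_b
\end{lstlisting}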
|
|
||||||
%%% Local Variables:
|
%%% Local Variables:
|
||||||
%%% mode: latex
|
%%% mode: latex
|
||||||
|
3726
TeX/main.bcf
3726
TeX/main.bcf
File diff suppressed because it is too large
@ -1,6 +0,0 @@
|
|||||||
\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax
|
|
||||||
\babel@toc {english}{}
|
|
||||||
\defcounter {refsection}{0}\relax
|
|
||||||
\contentsline {table}{\numberline {4.1}{\ignorespaces Values of Test Accuracies for Models Trained on Subsets of MNIST Handwritten Digits}}{41}{table.4.1}%
|
|
||||||
\defcounter {refsection}{0}\relax
|
|
||||||
\contentsline {table}{\numberline {4.2}{\ignorespaces Values of Test Accuracies for Models Trained on Subsets of Fashion MNIST}}{41}{table.4.2}%
|
|
25
TeX/main.out
25
TeX/main.out
@ -1,25 +0,0 @@
|
|||||||
\BOOKMARK [1][-]{section.1}{Introduction}{}% 1
|
|
||||||
\BOOKMARK [1][-]{section.2}{Introduction to Neural Networks}{}% 2
|
|
||||||
\BOOKMARK [2][-]{subsection.2.1}{Nonlinearity of Neural Networks}{section.2}% 3
|
|
||||||
\BOOKMARK [2][-]{subsection.2.2}{Training Neural Networks}{section.2}% 4
|
|
||||||
\BOOKMARK [3][-]{subsubsection.2.2.1}{Nonlinearity in the Last Layer}{subsection.2.2}% 5
|
|
||||||
\BOOKMARK [3][-]{subsubsection.2.2.2}{Error Measurement}{subsection.2.2}% 6
|
|
||||||
\BOOKMARK [3][-]{subsubsection.2.2.3}{Gradient Descent Algorithm}{subsection.2.2}% 7
|
|
||||||
\BOOKMARK [1][-]{section.3}{Shallow Neural Networks}{}% 8
|
|
||||||
\BOOKMARK [2][-]{subsection.3.1}{Convergence Behavior of One-Dimensional Randomized Shallow Neural Networks}{section.3}% 9
|
|
||||||
\BOOKMARK [2][-]{subsection.3.2}{Simulations}{section.3}% 10
|
|
||||||
\BOOKMARK [1][-]{section.4}{Application of Neural Networks to Higher Complexity Problems}{}% 11
|
|
||||||
\BOOKMARK [2][-]{subsection.4.1}{Convolution}{section.4}% 12
|
|
||||||
\BOOKMARK [2][-]{subsection.4.2}{Convolutional Neural Networks}{section.4}% 13
|
|
||||||
\BOOKMARK [2][-]{subsection.4.3}{Stochastic Training Algorithms}{section.4}% 14
|
|
||||||
\BOOKMARK [2][-]{subsection.4.4}{Modified Stochastic Gradient Descent}{section.4}% 15
|
|
||||||
\BOOKMARK [2][-]{subsection.4.5}{Combating Overfitting}{section.4}% 16
|
|
||||||
\BOOKMARK [3][-]{subsubsection.4.5.1}{Dropout}{subsection.4.5}% 17
|
|
||||||
\BOOKMARK [3][-]{subsubsection.4.5.2}{Manipulation of Input Data}{subsection.4.5}% 18
|
|
||||||
\BOOKMARK [3][-]{subsubsection.4.5.3}{Comparisons}{subsection.4.5}% 19
|
|
||||||
\BOOKMARK [3][-]{subsubsection.4.5.4}{Effectiveness for Small Training Sets}{subsection.4.5}% 20
|
|
||||||
\BOOKMARK [1][-]{section.5}{Summary and Outlook}{}% 21
|
|
||||||
\BOOKMARK [1][-]{section*.27}{Appendices}{}% 22
|
|
||||||
\BOOKMARK [1][-]{Appendix.1.A}{Notes on Proofs of Lemmata in Section 3.1}{}% 23
|
|
||||||
\BOOKMARK [1][-]{Appendix.1.B}{Implementations}{}% 24
|
|
||||||
\BOOKMARK [1][-]{Appendix.1.C}{Additional Comparisons}{}% 25
|
|
164
TeX/main.tex
164
TeX/main.tex
@ -1,4 +1,4 @@
|
|||||||
\documentclass[a4paper, 12pt]{article}
|
\documentclass[a4paper, 12pt, draft=true]{article}
|
||||||
|
|
||||||
%\usepackage[margin=1in]{geometry}
|
%\usepackage[margin=1in]{geometry}
|
||||||
%\geometry{a4paper, left=30mm, right=40mm,top=25mm, bottom=20mm}
|
%\geometry{a4paper, left=30mm, right=40mm,top=25mm, bottom=20mm}
|
||||||
@ -6,7 +6,6 @@
|
|||||||
\usepackage[english]{babel}
|
\usepackage[english]{babel}
|
||||||
\usepackage[utf8]{inputenc}
|
\usepackage[utf8]{inputenc}
|
||||||
\usepackage[T1]{fontenc}
|
\usepackage[T1]{fontenc}
|
||||||
\usepackage{hyperref}
|
|
||||||
\usepackage{textcomp}
|
\usepackage{textcomp}
|
||||||
%\usepackage{libertine}
|
%\usepackage{libertine}
|
||||||
\usepackage{amsmath}
|
\usepackage{amsmath}
|
||||||
@ -35,18 +34,10 @@
|
|||||||
\usepackage{todonotes}
|
\usepackage{todonotes}
|
||||||
\usepackage{lipsum}
|
\usepackage{lipsum}
|
||||||
\usepackage[ruled,vlined]{algorithm2e}
|
\usepackage[ruled,vlined]{algorithm2e}
|
||||||
%\usepackage{showframe}
|
\usepackage{showframe}
|
||||||
\usepackage[protrusion=true, expansion=true, kerning=true, letterspace
|
\usepackage[protrusion=true, expansion=true, kerning=true, letterspace
|
||||||
= 150]{microtype}
|
= 150]{microtype}
|
||||||
%\usepackage{titlecaps}
|
\usepackage{titlecaps}
|
||||||
\usepackage{afterpage}
|
|
||||||
\usepackage{xcolor}
|
|
||||||
\usepackage{chngcntr}
|
|
||||||
%\usepackage{hyperref}
|
|
||||||
% \hypersetup{
|
|
||||||
% linktoc=all, %set to all if you want both sections and subsections linked
|
|
||||||
% }
|
|
||||||
\allowdisplaybreaks
|
|
||||||
|
|
||||||
\captionsetup[sub]{justification=centering}
|
\captionsetup[sub]{justification=centering}
|
||||||
|
|
||||||
@ -61,125 +52,9 @@
|
|||||||
\pgfplotsset{compat = 1.16}
|
\pgfplotsset{compat = 1.16}
|
||||||
\usepackage[export]{adjustbox}
|
\usepackage[export]{adjustbox}
|
||||||
|
|
||||||
\definecolor{maroon}{cmyk}{0, 0.87, 0.68, 0.32}
|
|
||||||
\definecolor{halfgray}{gray}{0.55}
|
|
||||||
\definecolor{ipython_frame}{RGB}{207, 207, 207}
|
|
||||||
\definecolor{ipython_bg}{RGB}{247, 247, 247}
|
|
||||||
\definecolor{ipython_red}{RGB}{186, 33, 33}
|
|
||||||
\definecolor{ipython_green}{RGB}{0, 128, 0}
|
|
||||||
\definecolor{ipython_cyan}{RGB}{64, 128, 128}
|
|
||||||
\definecolor{ipython_purple}{RGB}{110, 64, 130}
|
|
||||||
|
|
||||||
\usepackage{listings}
|
|
||||||
\usepackage{float}
|
|
||||||
|
|
||||||
\newfloat{lstfloat}{htbp}{lop}
|
\usepackage[style=authoryear, backend=bibtex]{biblatex}
|
||||||
\floatname{lstfloat}{Listing}
|
|
||||||
\def\lstfloatautorefname{Listing}
|
|
||||||
|
|
||||||
\lstset{
|
|
||||||
breaklines=true,
|
|
||||||
%
|
|
||||||
extendedchars=true,
|
|
||||||
literate=
|
|
||||||
{á}{{\'a}}1 {é}{{\'e}}1 {í}{{\'i}}1 {ó}{{\'o}}1 {ú}{{\'u}}1
|
|
||||||
{Á}{{\'A}}1 {É}{{\'E}}1 {Í}{{\'I}}1 {Ó}{{\'O}}1 {Ú}{{\'U}}1
|
|
||||||
{à}{{\`a}}1 {è}{{\`e}}1 {ì}{{\`i}}1 {ò}{{\`o}}1 {ù}{{\`u}}1
|
|
||||||
{À}{{\`A}}1 {È}{{\'E}}1 {Ì}{{\`I}}1 {Ò}{{\`O}}1 {Ù}{{\`U}}1
|
|
||||||
{ä}{{\"a}}1 {ë}{{\"e}}1 {ï}{{\"i}}1 {ö}{{\"o}}1 {ü}{{\"u}}1
|
|
||||||
{Ä}{{\"A}}1 {Ë}{{\"E}}1 {Ï}{{\"I}}1 {Ö}{{\"O}}1 {Ü}{{\"U}}1
|
|
||||||
{â}{{\^a}}1 {ê}{{\^e}}1 {î}{{\^i}}1 {ô}{{\^o}}1 {û}{{\^u}}1
|
|
||||||
{Â}{{\^A}}1 {Ê}{{\^E}}1 {Î}{{\^I}}1 {Ô}{{\^O}}1 {Û}{{\^U}}1
|
|
||||||
{œ}{{\oe}}1 {Œ}{{\OE}}1 {æ}{{\ae}}1 {Æ}{{\AE}}1 {ß}{{\ss}}1
|
|
||||||
{ç}{{\c c}}1 {Ç}{{\c C}}1 {ø}{{\o}}1 {å}{{\r a}}1 {Å}{{\r A}}1
|
|
||||||
{€}{{\EUR}}1 {£}{{\pounds}}1
|
|
||||||
}
|
|
||||||
|
|
||||||
%%
|
|
||||||
%% Python definition (c) 1998 Michael Weber
|
|
||||||
%% Additional definitions (2013) Alexis Dimitriadis
|
|
||||||
%% modified by me (should not have empty lines)
|
|
||||||
%%
|
|
||||||
\lstdefinelanguage{iPython}{
|
|
||||||
morekeywords={access,and,break,class,continue,def,del,elif,else,except,exec,finally,for,from,global,if,import,
|
|
||||||
in,is,lambda,not,or,pass,print,raise,return,try,while},%
|
|
||||||
%
|
|
||||||
% Built-ins
|
|
||||||
morekeywords=[2]{abs,all,any,basestring,bin,bool,bytearray,callable,chr,classmethod,cmp,compile,complex,delattr,dict,dir,divmod,enumerate,eval,execfile,file,filter,float,format,frozenset,getattr,globals,hasattr,hash,help,hex,id,input,int,isinstance,issubclass,iter,len,list,locals,long,map,max,memoryview,min,next,object,oct,open,ord,pow,property,range,raw_input,reduce,reload,repr,reversed,round,set,setattr,slice,sorted,staticmethod,str,sum,super,tuple,type,unichr,unicode,vars,xrange,zip,apply,buffer,coerce,intern,val},%
|
|
||||||
%
|
|
||||||
sensitive=true,%
|
|
||||||
morecomment=[l]\#,%
|
|
||||||
morestring=[b]',%
|
|
||||||
morestring=[b]",%
|
|
||||||
%
|
|
||||||
morestring=[s]{'''}{'''},% used for documentation text (mulitiline strings)
|
|
||||||
morestring=[s]{"""}{"""},% added by Philipp Matthias Hahn
|
|
||||||
%
|
|
||||||
morestring=[s]{r'}{'},% `raw' strings
|
|
||||||
morestring=[s]{r"}{"},%
|
|
||||||
morestring=[s]{r'''}{'''},%
|
|
||||||
morestring=[s]{r"""}{"""},%
|
|
||||||
morestring=[s]{u'}{'},% unicode strings
|
|
||||||
morestring=[s]{u"}{"},%
|
|
||||||
morestring=[s]{u'''}{'''},%
|
|
||||||
morestring=[s]{u"""}{"""},%
|
|
||||||
%
|
|
||||||
% {replace}{replacement}{lenght of replace}
|
|
||||||
% *{-}{-}{1} will not replace in comments and so on
|
|
||||||
literate=
|
|
||||||
{á}{{\'a}}1 {é}{{\'e}}1 {í}{{\'i}}1 {ó}{{\'o}}1 {ú}{{\'u}}1
|
|
||||||
{Á}{{\'A}}1 {É}{{\'E}}1 {Í}{{\'I}}1 {Ó}{{\'O}}1 {Ú}{{\'U}}1
|
|
||||||
{à}{{\`a}}1 {è}{{\`e}}1 {ì}{{\`i}}1 {ò}{{\`o}}1 {ù}{{\`u}}1
|
|
||||||
{À}{{\`A}}1 {È}{{\'E}}1 {Ì}{{\`I}}1 {Ò}{{\`O}}1 {Ù}{{\`U}}1
|
|
||||||
{ä}{{\"a}}1 {ë}{{\"e}}1 {ï}{{\"i}}1 {ö}{{\"o}}1 {ü}{{\"u}}1
|
|
||||||
{Ä}{{\"A}}1 {Ë}{{\"E}}1 {Ï}{{\"I}}1 {Ö}{{\"O}}1 {Ü}{{\"U}}1
|
|
||||||
{â}{{\^a}}1 {ê}{{\^e}}1 {î}{{\^i}}1 {ô}{{\^o}}1 {û}{{\^u}}1
|
|
||||||
{Â}{{\^A}}1 {Ê}{{\^E}}1 {Î}{{\^I}}1 {Ô}{{\^O}}1 {Û}{{\^U}}1
|
|
||||||
{œ}{{\oe}}1 {Œ}{{\OE}}1 {æ}{{\ae}}1 {Æ}{{\AE}}1 {ß}{{\ss}}1
|
|
||||||
{ç}{{\c c}}1 {Ç}{{\c C}}1 {ø}{{\o}}1 {å}{{\r a}}1 {Å}{{\r A}}1
|
|
||||||
{€}{{\EUR}}1 {£}{{\pounds}}1
|
|
||||||
%
|
|
||||||
{^}{{{\color{ipython_purple}\^{}}}}1
|
|
||||||
{=}{{{\color{ipython_purple}=}}}1
|
|
||||||
%
|
|
||||||
{+}{{{\color{ipython_purple}+}}}1
|
|
||||||
{*}{{{\color{ipython_purple}$^\ast$}}}1
|
|
||||||
{/}{{{\color{ipython_purple}/}}}1
|
|
||||||
%
|
|
||||||
{+=}{{{+=}}}1
|
|
||||||
{-=}{{{-=}}}1
|
|
||||||
{*=}{{{$^\ast$=}}}1
|
|
||||||
{/=}{{{/=}}}1,
|
|
||||||
literate=
|
|
||||||
*{-}{{{\color{ipython_purple}-}}}1
|
|
||||||
{?}{{{\color{ipython_purple}?}}}1,
|
|
||||||
%
|
|
||||||
identifierstyle=\color{black}\ttfamily,
|
|
||||||
commentstyle=\color{ipython_red}\ttfamily,
|
|
||||||
stringstyle=\color{ipython_red}\ttfamily,
|
|
||||||
keepspaces=true,
|
|
||||||
showspaces=false,
|
|
||||||
showstringspaces=false,
|
|
||||||
%
|
|
||||||
rulecolor=\color{ipython_frame},
|
|
||||||
frame=single,
|
|
||||||
frameround={t}{t}{t}{t},
|
|
||||||
framexleftmargin=6mm,
|
|
||||||
numbers=left,
|
|
||||||
numberstyle=\tiny\color{halfgray},
|
|
||||||
%
|
|
||||||
%
|
|
||||||
backgroundcolor=\color{ipython_bg},
|
|
||||||
% extendedchars=true,
|
|
||||||
basicstyle=\scriptsize,
|
|
||||||
keywordstyle=\color{ipython_green}\ttfamily,
|
|
||||||
morekeywords = [3]{Int, Double},
|
|
||||||
morekeywords = [2]{foldRight, case},
|
|
||||||
keywordstyle = [3]{\color{ipython_purple}\ttfamily},
|
|
||||||
keywordstyle = [2]{\color{ipython_cyan}\ttfamily},
|
|
||||||
}
|
|
||||||
|
|
||||||
\usepackage[authordate, backend=bibtex, firstinits = true]{biblatex-chicago}
|
|
||||||
\urlstyle{same}
|
\urlstyle{same}
|
||||||
\bibliography{bibliograpy.bib}
|
\bibliography{bibliograpy.bib}
|
||||||
\numberwithin{figure}{section}
|
\numberwithin{figure}{section}
|
||||||
@ -198,13 +73,11 @@
|
|||||||
\newtheorem{Algorithm}[Theorem]{Algorithm}
|
\newtheorem{Algorithm}[Theorem]{Algorithm}
|
||||||
\newtheorem{Example}[Theorem]{Example}
|
\newtheorem{Example}[Theorem]{Example}
|
||||||
\newtheorem{Assumption}[Theorem]{Assumption}
|
\newtheorem{Assumption}[Theorem]{Assumption}
|
||||||
\newtheorem{Proof}[Theorem]{Proof}
|
|
||||||
|
|
||||||
|
|
||||||
\DeclareMathOperator*{\plim}{\mathbb{P}\text{-}\lim}
|
\DeclareMathOperator*{\plim}{\mathbb{P}\text{-}\lim}
|
||||||
\DeclareMathOperator{\supp}{supp}
|
\DeclareMathOperator{\supp}{supp}
|
||||||
\DeclareMathOperator*{\argmin}{arg\,min}
|
\DeclareMathOperator*{\argmin}{arg\,min}
|
||||||
\DeclareMathOperator*{\argmax}{arg\,max}
|
|
||||||
\DeclareMathOperator*{\po}{\mathbb{P}\text{-}\mathcal{O}}
|
\DeclareMathOperator*{\po}{\mathbb{P}\text{-}\mathcal{O}}
|
||||||
\DeclareMathOperator*{\equals}{=}
|
\DeclareMathOperator*{\equals}{=}
|
||||||
\begin{document}
|
\begin{document}
|
||||||
@ -230,36 +103,19 @@
|
|||||||
%\textbf{Seminar Machine--Learning: Unsupervised %Learning} \newline
|
%\textbf{Seminar Machine--Learning: Unsupervised %Learning} \newline
|
||||||
%Institut für Mathematik der Universität %Augsburg\\
|
%Institut für Mathematik der Universität %Augsburg\\
|
||||||
%Lehrstuhl für Rechnerorientierte Statistik und %Datenanalyse\\
|
%Lehrstuhl für Rechnerorientierte Statistik und %Datenanalyse\\
|
||||||
|
\smallskip\hrule\bigskip
|
||||||
|
|
||||||
\begin{center}
|
\begin{center}
|
||||||
\huge \textbf{Master Thesis}\\
|
{\huge{Electricity Price Forecasting based on Regression Tree Models}}
|
||||||
\vspace{1cm}
|
|
||||||
\Large \textbf{University Augsburg\\Department of Mathematics\\Chair of
|
|
||||||
Computational Statistics and Data Analysis}
|
|
||||||
\vspace{1cm}
|
|
||||||
\end{center}
|
\end{center}
|
||||||
|
\hrulefill
|
||||||
\begin{figure}[h]
|
|
||||||
\centering
|
|
||||||
\includegraphics[scale=1.3]{Figures/Uni_Aug_Siegel_32Grad_schwarz.png}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
\begin{center}
|
|
||||||
\vspace{1cm}
|
|
||||||
\huge \textbf{Neural Networks and their Application on Higher Complexity Problems}\\
|
|
||||||
\vspace{1cm}
|
|
||||||
\huge \textbf{Tim Tobias Arndt}\\
|
|
||||||
\vspace{1cm}
|
|
||||||
\Large \textbf{October 2020}
|
|
||||||
\end{center}
|
|
||||||
|
|
||||||
\pagenumbering{gobble}
|
\pagenumbering{gobble}
|
||||||
\clearpage
|
\newpage
|
||||||
%\setcounter{tocdepth}{4}
|
%\setcounter{tocdepth}{4}
|
||||||
\tableofcontents
|
\tableofcontents
|
||||||
\clearpage
|
\clearpage
|
||||||
\listoffigures
|
\listoffigures
|
||||||
\listoftables
|
\listoftodos
|
||||||
\newpage
|
\newpage
|
||||||
\pagenumbering{arabic}
|
\pagenumbering{arabic}
|
||||||
% Introduction
|
% Introduction
|
||||||
@ -281,7 +137,7 @@
|
|||||||
\input{further_applications_of_nn}
|
\input{further_applications_of_nn}
|
||||||
|
|
||||||
\newpage
|
\newpage
|
||||||
\DeclareNameAlias{sortname}{last-first}
|
|
||||||
\printbibliography
|
\printbibliography
|
||||||
|
|
||||||
% Appendix A
|
% Appendix A
|
||||||
|
514
TeX/theo_3_8.tex
514
TeX/theo_3_8.tex
@ -5,18 +5,17 @@
|
|||||||
%%% TeX-master: "main"
|
%%% TeX-master: "main"
|
||||||
%%% End:
|
%%% End:
|
||||||
\section{Shallow Neural Networks}
|
\section{Shallow Neural Networks}
|
||||||
\label{sec:shallownn}
|
|
||||||
% In order to get a some understanding of the behavior of neural
|
% In order to get a some understanding of the behavior of neural
|
||||||
% networks we study a simplified class of networks called shallow neural
|
% networks we study a simplified class of networks called shallow neural
|
||||||
% networks in this chapter.
|
% networks in this chapter.
|
||||||
% We consider shallow neural networks consist of a single
|
% We consider shallow neural networks consist of a single
|
||||||
% hidden layer and
|
% hidden layer and
|
||||||
To get some understanding of the behavior of neural networks
|
In order to get some understanding of the behavior of neural networks
|
||||||
we examine a simple class of networks in this chapter. We consider
|
we examine a simple class of networks in this chapter. We consider
|
||||||
networks that contain only one hidden layer and have a single output
|
networks that contain only one hidden layer and have a single output
|
||||||
node and call these networks shallow neural networks.
|
node. We call these networks shallow neural networks.
|
||||||
\begin{Definition}[Shallow neural network, Heiss, Teichmann, and
|
\begin{Definition}[Shallow neural network]
|
||||||
Wutte (2019, Definition 1.4)]
|
|
||||||
For an input dimension $d$ and a Lipschitz continuous activation function $\sigma:
|
For a input dimension $d$ and a Lipschitz continuous activation function $\sigma:
|
||||||
\mathbb{R} \to \mathbb{R}$ we define a shallow neural network with
|
\mathbb{R} \to \mathbb{R}$ we define a shallow neural network with
|
||||||
$n$ hidden nodes as
|
$n$ hidden nodes as
|
||||||
@ -85,8 +84,8 @@ with
|
|||||||
% \label{fig:shallowNN}
|
% \label{fig:shallowNN}
|
||||||
% \end{figure}
|
% \end{figure}
|
||||||
|
|
||||||
As neural networks with a large number of nodes have a large number of
|
As neural networks with a large amount of nodes have a large amount of
|
||||||
tunable parameters, they can often fit data quite well. If
|
parameters that can be tuned it can often fit the data quite well. If
|
||||||
a ReLU activation function
|
a ReLU activation function
|
||||||
\[
|
\[
|
||||||
\sigma(x) \coloneqq \max{(0, x)}
|
\sigma(x) \coloneqq \max{(0, x)}
|
||||||
@ -106,9 +105,9 @@ on MSE will perfectly fit the data.
|
|||||||
minimizing squared error loss.
|
minimizing squared error loss.
|
||||||
\proof
|
\proof
|
||||||
W.l.o.g. all values $x_{ij}^{\text{train}} \in [0,1],~\forall i \in
|
W.l.o.g. all values $x_{ij}^{\text{train}} \in [0,1],~\forall i \in
|
||||||
\left\{1,\dots, t\right\}, j \in \left\{1,\dots,d\right\}$. Now we
|
\left\{1,\dots\right\}, j \in \left\{1,\dots,d\right\}$. Now we
|
||||||
choose $v^*$ such that the vector product with $x_i^{\text{train}}$
|
chose $v^*$ in order to calculate a unique value for all
|
||||||
results in distinct values for all $i \in \left\{1,\dots,t\right\}$:
|
$x_i^{\text{train}}$:
|
||||||
\[
|
\[
|
||||||
v^*_{k,j} = v^*_{j} = 10^{j-1}, ~ \forall k \in \left\{1,\dots,n\right\}.
|
v^*_{k,j} = v^*_{j} = 10^{j-1}, ~ \forall k \in \left\{1,\dots,n\right\}.
|
||||||
\]
|
\]
|
||||||
@ -142,32 +141,30 @@ on MSE will perfectly fit the data.
|
|||||||
and $\vartheta^* = (w^*, b^*, v^*, c = 0)$ we get
|
and $\vartheta^* = (w^*, b^*, v^*, c = 0)$ we get
|
||||||
\[
|
\[
|
||||||
\mathcal{NN}_{\vartheta^*} (x_i^{\text{train}}) = \sum_{k =
|
\mathcal{NN}_{\vartheta^*} (x_i^{\text{train}}) = \sum_{k =
|
||||||
1}^{i-1} w_k\left(b_k^* + \left(v^*\right)^{\mathrm{T}}
|
1}^{i-1} w_k\left(\left(v^*\right)^{\mathrm{T}}
|
||||||
x_i^{\text{train}}\right) + w_i\left(b_i^* +\left(v^*\right)^{\mathrm{T}}
|
x_i^{\text{train}}\right) + w_i\left(\left(v^*\right)^{\mathrm{T}}
|
||||||
x_i^{\text{train}}\right) = y_i^{\text{train}}.
|
x_i^{\text{train}}\right) = y_i^{\text{train}}.
|
||||||
\]
|
\]
|
||||||
As the squared error of $\mathcal{NN}_{\vartheta^*}$ is zero all
|
As the squared error of $\mathcal{NN}_{\vartheta^*}$ is zero all
|
||||||
squared error loss minimizing shallow networks with at least $t$ hidden
|
squared error loss minimizing shallow networks with at least $t$ hidden
|
||||||
nodes will perfectly fit the data. \qed
|
nodes will perfectly fit the data.
|
||||||
|
\qed
|
||||||
\label{theo:overfit}
|
\label{theo:overfit}
|
||||||
\end{Theorem}
|
\end{Theorem}
|
||||||
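This exact-fit behavior is easy to reproduce numerically: fixing random hidden parameters of a shallow ReLU network and solving for the output weights by least squares typically drives the training MSE to (numerically) zero once there are at least as many hidden nodes as data points. The following Python sketch uses the same data-generating process as Figure~\ref{fig:overfit}, but the parameter distributions are placeholders rather than the explicit construction of the proof:

\begin{lstlisting}[language=iPython]
import numpy as np

rng = np.random.default_rng(0)
t, n = 20, 40                                    # data points, hidden nodes
x = rng.uniform(0, 1, size=t)
y = np.sin((x + np.pi) / (2 * np.pi)) + rng.normal(0, 0.4, size=t)

v = rng.normal(0, 5, size=n)                     # fixed hidden weights
b = rng.uniform(-5, 5, size=n)                   # fixed hidden biases
H = np.maximum(0.0, np.outer(x, v) + b)          # hidden layer outputs, shape (t, n)

w, *_ = np.linalg.lstsq(H, y, rcond=None)        # output weights minimizing MSE
print(np.mean((H @ w - y) ** 2))                 # essentially zero for n >= t
\end{lstlisting}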
|
|
||||||
However, this behavior is often not desired as overfit models tend to
|
However this behavior is often not desired as over fit models generally
|
||||||
have bad generalization properties, especially if noise is present in
|
have bad generalization properties especially if noise is present in
|
||||||
the data. This effect is illustrated in
|
the data. This effect is illustrated in
|
||||||
Figure~\ref{fig:overfit}.
|
Figure~\ref{fig:overfit}. Here a shallow neural network that perfectly fits the
|
||||||
|
training data regarding MSE is \todo{phrasing}
|
||||||
Here a shallow neural network is
|
constructed according to the proof of Theorem~\ref{theo:overfit} and
|
||||||
constructed according to the proof of Theorem~\ref{theo:overfit} to
|
compared to a regression spline
|
||||||
perfectly fit some data and
|
|
||||||
compared to a cubic smoothing spline
|
|
||||||
(Definition~\ref{def:wrs}). While the neural network
|
(Definition~\ref{def:wrs}). While the neural network
|
||||||
fits the data better than the spline, the spline represents the
|
fits the data better than the spline, the spline represents the
|
||||||
underlying mechanism that was used to generate the data more accurately. The better
|
underlying mechanism that was used to generate the data more accurately. The better
|
||||||
generalization of the spline compared to the network is further
|
generalization of the spline compared to the network is further
|
||||||
demonstrated by the better performance on newly generated
|
demonstrated by the better validation error computed on newly generated
|
||||||
test data.
|
test data.
|
||||||
|
|
||||||
In order to improve the accuracy of the model we want to reduce
|
In order to improve the accuracy of the model we want to reduce
|
||||||
overfitting. A possible way to achieve this is by explicitly
|
overfitting. A possible way to achieve this is by explicitly
|
||||||
regularizing the network through the cost function as done with
|
regularizing the network through the cost function as done with
|
||||||
@ -175,12 +172,13 @@ ridge penalized networks
|
|||||||
(Definition~\ref{def:rpnn}) where large weights $w$ are punished. In
|
(Definition~\ref{def:rpnn}) where large weights $w$ are punished. In
|
||||||
Theorem~\ref{theo:main1} we will
|
Theorem~\ref{theo:main1} we will
|
||||||
prove that this will result in the shallow neural network converging to
|
prove that this will result in the shallow neural network converging to
|
||||||
a form of splines as the number of nodes in the hidden layer is
|
regressions splines as the amount of nodes in the hidden layer is
|
||||||
increased.
|
increased.
|
||||||
|
|
||||||
\vfill
|
|
||||||
|
|
||||||
\begin{figure}[h]
|
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
\pgfplotsset{
|
\pgfplotsset{
|
||||||
compat=1.11,
|
compat=1.11,
|
||||||
legend image code/.code={
|
legend image code/.code={
|
||||||
@ -197,26 +195,25 @@ increased.
|
|||||||
height = 0.6\textwidth]
|
height = 0.6\textwidth]
|
||||||
\addplot table
|
\addplot table
|
||||||
[x=x, y=y, col sep=comma, only marks,mark options={scale =
|
[x=x, y=y, col sep=comma, only marks,mark options={scale =
|
||||||
0.7}] {Figures/Data/overfit.csv};
|
0.7}] {Plots/Data/overfit.csv};
|
||||||
\addplot [red, line width=0.8pt] table [x=x_n, y=s_n, col
|
\addplot [red, line width=0.8pt] table [x=x_n, y=s_n, col
|
||||||
sep=comma] {Figures/Data/overfit.csv};
|
sep=comma, forget plot] {Plots/Data/overfit.csv};
|
||||||
\addplot [black, line width=0.8pt] table [x=x_n, y=y_n, col
|
\addplot [black, line width=0.8pt] table [x=x_n, y=y_n, col
|
||||||
sep=comma] {Figures/Data/overfit.csv};
|
sep=comma] {Plots/Data/overfit.csv};
|
||||||
\addplot [black, line width=0.8pt, dashed] table [x=x, y=y, col
|
\addplot [black, line width=0.8pt, dashed] table [x=x, y=y, col
|
||||||
sep=comma] {Figures/Data/overfit_spline.csv};
|
sep=comma] {Plots/Data/overfit_spline.csv};
|
||||||
|
|
||||||
\addlegendentry{\footnotesize{Data}};
|
\addlegendentry{\footnotesize{data}};
|
||||||
\addlegendentry{\footnotesize{Truth}};
|
|
||||||
\addlegendentry{\footnotesize{$\mathcal{NN}_{\vartheta^*}$}};
|
\addlegendentry{\footnotesize{$\mathcal{NN}_{\vartheta^*}$}};
|
||||||
\addlegendentry{\footnotesize{Spline}};
|
\addlegendentry{\footnotesize{spline}};
|
||||||
\end{axis}
|
\end{axis}
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
\caption[Overfitting of Shallow Neural Networks]{For data of the form $y=\sin(\frac{x+\pi}{2 \pi}) +
|
\caption[Overfitting of shallow neural networks]{For data of the form $y=\sin(\frac{x+\pi}{2 \pi}) +
|
||||||
\varepsilon,~ \varepsilon \sim \mathcal{N}(0,0.4)$
|
\varepsilon,~ \varepsilon \sim \mathcal{N}(0,0.4)$
|
||||||
(\textcolor{blue}{blue}) the neural network constructed
|
(\textcolor{blue}{blue dots}) the neural network constructed
|
||||||
according to the proof of Theorem~\ref{theo:overfit} (black) and the
|
according to the proof of Theorem~\ref{theo:overfit} (black) and the
|
||||||
underlying signal (\textcolor{red}{red}). While the network has no
|
underlying signal (\textcolor{red}{red}). While the network has no
|
||||||
bias a cubic smoothing spline (black, dashed) fits the data much
|
bias a regression spline (black dashed) fits the data much
|
||||||
better. For a test set of size 20 with uniformly distributed $x$
|
better. For a test set of size 20 with uniformly distributed $x$
|
||||||
values and responses of the same fashion as the training data the MSE of the neural network is
|
values and responses of the same fashion as the training data the MSE of the neural network is
|
||||||
0.30, while the MSE of the spline is only 0.14 thus generalizing
|
0.30, while the MSE of the spline is only 0.14 thus generalizing
|
||||||
@ -225,43 +222,31 @@ increased.
|
|||||||
\label{fig:overfit}
|
\label{fig:overfit}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
\vfill
|
|
||||||
|
|
||||||
\clearpage
|
\clearpage
|
||||||
\subsection{Convergence Behavior of One-Dimensional Randomized Shallow
|
\subsection{Convergence Behaviour of 1-dim. Randomized Shallow Neural
|
||||||
Neural Networks}
|
Networks}
|
||||||
\label{sec:conv}
|
|
||||||
|
|
||||||
This section is based on \textcite{heiss2019}.
|
|
||||||
In this section, we examine the convergence behavior of certain shallow
|
|
||||||
neural networks.
|
|
||||||
We consider shallow neural networks with one-dimensional input where the parameters in the
|
|
||||||
hidden layer are randomized, resulting in only the weights in the
|
|
||||||
output layer being trainable.
|
|
||||||
Additionally, we assume all neurons use a ReLU as an activation function
|
|
||||||
and call such networks randomized shallow neural networks.
|
|
||||||
|
|
||||||
% We will analyze the
|
This section is based on \textcite{heiss2019}. We will analyze the
|
||||||
% connection between randomized shallow
|
connection between randomized shallow
|
||||||
% Neural Networks with one dimensional input with a ReLU as activation
|
Neural Networks with one dimensional input with a ReLU as activation
|
||||||
% function for all neurons and cubic smoothing splines.
|
function for all neurons and regression splines.
|
||||||
% % \[
|
% \[
|
||||||
% % \sigma(x) = \max\left\{0,x\right\}.
|
% \sigma(x) = \max\left\{0,x\right\}.
|
||||||
% % \]
|
% \]
|
||||||
% We will see that the punishment of the size of the weights in training
|
We will see that the punishment of the size of the weights in training
|
||||||
% the randomized shallow
|
the randomized shallow
|
||||||
% Neural Network will result in a learned function that minimizes the second
|
Neural Network will result in a learned function that minimizes the second
|
||||||
% derivative as the amount of hidden nodes is grown to infinity. In order
|
derivative as the amount of hidden nodes is grown to infinity. In order
|
||||||
% to properly formulate this relation we will first need to introduce
|
to properly formulate this relation we will first need to introduce
|
||||||
% some definitions, all neural networks introduced in the following will
|
some definitions, all neural networks introduced in the following will
|
||||||
% use a ReLU as activation at all neurons.
|
use a ReLU as activation at all neurons.
|
||||||
|
|
||||||
% A randomized shallow network is characterized by only the weight
|
A randomized shallow network is characterized by only the weight
|
||||||
% parameter of the output layer being trainable, whereas the other
|
parameter of the output layer being trainable, whereas the other
|
||||||
% parameters are random numbers.
|
parameters are random numbers.
|
||||||
|
|
||||||
\begin{Definition}[Randomized shallow neural network, Heiss, Teichmann, and
|
\begin{Definition}[Randomized shallow neural network]
|
||||||
Wutte (2019, Definition 2.1)]
|
|
||||||
For an input dimension $d$, let $n \in \mathbb{N}$ be the number of
|
For an input dimension $d$, let $n \in \mathbb{N}$ be the number of
|
||||||
hidden nodes and $v(\omega) \in \mathbb{R}^{i \times n}, b(\omega)
|
hidden nodes and $v(\omega) \in \mathbb{R}^{i \times n}, b(\omega)
|
||||||
\in \mathbb{R}^n$ randomly drawn weights. Then for a weight vector
|
\in \mathbb{R}^n$ randomly drawn weights. Then for a weight vector
|
||||||
@ -272,27 +257,15 @@ and call such networks randomized shallow neural networks.
|
|||||||
\]
|
\]
|
||||||
\label{def:rsnn}
|
\label{def:rsnn}
|
||||||
\end{Definition}
|
\end{Definition}
|
||||||
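For a one-dimensional input, a randomized shallow neural network can be sketched as follows: the hidden parameters are drawn once and then kept fixed, and only the output weights $w$ enter the prediction (the distributions used here are placeholders):

\begin{lstlisting}[language=iPython]
import numpy as np

class RandomizedShallowNN:
    def __init__(self, n, seed=0):
        rng = np.random.default_rng(seed)
        self.v = rng.normal(0, 5, size=n)        # fixed random hidden weights
        self.b = rng.uniform(-10, 10, size=n)    # fixed random hidden biases

    def predict(self, w, x):
        # RN_w(x) = sum_k w_k ReLU(b_k + v_k x); only w is trainable
        hidden = np.maximum(0.0, self.b + np.outer(np.atleast_1d(x), self.v))
        return hidden @ w
\end{lstlisting}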
% We call a one dimensional randomized shallow neural network were the
|
We call a one dimensional randomized shallow neural network were the
|
||||||
% are penalized in the loss
|
$L^2$ norm of the trainable weights $w$ are penalized in the loss
|
||||||
% function ridge penalized neural networks.
|
function ridge penalized neural networks.
|
||||||
|
|
||||||
We will prove that if we penalize the size of the trainable weights
|
|
||||||
when fitting a randomized shallow neural network it will
|
|
||||||
converge to a function that minimizes the distance to the training
|
|
||||||
data with respect to its second derivative as the number of nodes is increased.
|
|
||||||
We call such a network that is fitted according to MSE and a penalty term for
|
|
||||||
the $L^2$ norm of the trainable weights $w$ a ridge penalized neural network.
|
|
||||||
% $\lam$
|
|
||||||
% We call a randomized shallow neural network trained on MSE and
|
|
||||||
% punished for the amount of the weights $w$ according to a
|
|
||||||
% ... $\lambda$ ridge penalized neural networks.
|
|
||||||
|
|
||||||
% We call a randomized shallow neural network where the size of the trainable
|
% We call a randomized shallow neural network where the size of the trainable
|
||||||
% weights is punished in the error function a ridge penalized
|
% weights is punished in the error function a ridge penalized
|
||||||
% neural network. For a tuning parameter $\tilde{\lambda}$ .. the extent
|
% neural network. For a tuning parameter $\tilde{\lambda}$ .. the extent
|
||||||
% of penalization we get:
|
% of penalization we get:
|
||||||
\begin{Definition}[Ridge penalized Neural Network, Heiss, Teichmann, and
|
\begin{Definition}[Ridge penalized Neural Network]
|
||||||
Wutte (2019, Definition 3.2)]
|
|
||||||
\label{def:rpnn}
|
\label{def:rpnn}
|
||||||
Let $\mathcal{RN}_{w, \omega}$ be a randomized shallow neural
|
Let $\mathcal{RN}_{w, \omega}$ be a randomized shallow neural
|
||||||
network, as introduced in Definition~\ref{def:rsnn} and tuning
|
network, as introduced in Definition~\ref{def:rsnn} and tuning
|
||||||
@ -303,7 +276,7 @@ the $L^2$ norm of the trainable weights $w$ a ridge penalized neural network.
|
|||||||
\mathcal{RN}^{*, \tilde{\lambda}}_{\omega}(x) \coloneqq
|
\mathcal{RN}^{*, \tilde{\lambda}}_{\omega}(x) \coloneqq
|
||||||
\mathcal{RN}_{w^{*, \tilde{\lambda}}(\omega), \omega}
|
\mathcal{RN}_{w^{*, \tilde{\lambda}}(\omega), \omega}
|
||||||
\]
|
\]
|
||||||
with \
|
with
|
||||||
\[
|
\[
|
||||||
w^{*,\tilde{\lambda}}(\omega) :\in \argmin_{w \in
|
w^{*,\tilde{\lambda}}(\omega) :\in \argmin_{w \in
|
||||||
\mathbb{R}^n} \underbrace{ \left\{\overbrace{\sum_{i = 1}^N \left(\mathcal{RN}_{w,
|
\mathbb{R}^n} \underbrace{ \left\{\overbrace{\sum_{i = 1}^N \left(\mathcal{RN}_{w,
|
||||||
@ -319,7 +292,7 @@ having minimal weights, resulting in the \textit{minimum norm
|
|||||||
network} $\mathcal{RN}_{w^{\text{min}}, \omega}$.
|
network} $\mathcal{RN}_{w^{\text{min}}, \omega}$.
|
||||||
\[
|
\[
|
||||||
\mathcal{RN}_{w^{\text{min}}, \omega} \text{ randomized shallow
|
\mathcal{RN}_{w^{\text{min}}, \omega} \text{ randomized shallow
|
||||||
neural network with weights } w^{\text{min}}\colon
|
Neural network with weights } w^{\text{min}}:
|
||||||
\]
|
\]
|
||||||
\[
|
\[
|
||||||
w^{\text{min}} \in \argmin_{w \in \mathbb{R}^n} \norm{w}, \text{
|
w^{\text{min}} \in \argmin_{w \in \mathbb{R}^n} \norm{w}, \text{
|
||||||
@ -331,18 +304,18 @@ For $\tilde{\lambda} \to \infty$ the learned
|
|||||||
function will resemble the data less and with the weights
|
function will resemble the data less and with the weights
|
||||||
approaching $0$ will converge to the constant $0$ function.
|
approaching $0$ will converge to the constant $0$ function.
|
||||||
|
|
||||||
To make the notation more convenient, in the following the
|
In order to make the notation more convinient in the following the
|
||||||
$\omega$ used to express the realized random parameters will no longer
|
$\omega$ used to express the realised random parameters will no longer
|
||||||
be explicitly mentioned.
|
be explicitly mentioned.
|
||||||
|
|
||||||
We call a function that minimizes the squared distance between training points
|
We call a function that minimizes the cubic distance between training points
|
||||||
and the function, penalized by the second
|
and the function with respect\todo{right word} to the second
|
||||||
derivative of the function, a cubic smoothing spline.
|
derivative of the function a regression spline.
|
||||||
|
|
||||||
\begin{Definition}[Cubic Smoothing Spline]
|
\begin{Definition}[Regression Spline]
|
||||||
Let $x_i^{\text{train}}, y_i^{\text{train}} \in \mathbb{R}, i \in
|
Let $x_i^{\text{train}}, y_i^{\text{train}} \in \mathbb{R}, i \in
|
||||||
\left\{1,\dots,N\right\}$ be training data. for a given $\lambda \in
|
\left\{1,\dots,N\right\}$ be trainig data. for a given $\lambda \in
|
||||||
\mathbb{R}$ the cubic smoothing spline is given by
|
\mathbb{R}$ the regression spline is given by
|
||||||
\[
|
\[
|
||||||
f^{*,\lambda} :\in \argmin_{f \in
|
f^{*,\lambda} :\in \argmin_{f \in
|
||||||
\mathcal{C}^2}\left\{\sum_{i=1}^N
|
\mathcal{C}^2}\left\{\sum_{i=1}^N
|
||||||
@ -353,10 +326,10 @@ derivative of the function a cubic smoothing spline.
|
|||||||
|
|
||||||
We will show that for specific hyperparameters the ridge penalized
|
We will show that for specific hyper parameters the ridge penalized
|
||||||
shallow neural networks converge to a slightly modified variant of the
|
shallow neural networks converge to a slightly modified variant of the
|
||||||
cubic smoothing spline. We need to incorporate the densities of the
|
regression spline. We will need to incorporate the densities of the
|
||||||
random parameters in the loss function of the spline to ensure
|
random parameters in the loss function of the spline to ensure
|
||||||
convergence. Thus we define
|
convergence. Thus we define
|
||||||
the adapted weighted cubic smoothing spline where the loss for the second
|
the adapted weighted regression spline where the loss for the second
|
||||||
derivative is weighted by a function $g$ and the support of the second
|
derivative is weighted by a function $g$ and the support of the second
|
||||||
derivative of $f$ has to be a subset of the support of $g$. The formal
|
derivative of $f$ has to be a subset the support of $g$. The formal
|
||||||
definition is given in Definition~\ref{def:wrs}.
|
definition is given in Definition~\ref{def:wrs}.
|
||||||
@ -367,20 +340,19 @@ definition is given in Definition~\ref{def:wrs}.
|
|||||||
% spline that allows for weighting the penalty term for the second
|
% spline that allows for weighting the penalty term for the second
|
||||||
% derivative with a weight function $g$. This is needed to ...the
|
% derivative with a weight function $g$. This is needed to ...the
|
||||||
% distributions of the random parameters ... We call this the adapted
|
% distributions of the random parameters ... We call this the adapted
|
||||||
% weighted cubic smoothing spline.
|
% weighted regression spline.
|
||||||
|
|
||||||
% Now we take a look at weighted cubic smoothing splines. Later we will prove
|
% Now we take a look at weighted regression splines. Later we will prove
|
||||||
% that the ridge penalized neural network as defined in
|
% that the ridge penalized neural network as defined in
|
||||||
% Definition~\ref{def:rpnn} converges a weighted cubic smoothing spline, as
|
% Definition~\ref{def:rpnn} converges a weighted regression spline, as
|
||||||
% the amount of hidden nodes is grown to inifity.
|
% the amount of hidden nodes is grown to inifity.
|
||||||
|
|
||||||
\begin{Definition}[Adapted weighted cubic smoothing spline, Heiss, Teichmann, and
|
\begin{Definition}[Adapted Weighted regression spline]
|
||||||
Wutte (2019, Definition 3.5)]
|
|
||||||
\label{def:wrs}
|
\label{def:wrs}
|
||||||
Let $x_i^{\text{train}}, y_i^{\text{train}} \in \mathbb{R}, i \in
|
Let $x_i^{\text{train}}, y_i^{\text{train}} \in \mathbb{R}, i \in
|
||||||
\left\{1,\dots,N\right\}$ be training data. For a given $\lambda \in \mathbb{R}_{>0}$
|
\left\{1,\dots,N\right\}$ be trainig data. For a given $\lambda \in \mathbb{R}_{>0}$
|
||||||
and a function $g: \mathbb{R} \to \mathbb{R}_{>0}$ the weighted
|
and a function $g: \mathbb{R} \to \mathbb{R}_{>0}$ the weighted
|
||||||
cubic smoothing spline $f^{*, \lambda}_g$ is given by
|
regression spline $f^{*, \lambda}_g$ is given by
|
||||||
|
|
||||||
\[
|
\[
|
||||||
f^{*, \lambda}_g :\in \argmin_{\substack{f \in \mathcal{C}^2(\mathbb{R})
|
f^{*, \lambda}_g :\in \argmin_{\substack{f \in \mathcal{C}^2(\mathbb{R})
|
||||||
@ -389,41 +361,43 @@ definition is given in Definition~\ref{def:wrs}.
|
|||||||
\lambda g(0) \int_{\supp(g)}\frac{\left(f''(x)\right)^2}{g(x)}
|
\lambda g(0) \int_{\supp(g)}\frac{\left(f''(x)\right)^2}{g(x)}
|
||||||
dx\right\}}_{\eqqcolon F^{\lambda, g}(f)}.
|
dx\right\}}_{\eqqcolon F^{\lambda, g}(f)}.
|
||||||
\]
|
\]
|
||||||
% \todo{Requirement on the derivative of f, or not?}
|
\todo{Requirement on the derivative of f, or not?}
|
||||||
\end{Definition}
|
\end{Definition}
|
||||||
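Numerically, $F^{\lambda, g}$ can be evaluated for a candidate function given by its values on an increasing grid covering $\supp(g)$, e.g. with finite differences for $f''$ and the trapezoidal rule for the integral; this is only a rough sketch with illustrative names:

\begin{lstlisting}[language=iPython]
import numpy as np

def weighted_spline_objective(f_vals, grid, x_train, y_train, g, lam):
    # squared error term: sum_i (f(x_i) - y_i)^2
    error = np.sum((np.interp(x_train, grid, f_vals) - y_train) ** 2)
    # f'' on the grid by repeated finite differences
    f2 = np.gradient(np.gradient(f_vals, grid), grid)
    # lambda g(0) * integral of (f'')^2 / g over the grid (trapezoidal rule)
    integrand = f2 ** 2 / g(grid)
    integral = np.sum(0.5 * (integrand[:-1] + integrand[1:]) * np.diff(grid))
    return error + lam * g(0.0) * integral
\end{lstlisting}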
|
|
||||||
Similarly to ridge weight penalized neural networks the parameter
|
Similarly to ridge weight penalized neural networks the parameter
|
||||||
$\lambda$ controls a trade-off between accuracy on the training data
|
$\lambda$ controls a trade-off between accuracy on the training data
|
||||||
and smoothness or low second derivative. For $g \equiv 1$ and $\lambda \to 0$ the
|
and smoothness or low second dreivative. For $g \equiv 1$ and $\lambda \to 0$ the
|
||||||
resulting function $f^{*, 0+}$ will interpolate the training data while minimizing
|
resulting function $f^{*, 0+}$ will interpolate the training data while minimizing
|
||||||
the second derivative. Such a function is known as cubic spline
|
the second derivative. Such a function is known as cubic spline
|
||||||
interpolation.
|
interpolation.
|
||||||
\vspace{-0.2cm}
|
\todo{cite cubic spline}
|
||||||
|
|
||||||
\[
|
\[
|
||||||
f^{*, 0+} \text{ smooth spline interpolation: }
|
f^{*, 0+} \text{ smooth spline interpolation: }
|
||||||
\]
|
\]
|
||||||
\[
|
\[
|
||||||
f^{*, 0+} \coloneqq \lim_{\lambda \to 0+} f^{*, \lambda}_1 \in
|
f^{*, 0+} \coloneqq \lim_{\lambda \to 0+} f^{*, \lambda}_1 \in
|
||||||
\argmin_{\substack{f \in \mathcal{C}^2(\mathbb{R}), \\ f(x_i^{\text{train}}) =
|
\argmin_{\substack{f \in \mathcal{C}^2\mathbb{R}, \\ f(x_i^{\text{train}}) =
|
||||||
y_i^{\text{train}}}} \left( \int _{\mathbb{R}} (f''(x))^2 \, dx\right).
|
y_i^{\text{train}}}} = \left( \int _{\mathbb{R}} (f''(x))^2dx\right).
|
||||||
\]
|
\]
|
||||||
|
|
||||||
For $\lambda \to \infty$ on the other hand $f_g^{*\lambda}$ converges
|
For $\lambda \to \infty$ on the other hand $f_g^{*\lambda}$ converges
|
||||||
to linear regression of the data.
|
to linear regression of the data.
|
||||||
|
|
||||||
We use two intermediary functions in order to show the convergence of
|
We use two intermediary functions in order to show the convergence of
|
||||||
the ridge penalized shallow neural network to adapted cubic smoothing splines.
|
the ridge penalized shallow neural network to adapted regression splines.
|
||||||
% In order to show that ridge penalized shallow neural networks converge
|
% In order to show that ridge penalized shallow neural networks converge
|
||||||
% to adapted cubic smoothing splines for a growing amount of hidden nodes we
|
% to adapted regression splines for a growing amount of hidden nodes we
|
||||||
% define two intermediary functions.
|
% define two intermediary functions.
|
||||||
One being a smooth approximation of a
|
One being a smooth approximation of
|
||||||
neural network and the other being a randomized shallow neural network designed
|
the neural network, and a randomized shallow neural network designed
|
||||||
to approximate a spline.
|
to approximate a spline.
|
||||||
In order to properly construct these functions, we need to take the points
|
In order to properly BUILD these functions we need to take the points
|
||||||
of the network into consideration where the trajectory of the learned
|
of the network into consideration where the TRAJECTORY changes or
|
||||||
function changes
|
their points of discontinuity
|
||||||
(or their points of discontinuity).
|
|
||||||
As we use the ReLU activation the function learned by the
|
As we use the ReLU activation the function learned by the
|
||||||
network will possess points where its derivative is discontinuous, namely where a neuron in the hidden
|
network will possess points of discontinuity where a neuron in the hidden
|
||||||
layer gets activated and its output is no longer zero. We formalize these points
|
layer gets activated (goes from 0 -> x>0). We formalize these points
|
||||||
as kinks in Definition~\ref{def:kink}.
|
as kinks in Definition~\ref{def:kink}.
|
||||||
\begin{Definition}
|
\begin{Definition}
|
||||||
\label{def:kink}
|
\label{def:kink}
|
||||||
@ -441,9 +415,9 @@ as kinks in Definition~\ref{def:kink}.
|
|||||||
\item Let $\xi_k \coloneqq -\frac{b_k}{v_k}$ be the k-th kink of $\mathcal{RN}_w$.
|
\item Let $\xi_k \coloneqq -\frac{b_k}{v_k}$ be the k-th kink of $\mathcal{RN}_w$.
|
||||||
\item Let $g_{\xi}(\xi_k)$ be the density of the kinks $\xi_k =
|
\item Let $g_{\xi}(\xi_k)$ be the density of the kinks $\xi_k =
|
||||||
- \frac{b_k}{v_k}$ in accordance to the distributions of $b_k$ and
|
- \frac{b_k}{v_k}$ in accordance to the distributions of $b_k$ and
|
||||||
$v_k$. With $\supp(g_\xi) = \left[C_{g_\xi}^l, C_{g_\xi}^u\right]$.
|
$v_k$.
|
||||||
\item Let $h_{k,n} \coloneqq \frac{1}{n g_{\xi}(\xi_k)}$ be the
|
\item Let $h_{k,n} \coloneqq \frac{1}{n g_{\xi}(\xi_k)}$ be the
|
||||||
average estimated distance from kink $\xi_k$ to the next nearest
|
average estmated distance from kink $\xi_k$ to the next nearest
|
||||||
one.
|
one.
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
\end{Definition}
|
\end{Definition}
|
||||||
@ -459,36 +433,40 @@ network by applying the kernel similar to convolution.
|
|||||||
corresponding kink density $g_{\xi}$ as given by
|
corresponding kink density $g_{\xi}$ as given by
|
||||||
Definition~\ref{def:kink}.
|
Definition~\ref{def:kink}.
|
||||||
In order to smooth the RSNN, consider the following kernel for every $x$:
|
In order to smooth the RSNN consider following kernel for every $x$:
|
||||||
\begin{align*}
|
|
||||||
\kappa_x(s) &\coloneqq \mathds{1}_{\left\{\abs{s} \leq \frac{1}{2 \sqrt{n}
|
\[
|
||||||
g_{\xi}(x)}\right\}}(s)\sqrt{n} g_{\xi}(x), \, \forall s \in \mathbb{R}\\
|
\kappa_x(s) \coloneqq \mathds{1}_{\left\{\abs{s} \leq \frac{1}{2 \sqrt{n}
|
||||||
\intertext{Using this kernel we define a smooth approximation of
|
g_{\xi}(x)}\right\}}(s)\sqrt{n} g_{\xi}(x), \, \forall s \in \mathbb{R}
|
||||||
$\mathcal{RN}_w$ by}
|
\]
|
||||||
f^w(x) &\coloneqq \int_{\mathds{R}} \mathcal{RN}_w(x-s)
|
|
||||||
\kappa_x(s) ds.
|
Using this kernel we define a smooth approximation of
|
||||||
\end{align*}
|
$\mathcal{RN}_w$ by
|
||||||
|
|
||||||
|
\[
|
||||||
|
f^w(x) \coloneqq \int_{\mathds{R}} \mathcal{RN}_w(x-s) \kappa_x(s) ds.
|
||||||
|
\]
|
||||||
\end{Definition}
|
\end{Definition}
|
||||||
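Since $\kappa_x$ is a uniform kernel of width $1/(\sqrt{n} g_{\xi}(x))$ that integrates to one, $f^w(x)$ is simply the average of $\mathcal{RN}_w$ over a window centered at $x$, which suggests the following numerical sketch (rn_w and g_xi are assumed to be callables):

\begin{lstlisting}[language=iPython]
import numpy as np

def smoothed_rsnn(rn_w, g_xi, n, x, num_points=201):
    # f^w(x): average of RN_w over the window of width 1/(sqrt(n) g_xi(x)),
    # i.e. the integral of RN_w(x - s) kappa_x(s) ds
    half_width = 1.0 / (2.0 * np.sqrt(n) * g_xi(x))
    s = np.linspace(-half_width, half_width, num_points)
    return np.mean([rn_w(x - si) for si in s])
\end{lstlisting}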
|
|
||||||
Note that the kernel introduced in Definition~\ref{def:srsnn}
|
Note that the kernel introduced in Definition~\ref{def:srsnn}
|
||||||
satisfies $\int_{\mathbb{R}}\kappa_x dx = 1$. While $f^w$ looks
|
satisfies $\int_{\mathbb{R}}\kappa_x dx = 1$. While $f^w$ looks highly
|
||||||
similar to a convolution, it differs slightly as the kernel $\kappa_x(s)$
|
similar to a convolution, it differs slightly as the kernel $\kappa_x(s)$
|
||||||
is dependent on $x$. Therefore only $f^w = (\mathcal{RN}_w *
|
is dependent on $x$. Therefore only $f^w = (\mathcal{RN}_w *
|
||||||
\kappa_x)(x)$ is well defined, while $\mathcal{RN}_w * \kappa$ is not.
|
\kappa_x)(x)$ is well defined, while $\mathcal{RN}_w * \kappa$ is not.
|
||||||
We use $f^{w^{*,\tilde{\lambda}}}$ to denote the smooth approximation
|
We use $f^{w^{*,\tilde{\lambda}}}$ do describe the spline
|
||||||
of the ridge penalized network
|
approximating the ... ridge penalized network
|
||||||
$\mathcal{RN}^{*,\tilde{\lambda}}$.
|
$\mathrm{RN}^{*,\tilde{\lambda}}$.
|
||||||
|
|
||||||
Next, we construct a randomized shallow neural network that
|
Next we construct a randomized shallow neural network which
|
||||||
is designed to be close to a spline, independent from the realization of the random
|
approximates a spline independent from the realization of the random
|
||||||
parameters, by approximating the spline's curvature between the
|
parameters. In order to achieve this we ...
|
||||||
kinks.
|
|
||||||
|
|
||||||
\begin{Definition}[Spline approximating Randomized Shallow Neural
|
\begin{Definition}[Spline approximating Randomised Shallow Neural
|
||||||
Network]
|
Network]
|
||||||
\label{def:sann}
|
\label{def:sann}
|
||||||
Let $\mathcal{RN}$ be a randomized shallow Neural Network according
|
Let $\mathcal{RN}$ be a randomised shallow Neural Network according
|
||||||
to Definition~\ref{def:rsnn} and $f^{*, \lambda}_g$ be the weighted
|
to Definition~\ref{def:rsnn} and $f^{*, \lambda}_g$ be the weighted
|
||||||
cubic smoothing spline as introduced in Definition~\ref{def:wrs}. Then
|
regression spline as introduced in Definition~\ref{def:wrs}. Then
|
||||||
the randomized shallow neural network approximating $f^{*,
|
the randomised shallow neural network approximating $f^{*,
|
||||||
\lambda}_g$ is given by
|
\lambda}_g$ is given by
|
||||||
\[
|
\[
|
||||||
\mathcal{RN}_{\tilde{w}}(x) = \sum_{k = 1}^n \tilde{w}_k \sigma(b_k + v_k x),
|
\mathcal{RN}_{\tilde{w}}(x) = \sum_{k = 1}^n \tilde{w}_k \sigma(b_k + v_k x),
|
||||||
@ -496,7 +474,7 @@ kinks.
|
|||||||
with the weights $\tilde{w}_k$ defined as
|
with the weights $\tilde{w}_k$ defined as
|
||||||
\[
|
\[
|
||||||
\tilde{w}_k \coloneqq \frac{h_{k,n} v_k}{\mathbb{E}[v^2 \vert \xi
|
\tilde{w}_k \coloneqq \frac{h_{k,n} v_k}{\mathbb{E}[v^2 \vert \xi
|
||||||
= \xi_k]} \left(f_g^{*, \lambda}\right)''(\xi_k).
|
= \xi_k]} (f_g^{*, \lambda})''(\xi_k).
|
||||||
\]
|
\]
|
||||||
\end{Definition}
|
\end{Definition}
|
||||||
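Given the random parameters and the second derivative of the weighted spline, the weights $\tilde{w}_k$ can be written down directly; a sketch (all function arguments are assumed to be vectorized callables, and the names are illustrative):

\begin{lstlisting}[language=iPython]
import numpy as np

def spline_approx_weights(v, b, g_xi, cond_second_moment, f_dd):
    # kinks xi_k = -b_k / v_k and average spacing h_{k,n} = 1/(n g_xi(xi_k))
    xi = -b / v
    h = 1.0 / (len(v) * g_xi(xi))
    # w_tilde_k = h_{k,n} v_k / E[v^2 | xi = xi_k] * (f_g^{*,lambda})''(xi_k)
    return h * v / cond_second_moment(xi) * f_dd(xi)
\end{lstlisting}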
|
|
||||||
@@ -510,16 +488,16 @@ derivative of $\mathcal{RN}_{\tilde{w}}(x)$ which is given by
x}} \tilde{w}_k v_k \nonumber \\
&= \frac{1}{n} \sum_{\substack{k \in \mathbb{N} \\
\xi_k < x}} \frac{v_k^2}{g_{\xi}(\xi_k) \mathbb{E}[v^2 \vert \xi
= \xi_k]} \left(f_g^{*, \lambda}\right)''(\xi_k). \label{eq:derivnn}
\end{align}
As the expression (\ref{eq:derivnn}) behaves similarly to a Riemann
sum, it will converge in probability, as $n \to \infty$, to the
first derivative of $f^{*,\lambda}_g$. A formal proof of this behavior
is given in Lemma~\ref{lem:s0}.
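
As a purely illustrative special case (the distribution is
hypothetical and not used later), assume the kinks are uniformly
distributed on $[-c, c]$, so that $g_{\xi} \equiv \frac{1}{2c}$ on its
support. For $x \in [-c, c]$, (\ref{eq:derivnn}) then becomes
\[
\frac{\partial \mathcal{RN}_{\tilde{w}}}{\partial x} (x)
= \frac{2c}{n} \sum_{\substack{k \in \mathbb{N} \\ \xi_k < x}}
\frac{v_k^2}{\mathbb{E}[v^2 \vert \xi = \xi_k]}
\left(f_g^{*, \lambda}\right)''(\xi_k),
\]
a sum of on average $n \frac{x + c}{2c}$ terms with mesh
$\frac{2c}{n}$, in which the factor $\frac{v_k^2}{\mathbb{E}[v^2 \vert
\xi = \xi_k]}$ averages to one, so for large $n$ it approximates
$\int_{-c}^{x} \left(f_g^{*, \lambda}\right)''(z) dz$.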

In order to ensure the functions used in the proof of the convergence
are well defined, we need to make some assumptions about the properties of the random
parameters and their densities.

% In order to formulate the theorem describing the convergence of $RN_w$
% we need to make a couple of assumptions.
@@ -528,7 +506,7 @@ parameters and their densities.
\begin{Assumption}~
\label{ass:theo38}
\begin{enumerate}[label=(\alph*)]
\item The probability density function of the kinks $\xi_k$,
namely $g_{\xi}$ as defined in Definition~\ref{def:kink}, exists
and is well defined.
\item The density function $g_\xi$
@@ -536,14 +514,14 @@ parameters and their densities.
\item The density function $g_{\xi}$ is uniformly continuous on $\supp(g_{\xi})$.
\item $g_{\xi}(0) \neq 0$.
\item $\frac{1}{g_{\xi}}\Big|_{\supp(g_{\xi})}$ is uniformly
continuous on $\supp(g_{\xi})$.
\item The conditional distribution $\mathcal{L}(v_k|\xi_k = x)$
is uniformly continuous on $\supp(g_{\xi})$.
\item $\mathbb{E}\left[v_k^2\right] < \infty$.
\end{enumerate}
\end{Assumption}

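
A simple example of random parameters compatible with the conditions
above (chosen here only for illustration) is given by independent
$\xi_k \sim \mathcal{U}[-5, 5]$ and $v_k \sim \mathcal{N}(0, 1)$: the
density $g_{\xi} \equiv \frac{1}{10}$ exists, is constant and hence
uniformly continuous on its support, $g_{\xi}(0) \neq 0$, its
reciprocal is constant on $\supp(g_{\xi})$ as well, the conditional
law $\mathcal{L}(v_k|\xi_k = x)$ does not depend on $x$, and
$\mathbb{E}\left[v_k^2\right] = 1 < \infty$.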

As we will prove the convergence in the Sobolev space, we
introduce it and the corresponding induced norm here.

\begin{Definition}[Sobolev Space]
@@ -551,7 +529,7 @@ introduce it and the corresponding induced norm.
define the Sobolev space $W^{k,p}(K)$ as the space containing all
real valued functions $u \in L^p(K)$ such that for every multi-index
$\alpha \in \mathbb{N}^n$ with $\abs{\alpha} \leq
k$ the mixed partial derivatives
\[
u^{(\alpha)} = \frac{\partial^{\abs{\alpha}} u}{\partial
x_1^{\alpha_1} \dots \partial x_n^{\alpha_n}}
@@ -560,8 +538,9 @@ introduce it and the corresponding induced norm.
\[
\norm{u^{(\alpha)}}_{L^p} < \infty.
\]

\label{def:sobonorm}
The natural norm of the Sobolev space is given by
\[
\norm{f}_{W^{k,p}(K)} =
\begin{cases}
@@ -575,21 +554,18 @@ introduce it and the corresponding induced norm.
\]
\end{Definition}

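
For the case used throughout this section, $k = 1$ and $p = \infty$ on
a compact interval $K$, the norm takes the form (using the usual
convention for $p = \infty$)
\[
\norm{f}_{W^{1,\infty}(K)} = \max\left\{\norm{f}_{L^{\infty}(K)},
\norm{f'}_{L^{\infty}(K)}\right\},
\]
so convergence in $\norm{.}_{W^{1,\infty}(K)}$ means uniform
convergence of both the function and its first derivative on $K$.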

With the important definitions and assumptions in place, we can now
formulate the main theorem: the convergence of ridge penalized
randomized shallow neural networks to adapted cubic smoothing splines
when the parameters are chosen accordingly.

\begin{Theorem}[Ridge Weight Penalty Corresponds to Weighted Cubic
Smoothing Spline]
\label{theo:main1}
For $N \in \mathbb{N}$, arbitrary training data
$\left(x_i^{\text{train}}, y_i^{\text{train}}
\right)~\in~\mathbb{R}^2$, with $i \in \left\{1,\dots,N\right\}$,
and $\mathcal{RN}^{*, \tilde{\lambda}}, f_g^{*, \lambda}$
according to Definition~\ref{def:rpnn} and Definition~\ref{def:wrs}
respectively, with Assumption~\ref{ass:theo38} it holds that

\begin{equation}
\label{eq:main1}
@@ -605,7 +581,7 @@ formulate the main theorem.
\end{align*}
\end{Theorem}
As mentioned above, we will prove Theorem~\ref{theo:main1} utilizing
intermediary functions. We show that
\begin{equation}
\label{eq:main2}
\plimn \norm{\mathcal{RN}^{*, \tilde{\lambda}} - f^{w^*}}_{W^{1,
@@ -617,16 +593,16 @@ and
\plimn \norm{f^{w^*} - f_g^{*, \lambda}}_{W^{1,\infty}(K)} = 0
\end{equation}
and then get (\ref{eq:main1}) using the triangle inequality. In
order to prove (\ref{eq:main2}) and (\ref{eq:main3}) we need to
introduce a number of auxiliary lemmata, the proofs of which are
given in \textcite{heiss2019} and Appendix~\ref{appendix:proofs}.


\begin{Lemma}[Poincar\'e Type Inequality]
\label{lem:pieq}
Let \(f:\mathbb{R} \to \mathbb{R}\) be differentiable with \(f' :
\mathbb{R} \to \mathbb{R}\) Lebesgue integrable. Then for \(K=[a,b]
\subset \mathbb{R}\) with \(f(a)=0\) it holds that
\begin{equation*}
\label{eq:pti1}
@@ -634,15 +610,14 @@ given in \textcite{heiss2019} and Appendix~\ref{appendix:proofs}.
\norm{f}_{W^{1,\infty}(K)} \leq C_K^{\infty}
\norm{f'}_{L^{\infty}(K)}.
\end{equation*}
If additionally \(f'\) is differentiable with \(f'': \mathbb{R} \to
\mathbb{R}\) Lebesgue integrable then
\begin{equation*}
\label{eq:pti2}
\exists C_K^2 \in \mathbb{R}_{>0} : \norm{f}_{W^{1,\infty}(K)} \leq
C_K^2 \norm{f''}_{L^2(K)}.
\end{equation*}
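
A minimal sketch of the first bound, under the norm convention for
$W^{1,\infty}(K)$ stated above: since \(f(a) = 0\), the fundamental
theorem of calculus gives, for every \(x \in K\),
\[
\abs{f(x)} = \abs{\int_a^x f'(t) dt} \leq (b-a)
\norm{f'}_{L^{\infty}(K)},
\]
so $\norm{f}_{W^{1,\infty}(K)} \leq \max\left\{b-a, 1\right\}
\norm{f'}_{L^{\infty}(K)}$, i.e.\ the constant $C_K^{\infty} =
\max\left\{b-a, 1\right\}$ works; the constant in the referenced proof
may differ.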
% \proof The proof is given in the appendix...

% With the fundamental theorem of calculus, if
% \(\norm{f}_{L^{\infty}(K)}<\infty\) we get
% \begin{equation}
@@ -679,12 +654,11 @@ given in \textcite{heiss2019} and Appendix~\ref{appendix:proofs}.
\begin{Lemma}
\label{lem:cnvh}
Let $\mathcal{RN}$ be a shallow Neural Network. For \(\varphi :
\mathbb{R}^2 \to \mathbb{R}\) uniformly continuous such that
\[
\forall x \in \supp(g_{\xi}) : \mathbb{E}\left[\varphi(\xi, v)
\frac{1}{n g_{\xi}(\xi)} \vert \xi = x \right] < \infty,
\]
\clearpage
it holds that
\[
\plimn \sum_{k \in \kappa : \xi_k < T} \varphi(\xi_k, v_k)
@@ -693,7 +667,7 @@ given in \textcite{heiss2019} and Appendix~\ref{appendix:proofs}.
\mathbb{E}\left[\varphi(\xi, v) \vert \xi = x \right] dx
\]
uniformly in \(T \in K\).
% \proof The proof is given in appendix...
% For \(T \leq C_{g_{\xi}}^l\) both sides equal 0, so it is sufficient to
% consider \(T > C_{g_{\xi}}^l\). With \(\varphi\) and
% \(\nicefrac{1}{g_{\xi}}\) uniformly continuous in \(\xi\),
@@ -738,7 +712,7 @@ given in \textcite{heiss2019} and Appendix~\ref{appendix:proofs}.
% \kappa : \xi_m \in [\delta l, \delta(l +
% 1)]\right\}}}{ng_{\xi}(l\delta)}\right) \pm \varepsilon .\\
% \intertext{We use the mean to approximate the number of kinks in
% each $\delta$-strip, as it follows a binomial distribution this
% amounts to
% \[
% \mathbb{E}\left[\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
@@ -749,15 +723,14 @@ given in \textcite{heiss2019} and Appendix~\ref{appendix:proofs}.
% Bla Bla Bla $v_k$}
% \circled{1} & \approx
% \end{align*}
\proof Notes on the proof are given in Proof~\ref{proof:lem9}.
\end{Lemma}

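
To get a feeling for Lemma~\ref{lem:cnvh}, consider the constant
choice $\varphi \equiv 1$ (purely for illustration, and assuming the
summands carry the $\frac{1}{n g_{\xi}(\xi_k)}$ weights suggested by
the integrability condition above): the weighted kink count
$\sum_{k \in \kappa : \xi_k < T} \frac{1}{n g_{\xi}(\xi_k)}$ then
converges in probability, uniformly in $T$, to the length of the
integration interval, a law of large numbers type statement. The same
mechanism applied to $\varphi(\xi, v) = \frac{v^2}{\mathbb{E}[v^2
\vert \xi]} \left(f_g^{*, \lambda}\right)''(\xi)$ is what drives the
convergence of (\ref{eq:derivnn}).
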
\begin{Lemma}
For any $\lambda > 0$, $N \in \mathbb{N}$, training data $(x_i^{\text{train}},
y_i^{\text{train}}) \in \mathbb{R}^2$, with $i \in
\left\{1,\dots,N\right\}$, and subset $K \subset \mathbb{R}$ the spline approximating randomized
shallow neural network $\mathcal{RN}_{\tilde{w}}$ converges to the
cubic smoothing spline $f^{*, \lambda}_g$ in
$\norm{.}_{W^{1,\infty}(K)}$ as the node count $n$ increases,
\begin{equation}
\label{eq:s0}
@@ -771,91 +744,77 @@ given in \textcite{heiss2019} and Appendix~\ref{appendix:proofs}.
\lambda}_g)'}_{L^{\infty}} = 0.
\]
This can be achieved by using Lemma~\ref{lem:cnvh} with $\varphi(\xi_k,
v_k) = \frac{v_k^2}{\mathbb{E}[v^2|\xi = \xi_k]} (f^{*, \lambda}_g)''(\xi_k)$,
thus obtaining
\begin{align*}
\plimn \frac{\partial \mathcal{RN}_{\tilde{w}}}{\partial x} (x)
\equals^{(\ref{eq:derivnn})}_{\phantom{\text{Lemma 3.1.4}}}
%\stackrel{(\ref{eq:derivnn})}{=}
&
\plimn \sum_{\substack{k \in \mathbb{N} \\
\xi_k < x}} \frac{v_k^2}{\mathbb{E}[v^2 \vert \xi
= \xi_k]} (f_g^{*, \lambda})''(\xi_k) h_{k,n} \\
\stackrel{\text{Lemma}~\ref{lem:cnvh}}{=}
%\stackrel{\phantom{(\ref{eq:derivnn})}}{=}
&
\int_{\max\left\{C_{g_{\xi}}^l,x\right\}}^{\min\left\{C_{g_{\xi}}^u,x\right\}}
\mathbb{E}\left[\frac{v^2}{\mathbb{E}[v^2|\xi = z]} (f^{*,
\lambda}_g)''(\xi) \vert
\xi = z \right] dz\\
\mathmakebox[\widthof{$\stackrel{\text{Lemma 3.14}}{=}$}][c]{\equals^{\text{Tower-}}_{\text{property}}}
%\stackrel{\phantom{(\ref{eq:derivnn})}}{=}
&
\int_{\max\left\{C_{g_{\xi}}^l,
x\right\}}^{\min\left\{C_{g_{\xi}}^u,x\right\}}(f^{*,\lambda}_g)''(z)
dz.
\end{align*}
With the fundamental theorem of calculus we get
\[
\plimn \mathcal{RN}_{\tilde{w}}'(x) = f_g^{*,\lambda
'}(\min\left\{C_{g_{\xi}}^u, x\right\}) - f_g^{*,\lambda
'}(\max\left\{C_{g_{\xi}}^l, x\right\}).
\]
As $f_g^{*,\lambda '}$ is constant on $\left[C_{g_\xi}^l,
C_{g_\xi}^u\right]^C$ because $\supp(f_g^{*,\lambda ''}) \subseteq
\supp(g) \subseteq \supp(g_\xi)$ we get
\[
\plimn \mathcal{RN}_{\tilde{w}}'(x) = f_g^{*,\lambda '},
\]
thus (\ref{eq:s0}) follows with Lemma~\ref{lem:pieq}.
\qed
\label{lem:s0}
\end{Lemma}

\begin{Lemma}
For any $\lambda > 0$, $N \in \mathbb{N}$, and training data $(x_i^{\text{train}},
y_i^{\text{train}}) \in \mathbb{R}^2$, with $i \in
\left\{1,\dots,N\right\}$, we have
\[
\plimn F^{\tilde{\lambda}}_n(\mathcal{RN}_{\tilde{w}}) =
F^{\lambda, g}(f^{*, \lambda}_g) = 0.
\]
\proof Notes on the proof are given in Proof~\ref{proof:lem14}.
\label{lem:s2}
\end{Lemma}

\begin{Lemma}
For any $\lambda > 0$, $N \in \mathbb{N}$, and training data $(x_i^{\text{train}},
y_i^{\text{train}}) \in \mathbb{R}^2$, with $i \in
\left\{1,\dots,N\right\}$, with $w^*$ as
defined in Definition~\ref{def:rpnn} and $\tilde{\lambda}$ as
defined in Theorem~\ref{theo:main1}, it holds
\[
\plimn \norm{\mathcal{RN}^{*,\tilde{\lambda}} -
f^{w*, \tilde{\lambda}}}_{W^{1,\infty}(K)} = 0.
\]
\proof Notes on the proof are given in Proof~\ref{proof:lem15}.
\label{lem:s3}
\end{Lemma}

\begin{Lemma}
For any $\lambda > 0$, $N \in \mathbb{N}$, and training data $(x_i^{\text{train}},
y_i^{\text{train}}) \in \mathbb{R}^2$, with $i \in
\left\{1,\dots,N\right\}$, with $w^*$ and $\tilde{\lambda}$ as
defined in Definition~\ref{def:rpnn} and Theorem~\ref{theo:main1}
respectively, it holds
\[
\plimn \abs{F_n^{\tilde{\lambda}}(\mathcal{RN}^{*,\tilde{\lambda}}) -
F^{\lambda, g}(f^{w*, \tilde{\lambda}})} = 0.
\]
\proof Notes on the proof are given in Proof~\ref{proof:lem16}.
\label{lem:s4}
\end{Lemma}

\begin{Lemma}
For any $\lambda > 0$, $N \in \mathbb{N}$, and training data $(x_i^{\text{train}},
y_i^{\text{train}}) \in \mathbb{R}^2$, with $i \in
\left\{1,\dots,N\right\}$, for any sequence of functions $f^n \in
W^{2,2}$ with
\[
@@ -865,45 +824,39 @@ given in \textcite{heiss2019} and Appendix~\ref{appendix:proofs}.
\[
\plimn \norm{f^n - f^{*, \lambda}} = 0.
\]
\proof Notes on the proof are given in Proof~\ref{proof:lem19}.
\label{lem:s7}
\end{Lemma}
Using these lemmata we can now prove Theorem~\ref{theo:main1}. We
start by showing that the error measure of the smooth approximation of
the ridge penalized randomized shallow neural network $F^{\lambda,
g}(f^{w^{*,\tilde{\lambda}}})$
will converge in probability to the error measure of the adapted weighted regression
spline $F^{\lambda, g}\left(f^{*,\lambda}\right)$ for the specified
parameters.

Using Lemma~\ref{lem:s4} we get that for every $P \in (0,1)$ and
$\varepsilon > 0$ there exists an $n_1 \in \mathbb{N}$ such that
\begin{equation}
\mathbb{P}\left[F^{\lambda, g}\left(f^{w^{*,\tilde{\lambda}}}\right) \in
F_n^{\tilde{\lambda}}\left(\mathcal{RN}^{*,\tilde{\lambda}}\right)
+[-\varepsilon, \varepsilon]\right] > P, \forall n \in
\mathbb{N}_{> n_1}.
\label{eq:squeeze_1}
\end{equation}
As $\mathcal{RN}^{*,\tilde{\lambda}}$ is the optimal network for
$F_n^{\tilde{\lambda}}$ we know that
\begin{equation}
F_n^{\tilde{\lambda}}\left(\mathcal{RN}^{*,\tilde{\lambda}}\right)
\leq F_n^{\tilde{\lambda}}\left(\mathcal{RN}_{\tilde{w}}\right).
\label{eq:squeeze_2}
\end{equation}
Using Lemma~\ref{lem:s2} we get that for every $P \in (0,1)$ and
$\varepsilon > 0$ an $n_2 \in \mathbb{N}$ exists such that
\begin{equation}
\mathbb{P}\left[F_n^{\tilde{\lambda}}\left(\mathcal{RN}_{\tilde{w}}\right)
\in F^{\lambda, g}\left(f^{*,\lambda}_g\right)+[-\varepsilon,
\varepsilon]\right] > P, \forall n \in \mathbb{N}_{> n_2}.
\label{eq:squeeze_3}
\end{equation}
Combining (\ref{eq:squeeze_1}), (\ref{eq:squeeze_2}), and
(\ref{eq:squeeze_3}) we get that for every $P \in (0,1)$ and
every $\varepsilon > 0$, with $n_3 \geq
\max\left\{n_1,n_2\right\}$,
\[
\mathbb{P}\left[F^{\lambda,
@@ -911,52 +864,45 @@ $\varepsilon > 0$ with $n_3 \geq
g}\left(f^{*,\lambda}_g\right)+2\varepsilon\right] > P, \forall
n \in \mathbb{N}_{> n_3}.
\]
As $\supp(f^{w^{*,\tilde{\lambda}}}) \subseteq \supp(g_\xi)$ and $f^{*,\lambda}_g$ is optimal we know that
\[
F^{\lambda, g}\left(f^{*,\lambda}_g\right) \leq F^{\lambda,
g}\left(f^{w^{*,\tilde{\lambda}}}\right)
\]
and thus get with the squeeze theorem
\[
\plimn F^{\lambda, g}\left(f^{w^{*,\tilde{\lambda}}}\right) = F^{\lambda, g}\left(f^{*,\lambda}_g\right).
\]
With Lemma~\ref{lem:s7} it follows that
\begin{equation}
\plimn \norm{f^{w^{*,\tilde{\lambda}}} - f^{*,\lambda}_g}
_{W^{1,\infty}} = 0.
\label{eq:main4}
\end{equation}
By using the triangle inequality with Lemma~\ref{lem:s3} and
(\ref{eq:main4}) we get
\begin{multline}
\plimn \norm{\mathcal{RN}^{*, \tilde{\lambda}} - f_g^{*,\lambda}}\\
\leq \plimn \bigg(\norm{\mathcal{RN}^{*, \tilde{\lambda}} -
f_g^{w^{*,\tilde{\lambda}}}}_{W^{1,\infty}}
+ \norm{f^{w^{*,\tilde{\lambda}}} - f^{*,\lambda}_g}
_{W^{1,\infty}}\bigg) = 0
\end{multline}
and thus have proven Theorem~\ref{theo:main1}.

We now know that randomized shallow neural networks behave similarly to
spline regression if we regularize the size of the weights during
training.

\textcite{heiss2019} further explore a connection between ridge penalized
networks and randomized shallow neural networks trained using gradient
descent.
They infer that the effect of weight regularization
can be achieved by stopping the training of the randomized shallow
neural network early, with the number of iterations being proportional to
the tuning parameter penalizing the size of the weights.
They use this to further conclude that for a large number of training
epochs and a large number of neurons, shallow neural networks trained
with gradient descent are very close to spline interpolations. If the
training is stopped early instead, they are close to adapted weighted
cubic smoothing splines.

\newpage
\subsection{Simulations}
\label{sec:rsnn_sim}
In the following, the behavior described in Theorem~\ref{theo:main1}
is visualized in a simulated example. For this, two sets of training
data have been generated.
\begin{itemize}
@@ -989,28 +935,22 @@ Theorem~\ref{theo:main1}
would equate to $g(x) = \frac{\mathbb{E}[v_k^2|\xi_k = x]}{10}$. In
order to utilize the
smoothing spline implemented in Matlab, $g$ has been simplified to $g
\equiv \frac{1}{10}$ instead.

For all figures $f_1^{*, \lambda}$ has
been calculated with Matlab's {\sffamily{smoothingspline}}, as this minimizes
\[
\bar{\lambda} \sum_{i=1}^N(y_i^{train} - f(x_i^{train}))^2 + (1 -
\bar{\lambda}) \int (f''(x))^2 dx,
\]
the smoothing parameter used for fitting is $\bar{\lambda} =
\frac{1}{1 + \lambda}$.
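
To see why this choice of $\bar{\lambda}$ is consistent (a short
sanity check, not part of the simulation setup), note that with
$\bar{\lambda} = \frac{1}{1 + \lambda}$ the Matlab objective is the
unweighted cubic smoothing spline functional scaled by a positive
constant,
\[
\frac{1}{1 + \lambda} \sum_{i=1}^N(y_i^{train} - f(x_i^{train}))^2 +
\frac{\lambda}{1 + \lambda} \int (f''(x))^2 dx
= \frac{1}{1 + \lambda} \left( \sum_{i=1}^N(y_i^{train} -
f(x_i^{train}))^2 + \lambda \int (f''(x))^2 dx \right),
\]
and scaling an objective by a positive constant does not change its
minimizer.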

The parameter $\tilde{\lambda}$ for training
the networks is chosen as defined in Theorem~\ref{theo:main1}.

Each
network contains 10,000 hidden nodes and is trained on the full
training data for 100,000 epochs using
gradient descent. The
results are given in Figure~\ref{fig:rn_vs_rs}, where it can be seen
that the neural network and
smoothing spline are nearly identical, coinciding with the
proposition.

\input{Figures/RN_vs_RS}


%%% Local Variables: