You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
343 lines
14 KiB
TeX
343 lines
14 KiB
TeX
|
|
\newpage
|
|
\begin{appendices}
|
|
\counterwithin{lstfloat}{section}
|
|
\section{Notes on Proofs of Lemmata in Section~\ref{sec:conv}}
|
|
\label{appendix:proofs}
|
|
In contrast to \textcite{heiss2019}, we do not distinguish between $f_+$ and
|
|
$f_-$.
|
|
This makes some alterations to the proofs necessary. In
|
|
the following, the affected proofs and the required changes are given.
|
|
% Because of that slight alterations are needed in the proofs of
|
|
% .. auxiliary lemmata.
|
|
% Alterations that go beyond substituting $F_{+-}^{}$
|
|
% As the proofs are ... for the most part only
|
|
% the alterations needed are specified.
|
|
|
|
|
|
|
|
|
|
% In the following there will be proofs for some important Lemmata in
|
|
% Section~\ref{sec:theo38}. Further proofs not discussed here can be
|
|
% found in \textcite{heiss2019}
|
|
% The proves in this section are based on \textcite{heiss2019}. Slight
|
|
% alterations have been made to accommodate for not splitting $f$ into
|
|
% $f_+$ and $f_-$.
|
|
% \begin{Theorem}[Proof of Lemma~\ref{theo38}]
|
|
% \end{Theorem}
|
|
|
|
% \begin{Lemma}[$\frac{w^{*,\tilde{\lambda}}_k}{v_k}\approx\mathcal{O}(\frac{1}{n})$]
|
|
% For any $\lambda > 0$ and training data $(x_i^{\text{train}},
|
|
% y_i^{\text{train}}) \in \mathbb{R}^2, \, i \in
|
|
% \left\{1,\dots,N\right\}$, we have
|
|
% \[
|
|
% \max_{k \in \left\{1,\dots,n\right\}} \frac{w^{*,
|
|
% \tilde{\lambda}}_k}{v_k} = \po_{n\to\infty}
|
|
% \]
|
|
|
|
|
|
% \end{Lemma}
|
|
|
|
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.9)]~\\\noindent
|
|
\label{proof:lem9}
|
|
With $\tilde{\lambda} \coloneqq \lambda n g(0)$ Lemma~\ref{lem:cnvh} follows
|
|
analogously when considering $\tilde{w}$, $f_g^{*, \lambda}$, and $h_k$
|
|
instead of $\tilde{w}^+$, $f_{g,+}^{*, \lambda}$, and $\bar{h}_k$.
|
|
Consider $\kappa = \left\{1, \dots, n \right\}$ for $n$ nodes
|
|
instead of $\kappa^+$. With $h_k = \frac{1}{n g_\xi(\xi_k)}$
|
|
instead of $\bar{h}_k$
|
|
and \[
|
|
\mathbb{E} \left[\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
|
|
\delta(l+1))\right\}}\right] = n \int_{\delta
|
|
l}^{\delta(l+1)}g_\xi (x) dx \approx n (\delta g_\xi(\delta l)
|
|
\pm \delta \tilde{\varepsilon}).
|
|
\]
|
|
% \[
|
|
% \sum_{k \in \kappa : \xi_k < T} \varphi(\xi_k, v_k)
|
|
% h_{k,n} = \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
|
|
% \left(\sum_{\substack{k \in \kappa \\ \xi_k \in
|
|
% [\delta l , \delta(l+1))}} \varphi(\xi_k, v_k)
|
|
% h_{k,n}\right) \approx
|
|
% \]
|
|
% \[
|
|
% \approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
|
|
% \left(\sum_{\substack{k \in \kappa \\ \xi_k \in
|
|
% [\delta l , \delta(l+1))}} \left(\varphi(\delta l, v_k)
|
|
% \frac{1}{n g_\xi (\delta l)} \pm \frac{\varepsilon}{n}\right)
|
|
% \frac{\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
|
|
% \delta(l+1))\right\}}}{\abs{\left\{m \in \kappa : \xi_m
|
|
% \in [\delta l, \delta(l+1))\right\}}}\right)
|
|
% \]
|
|
% \[
|
|
% \approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
|
|
% \left(\frac{\sum_{\substack{k \in \kappa \\ \xi_k \in
|
|
% [\delta l , \delta(l+1))}}\varphi(\delta l,
|
|
% v_k)}{\abs{\left\{m \in \kappa : \xi_m
|
|
% \in [\delta l, \delta(l+1))\right\}}}
|
|
% \frac{\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
|
|
% \delta(l+1))\right\}}}{n g_\xi (\delta l)}\right) \pm \varepsilon
|
|
% \]
|
|
% The amount of kinks in a given interval of length $\delta$ follows a
|
|
% binomial distribution,
|
|
% \[
|
|
% \mathbb{E} \left[\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
|
|
% \delta(l+1))\right\}}\right] = n \int_{\delta
|
|
% l}^{\delta(l+1)}g_\xi (x) dx \approx n (\delta g_\xi(\delta l)
|
|
% \pm \delta \tilde{\varepsilon}),
|
|
% \]
|
|
% for any $\delta \leq \delta(\varepsilon, \tilde{\varepsilon})$, since $g_\xi$ is uniformly continuous on its
|
|
% support by Assumption..
|
|
% As the distribution of $v$ is continuous as well we get that
|
|
% $\mathcal{L}(v_k) = \mathcal{L} v| \xi = \delta l) \forall k \in
|
|
% \kappa : \xi_k \in [\delta l, \delta(l+1))$ for $\delta \leq
|
|
% \delta(\varepsilon, \tilde{\varepsilon})$. Thus we get with the law of
|
|
% large numbers
|
|
% \begin{align*}
|
|
% &\sum_{k \in \kappa : \xi_k < T} \varphi(\xi_k, v_k)
|
|
% h_{k,n} \approx\\
|
|
% &\approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
|
|
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T
|
|
% \}]}}\left(\mathbb{E}[\phi(\xi, v)|\xi=\delta l]
|
|
% \stackrel{\mathbb{P}}{\pm}\right) \delta \left(1 \pm
|
|
% \frac{\tilde{\varepsilon}}{g_\xi(\delta l)}\right) \pm \varepsilon
|
|
% \\
|
|
% &\approx \left(\sum_{\substack{l \in \mathbb{Z} \\ [\delta
|
|
% l, \delta
|
|
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T
|
|
% \}]}}\mathbb{E}[\phi(\xi, v)|\xi=\delta l] \delta
|
|
% \stackrel{\mathbb{P}}{\pm}\tilde{\tilde{\varepsilon}}
|
|
% \abs{C_{g_\xi}^u - C_{g_\xi}^l}
|
|
% \right)\\
|
|
% &\phantom{\approx}\cdot \left(1 \pm
|
|
% \frac{\tilde{\varepsilon}}{g_\xi(\delta l)}\right) \pm \varepsilon
|
|
% \end{align*}
|
|
\end{Proof}
|
|
|
|
% \begin{Lemma}[($L(f_n) \to L(f)$), Heiss, Teichmann, and
|
|
% Wutte (2019, Lemma A.11)]
|
|
% For any data $(x_i^{\text{train}}, y_i^{\text{train}}) \in
|
|
% \mathbb{R}^2, i \in \left\{1,\dots,N\right\}$, let $(f_n)_{n \in
|
|
% \mathbb{N}}$ be a sequence of functions that converges point-wise
|
|
% in probability to a function $f : \mathbb{R}\to\mathbb{R}$, then the
|
|
% loss $L$ of $f_n$ converges is probability to $L(f)$ as $n$ tends to
|
|
% infinity,
|
|
% \[
|
|
% \plimn L(f_n) = L(f).
|
|
% \]
|
|
% \proof Vgl. ...
|
|
% \end{Lemma}
|
|
|
|
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.12)]~\\\noindent
|
|
\label{proof:lem12}
|
|
With $\tilde{\lambda} \coloneqq \lambda n g(0)$ Lemma~\ref{lem:s2} follows
|
|
analogously when considering $\tilde{w}$, $f_g^{*, \lambda}$, and $h_k$
|
|
instead of $\tilde{w}^+$, $f_{g,+}^{*, \lambda}$, and $\bar{h}_k$.
|
|
% We start by showing that
|
|
% \[
|
|
% \plimn \tilde{\lambda} \norm{\tilde{w}}_2^2 = \lambda g(0)
|
|
% \left(\int \frac{\left(f_g^{*,\lambda''}\right)^2}{g(x)} dx\right)
|
|
% \]
|
|
% With the definitions of $\tilde{w}$, $\tilde{\lambda}$ and
|
|
% $h$ we have
|
|
% \begin{align*}
|
|
% \tilde{\lambda} \norm{\tilde{w}}_2^2
|
|
% &= \tilde{\lambda} \sum_{k \in
|
|
% \kappa}\left(f_g^{*,\lambda''}(\xi_k) \frac{h_k
|
|
% v_k}{\mathbb{E}v^2|\xi = \xi_k]}\right)^2\\
|
|
% &= \tilde{\lambda} \sum_{k \in
|
|
% \kappa}\left(\left(f_g^{*,\lambda''}\right)^2(\xi_k) \frac{h_k
|
|
% v_k^2}{\mathbb{E}v^2|\xi = \xi_k]}\right) h_k\\
|
|
% & = \lambda g(0) \sum_{k \in
|
|
% \kappa}\left(\left(f_g^{*,\lambda''}\right)^2(\xi_k)\frac{v_k^2}{g_\xi(\xi_k)\mathbb{E}
|
|
% [v^2|\xi=\xi_k]}\right)h_k.
|
|
% \end{align*}
|
|
% By using Lemma~\ref{lem} with $\phi(x,y) =
|
|
% \left(f_g^{*,\lambda''}\right)^2(x)\frac{y^2}{g_\xi(\xi)\mathbb{E}[v^2|\xi=y]}$
|
|
% this converges to
|
|
% \begin{align*}
|
|
% &\plimn \tilde{\lambda}\norm{\tilde{w}}_2^2 = \\
|
|
% &=\lambda
|
|
% g_\xi(0)\mathbb{E}[v^2|\xi=0]\int_{\supp{g_\xi}}\mathbb{E}\left[
|
|
% \left(f_g^{*,\lambda''}\right)^2(\xi)\frac{v^2}{
|
|
% g_\xi(\xi)\mathbb{E}[v^2|\xi=x]^2}\Big{|} \xi = x\right]dx\\
|
|
% &=\lambda g_\xi(0) \mathbb{E}[v^2|\xi=0] \int_{\supp{g_xi}}
|
|
% \frac{\left(f_g^{*,\lambda''}\right)^2 (x)}{g_\xi(x)
|
|
% \mathbb{E}[v^2|\xi=x]} dx \\
|
|
% &=\lambda g(0) \int_{\supp{g_\xi}} \frac{\left(f_g^{*,\lambda''}\right)^2}{g(x)}dx.
|
|
% \end{align*}
|
|
\end{Proof}
|
|
|
|
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.14)]~\\\noindent
|
|
\label{proof:lem14}
|
|
Substitute $F_{+-}^{\lambda, g}\left(f_{g,+}^{*,\lambda},
|
|
f_{g,-}^{*,\lambda}\right)$ with $F^{\lambda,g}\left(f_g^{*,\lambda}\right)$.
|
|
\end{Proof}
|
|
% \begin{Lemma}[Heiss, Teichmann, and
|
|
% Wutte (2019, Lemma A.13)]
|
|
% Using the notation of Definition .. and ... the following statement
|
|
% holds:
|
|
% $\forall \varepsilon \in \mathbb{R}_{>0} : \exists \delta \in
|
|
% \mathbb{R}_{>0} : \forall \omega \in \Omega : \forall l, l' \in
|
|
% \left\{1,\dots,N\right\} : \forall n \in \mathbb{N}$
|
|
% \[
|
|
% \left(\abs{\xi_l(\omega) - \xi_{l'}(\omega)} < \delta \wedge
|
|
% \text{sign}(v_l(\omega)) = \text{sign}(v_{l'}(\omega))\right)
|
|
% \implies \abs{\frac{w_l^{*, \tilde{\lambda}}(\omega)}{v_l(\omega)}
|
|
% - \frac{w_{l'}^{*, \tilde{\lambda}}(\omega)}{v_{l'}(\omega)}} <
|
|
% \frac{\varepsilon}{n},
|
|
% \]
|
|
% if we assume that $v_k$ is never zero.
|
|
% \proof given in ..
|
|
% \end{Lemma}
|
|
|
|
% \begin{Lemma}[$\frac{w^{*,\tilde{\lambda}}}{v} \approx
|
|
% \mathcal{O}(\frac{1}{n})$, Heiss, Teichmann, and
|
|
% Wutte (2019, Lemma A.14)]
|
|
% For any $\lambda > 0$ and data $(x_i^{\text{train}},
|
|
% y_i^{\text{train}}) \in \mathbb{R}^2, i\in
|
|
% \left\{1,\dots,\right\}$, we have
|
|
% \[
|
|
% \forall P \in (0,1) : \exists C \in \mathbb{R}_{>0} : \exists
|
|
% n_0 \in \mathbb{N} : \forall n > n_0 : \mathbb{P}
|
|
% \left[\max_{k\in \left\{1,\dots,n\right\}}
|
|
% \frac{w_k^{*,\tilde{\lambda}}}{v_k} < C
|
|
% \frac{1}{n}\right] > P
|
|
% % \max_{k\in \left\{1,\dots,n\right\}}
|
|
% % \frac{w_k^{*,\tilde{\lambda}}}{v_k} = \plimn
|
|
% \]
|
|
% \proof
|
|
|
|
|
|
% Let $k^*_+ \in \argmax_{k\in
|
|
% \left\{1,\dots,n\right\}}\frac{w^{*,\tilde{\lambda}}}{v_k} : v_k
|
|
% > 0$ and $k^*_- \in \argmax_{k\in
|
|
% \left\{1,\dots,n\right\}}\frac{w^{*,\tilde{\lambda}}}{v_k} : v_k
|
|
% < 0$. W.l.o.g. assume $\frac{w_{k_+^*}^2}{v_{k_+^*}^2} \geq
|
|
% \frac{w_{k_-^*}^2}{v_{k_-^*}^2}$
|
|
% \begin{align*}
|
|
% \frac{F^{\lambda,
|
|
% g}\left(f^{*,\lambda}_g\right)}{\tilde{\lambda}}
|
|
% \makebox[2cm][c]{$\stackrel{\mathbb{P}}{\geq}$}
|
|
% & \frac{1}{2 \tilde{\lambda}}
|
|
% F_n^{\tilde{\lambda}}\left(\mathcal{RN}^{*,\tilde{\lambda}}\right)
|
|
% = \frac{1}{2 \tilde{\lambda}}\left[\sum ... + \tilde{\lambda} \norm{w}_2^2\right]
|
|
% \\
|
|
% \makebox[2cm][c]{$\geq$}
|
|
% & \frac{1}{2}\left( \sum_{\substack{k: v_k
|
|
% > 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*}
|
|
% + \delta)}} \left(w_k^{*,\tilde{\lambda}}\right)^2 +
|
|
% \sum_{\substack{k: v_k < 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*}
|
|
% + \delta)}} \left(w_k^{*,\tilde{\lambda}}\right)^2\right) \\
|
|
% \makebox[2cm][c]{$\overset{\text{Lem. A.6}}{\underset{\delta \text{
|
|
% small enough}}{\geq}} $}
|
|
% &
|
|
% \frac{1}{4}\left(\left(\frac{w_{k_+^*}^{*,\tilde{\lambda}}}
|
|
% {v_{k_+^*}}\right)^2\sum_{\substack{k:
|
|
% v_k > 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*} + \delta)}}v_k^2 +
|
|
% \left(\frac{w_{k_-^*}^{*,\tilde{\lambda}}}{v_{k_-^*}}\right)^2
|
|
% \sum_{\substack{k:
|
|
% v_k < 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*} +
|
|
% \delta)}}v_k^2\right)\\
|
|
% \makebox[2cm][c]{$\stackrel{\mathbb{P}}{\geq}$}
|
|
% & \frac{1}{8}
|
|
% \left(\frac{w_{k_+^*}^{*,\tilde{\lambda}}}{v_{k^*}}\right)^2
|
|
% n \delta g_\xi(\xi_{k_+^*}) \mathbb{P}(v_k
|
|
% >0)\mathbb{E}[v_k^2|\xi_k = \xi_{k^*_+}]
|
|
% \end{align*}
|
|
|
|
% \end{Lemma}
|
|
|
|
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.15)]~\\\noindent
|
|
\label{proof:lem15}
|
|
Consider $\mathcal{RN}^{*,\tilde{\lambda}}$,
|
|
$f^{w^{*,\tilde{\lambda}}}$, and $\kappa = \left\{1, \dots, n
|
|
\right\}$ instead of $\mathcal{RN}_+^{*,\tilde{\lambda}}$,
|
|
$f_+^{w^{*,\tilde{\lambda}}}$, and $\kappa^+$.
|
|
Assuming w.l.o.g.\ $\max_{k \in
|
|
\kappa^+}\abs{\frac{w_k^{*,\tilde{\lambda}}}{v_k}} \geq \max_{k \in
|
|
\kappa^-}\abs{\frac{w_k^{*,\tilde{\lambda}}}{v_k}}$,
|
|
Lemma~\ref{lem:s3} follows analogously by multiplying (58b) with two.
|
|
\end{Proof}
|
|
|
|
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma
|
|
A.16)]~\\\noindent
|
|
\label{proof:lem16}
|
|
As we are considering $F^{\lambda,g}$ instead of
|
|
$F^{\lambda,g}_{+-}$ we need to substitute $2\lambda g(0)$ with
|
|
$\lambda g(0)$
|
|
and thus get
|
|
\[
|
|
\left(f^{w^{*,\tilde{\lambda}}}\right)''(x) \approx
|
|
\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}} n g_\xi(x)
|
|
\mathbb{E}\left[v_k^2|\xi_k = x\right] \stackrel{\mathbb{P}}{\pm} \varepsilon_3
|
|
\]
|
|
and use this to conclude that
|
|
\[
|
|
\lambda g(0)
|
|
\int_{\supp(g)}\hspace{-0.15cm}\frac{\left(\left(f^{w^{*,\tilde{\lambda}}}\right)''(x)\right)^2}{g(x)}dx
|
|
\approx \tilde{\lambda} n
|
|
\int_{\supp(g)}\left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2 \hspace{-0.1cm}
|
|
g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k=x\right]dx.
|
|
\]
|
|
Analogously to the proof in \textcite{heiss2019}, we get
|
|
\begin{align*}
|
|
\tilde{\lambda} \sum_{k \in \kappa}
|
|
\left(w_k^{*,\tilde{\lambda}}\right)^2
|
|
&= \tilde{\lambda} \sum_{k \in \kappa^+}
|
|
\left(w_k^{*,\tilde{\lambda}}\right)^2 + \tilde{\lambda} \sum_{k \in \kappa^-}
|
|
\left(w_k^{*,\tilde{\lambda}}\right)^2 \\
|
|
&\approx \left(\mathbb{P}[v_k <0] + \mathbb{P}[v_k >0]\right)\\
|
|
&\phantom{=}
|
|
\int_{\supp(g_\xi)}
|
|
\left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2
|
|
g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k = x\right] dx
|
|
\stackrel{\mathbb{P}}{\pm} \varepsilon_9 \\
|
|
&= \int_{\supp(g_\xi)}
|
|
\left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2
|
|
g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k = x\right] dx
|
|
\stackrel{\mathbb{P}}{\pm} \varepsilon_9.
|
|
\end{align*}
|
|
With these transformations Lemma~\ref{lem:s4} follows analogously.
|
|
\end{Proof}
|
|
|
|
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.19)]~\\\noindent
|
|
\label{proof:lem19}
|
|
The proof works analogously if $F_{+-}^{\lambda,g}$ is substituted
|
|
by
|
|
\begin{align*}
|
|
F_{+-}^{\lambda,g '}(f_+, f_-) =
|
|
& \sum_{i =
|
|
1}^N \left(f_+(x_i^{\text{train}}) + f_-(x_i^{\text{train}}) -
|
|
y_i^{\text{train}}\right)^2 \\
|
|
& + \lambda g(0) \left(\int_{\supp(g)}\frac{\left(f_+''(x)\right)^2}{g(x)}
|
|
dx + \int_{\supp(g)}\frac{\left(f''_-(x)\right)^2}{g(x)}
|
|
dx\right).
|
|
\end{align*}
|
|
As for $f^n = f_+^n + f_-^n$ such that $\supp(f_+^n) \cap \supp(f_-^n) =
|
|
\emptyset$ and $h = h_+ + h_-$ such that $\supp(h_+) \cap \supp(h_-) =
|
|
\emptyset$ it holds
|
|
\[
|
|
\plimn F^{\lambda, g}(f^n) = F^{\lambda, g}(h) \implies
|
|
\plimn F_{+-}^{\lambda,g '}(f_+^n,f_-^n) = F_{+-}^{\lambda,g '}(h_+,h_-),
|
|
\]
|
|
and all functions can be split into two functions with disjoint support,
|
|
Lemma~\ref{lem:s7} follows.
|
|
\end{Proof}
|
|
\input{Appendix_code.tex}
|
|
|
|
\end{appendices}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
%%% Local Variables:
|
|
%%% mode: latex
|
|
%%% TeX-master: "main"
|
|
%%% End:
|
|
|