\newpage
\begin{appendices}
\counterwithin{lstfloat}{section}
\section{Notes on Proofs of Lemmata in Section~\ref{sec:conv}}
\label{appendix:proofs}
Contrary to \textcite{heiss2019}, we do not distinguish between $f_+$ and
$f_-$. This makes some alterations to the proofs necessary. In the
following, the affected proofs and the required changes are given.
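For the most part, the required changes amount to a consistent renaming of
the split objects of \textcite{heiss2019} into their unsplit counterparts
used here; schematically,
\begin{align*}
  &\tilde{w}^+ \to \tilde{w}, \qquad
  f_{g,+}^{*, \lambda} \to f_g^{*, \lambda}, \qquad
  \bar{h}_k \to h_k, \qquad
  \kappa^+ \to \kappa = \left\{1, \dots, n\right\},\\
  &F_{+-}^{\lambda, g} \to F^{\lambda, g}, \qquad
  \mathcal{RN}_+^{*,\tilde{\lambda}} \to \mathcal{RN}^{*,\tilde{\lambda}}, \qquad
  f_+^{w^{*,\tilde{\lambda}}} \to f^{w^{*,\tilde{\lambda}}},
\end{align*}
where an arrow indicates which unsplit object takes the place of its split
counterpart; alterations that go beyond this renaming are spelled out in
the respective proofs below.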
% Because of that slight alterations are needed in the proofs of
% .. auxiliary lemmata.
% Alterations that go beyond substituting $F_{+-}^{}$
% As the proofs are ... for the most part only
% the alterations needed are specified.
% In the following there will be proofs for some important Lemmata in
% Section~\ref{sec:theo38}. Further proofs not discussed here can be
% found in \textcite{heiss2019}
% The proves in this section are based on \textcite{heiss2019}. Slight
% alterations have been made to accommodate for not splitting $f$ into
% $f_+$ and $f_-$.
% \begin{Theorem}[Proof of Lemma~\ref{theo38}]
% \end{Theorem}
% \begin{Lemma}[$\frac{w^{*,\tilde{\lambda}}_k}{v_k}\approx\mathcal{O}(\frac{1}{n})$]
% For any $\lambda > 0$ and training data $(x_i^{\text{train}},
% y_i^{\text{train}}) \in \mathbb{R}^2, \, i \in
% \left\{1,\dots,N\right\}$, we have
% \[
% \max_{k \in \left\{1,\dots,n\right\}} \frac{w^{*,
% \tilde{\lambda}}_k}{v_k} = \po_{n\to\infty}
% \]
% \end{Lemma}
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.9)]~\\\noindent
\label{proof:lem9}
With $\tilde{\lambda} \coloneqq \lambda n g(0)$, Lemma~\ref{lem:cnvh} follows
analogously when considering $\tilde{w}$, $f_g^{*, \lambda}$, and $h_k$
instead of $\tilde{w}^+$, $f_{g,+}^{*, \lambda}$, and $\bar{h}_k$.
Further, consider $\kappa = \left\{1, \dots, n \right\}$ for all $n$ nodes
instead of $\kappa^+$, use $h_k = \frac{1}{n g_\xi(\xi_k)}$
instead of $\bar{h}_k$, and use the approximation
\[
  \mathbb{E} \left[\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
        \delta(l+1))\right\}}\right] = n \int_{\delta
    l}^{\delta(l+1)}g_\xi (x)\, dx \approx n \left(\delta g_\xi(\delta l)
    \pm \delta \tilde{\varepsilon}\right).
\]
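The approximation in the last display can be justified as in
\textcite{heiss2019}: the number of kinks $\xi_m$ falling into an interval
of length $\delta$ is binomially distributed with success probability
$\int_{\delta l}^{\delta(l+1)} g_\xi(x)\,dx$, and since $g_\xi$ is assumed
to be uniformly continuous on its support,
\[
  \abs{n \int_{\delta l}^{\delta(l+1)} g_\xi(x)\,dx
    - n \delta g_\xi(\delta l)}
  \leq n \int_{\delta l}^{\delta(l+1)}
  \abs{g_\xi(x) - g_\xi(\delta l)}\,dx
  \leq n \delta \tilde{\varepsilon}
\]
for every $\delta$ small enough (depending on $\tilde{\varepsilon}$).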
% \[
% \sum_{k \in \kappa : \xi_k < T} \varphi(\xi_k, v_k)
% h_{k,n} = \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
% \left(\sum_{\substack{k \in \kappa \\ \xi_k \in
% [\delta l , \delta(l+1))}} \varphi(\xi_k, v_k)
% h_{k,n}\right) \approx
% \]
% \[
% \approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
% \left(\sum_{\substack{k \in \kappa \\ \xi_k \in
% [\delta l , \delta(l+1))}} \left(\varphi(\delta l, v_k)
% \frac{1}{n g_\xi (\delta l)} \pm \frac{\varepsilon}{n}\right)
% \frac{\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
% \delta(l+1))\right\}}}{\abs{\left\{m \in \kappa : \xi_m
% \in [\delta l, \delta(l+1))\right\}}}\right)
% \]
% \[
% \approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}}
% \left(\frac{\sum_{\substack{k \in \kappa \\ \xi_k \in
% [\delta l , \delta(l+1))}}\varphi(\delta l,
% v_k)}{\abs{\left\{m \in \kappa : \xi_m
% \in [\delta l, \delta(l+1))\right\}}}
% \frac{\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
% \delta(l+1))\right\}}}{n g_\xi (\delta l)}\right) \pm \varepsilon
% \]
% The amount of kinks in a given interval of length $\delta$ follows a
% binomial distribution,
% \[
% \mathbb{E} \left[\abs{\left\{m \in \kappa : \xi_m \in [\delta l,
% \delta(l+1))\right\}}\right] = n \int_{\delta
% l}^{\delta(l+1)}g_\xi (x) dx \approx n (\delta g_\xi(\delta l)
% \pm \delta \tilde{\varepsilon}),
% \]
% for any $\delta \leq \delta(\varepsilon, \tilde{\varepsilon})$, since $g_\xi$ is uniformly continuous on its
% support by Assumption..
% As the distribution of $v$ is continuous as well we get that
% $\mathcal{L}(v_k) = \mathcal{L} v| \xi = \delta l) \forall k \in
% \kappa : \xi_k \in [\delta l, \delta(l+1))$ for $\delta \leq
% \delta(\varepsilon, \tilde{\varepsilon})$. Thus we get with the law of
% large numbers
% \begin{align*}
% &\sum_{k \in \kappa : \xi_k < T} \varphi(\xi_k, v_k)
% h_{k,n} \approx\\
% &\approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T
% \}]}}\left(\mathbb{E}[\phi(\xi, v)|\xi=\delta l]
% \stackrel{\mathbb{P}}{\pm}\right) \delta \left(1 \pm
% \frac{\tilde{\varepsilon}}{g_\xi(\delta l)}\right) \pm \varepsilon
% \\
% &\approx \left(\sum_{\substack{l \in \mathbb{Z} \\ [\delta
% l, \delta
% (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T
% \}]}}\mathbb{E}[\phi(\xi, v)|\xi=\delta l] \delta
% \stackrel{\mathbb{P}}{\pm}\tilde{\tilde{\varepsilon}}
% \abs{C_{g_\xi}^u - C_{g_\xi}^l}
% \right)\\
% &\phantom{\approx}\cdot \left(1 \pm
% \frac{\tilde{\varepsilon}}{g_\xi(\delta l)}\right) \pm \varepsilon
% \end{align*}
\end{Proof}
% \begin{Lemma}[($L(f_n) \to L(f)$), Heiss, Teichmann, and
% Wutte (2019, Lemma A.11)]
% For any data $(x_i^{\text{train}}, y_i^{\text{train}}) \in
% \mathbb{R}^2, i \in \left\{1,\dots,N\right\}$, let $(f_n)_{n \in
% \mathbb{N}}$ be a sequence of functions that converges point-wise
% in probability to a function $f : \mathbb{R}\to\mathbb{R}$, then the
% loss $L$ of $f_n$ converges is probability to $L(f)$ as $n$ tends to
% infinity,
% \[
% \plimn L(f_n) = L(f).
% \]
% \proof Vgl. ...
% \end{Lemma}
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.12)]~\\\noindent
\label{proof:lem12}
With $\tilde{\lambda} \coloneqq \lambda n g(0)$, Lemma~\ref{lem:s2} follows
analogously when considering $\tilde{w}$, $f_g^{*, \lambda}$, and $h_k$
instead of $\tilde{w}^+$, $f_{g,+}^{*, \lambda}$, and $\bar{h}_k$.
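Concretely, inserting the definitions of $\tilde{w}$, $\tilde{\lambda}$,
and $h_k$, the corresponding computation of \textcite{heiss2019} becomes,
in the unsplit setting,
\begin{align*}
  \tilde{\lambda} \sum_{k \in \kappa} \tilde{w}_k^2
  &= \tilde{\lambda} \sum_{k \in \kappa}
    \left(f_g^{*,\lambda''}(\xi_k)
    \frac{h_k v_k}{\mathbb{E}\left[v^2|\xi = \xi_k\right]}\right)^2 \\
  &= \lambda g(0) \sum_{k \in \kappa}
    \left(f_g^{*,\lambda''}\right)^2(\xi_k)
    \frac{v_k^2}{g_\xi(\xi_k) \mathbb{E}\left[v^2|\xi =
    \xi_k\right]^2} h_k,
\end{align*}
which converges in probability to $\lambda g(0) \int_{\supp(g)}
\frac{\left(f_g^{*,\lambda''}\right)^2(x)}{g(x)}\,dx$ as $n$ tends to
infinity.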
% We start by showing that
% \[
% \plimn \tilde{\lambda} \norm{\tilde{w}}_2^2 = \lambda g(0)
% \left(\int \frac{\left(f_g^{*,\lambda''}\right)^2}{g(x)} dx\right)
% \]
% With the definitions of $\tilde{w}$, $\tilde{\lambda}$ and
% $h$ we have
% \begin{align*}
% \tilde{\lambda} \norm{\tilde{w}}_2^2
% &= \tilde{\lambda} \sum_{k \in
% \kappa}\left(f_g^{*,\lambda''}(\xi_k) \frac{h_k
% v_k}{\mathbb{E}v^2|\xi = \xi_k]}\right)^2\\
% &= \tilde{\lambda} \sum_{k \in
% \kappa}\left(\left(f_g^{*,\lambda''}\right)^2(\xi_k) \frac{h_k
% v_k^2}{\mathbb{E}v^2|\xi = \xi_k]}\right) h_k\\
% & = \lambda g(0) \sum_{k \in
% \kappa}\left(\left(f_g^{*,\lambda''}\right)^2(\xi_k)\frac{v_k^2}{g_\xi(\xi_k)\mathbb{E}
% [v^2|\xi=\xi_k]}\right)h_k.
% \end{align*}
% By using Lemma~\ref{lem} with $\phi(x,y) =
% \left(f_g^{*,\lambda''}\right)^2(x)\frac{y^2}{g_\xi(\xi)\mathbb{E}[v^2|\xi=y]}$
% this converges to
% \begin{align*}
% &\plimn \tilde{\lambda}\norm{\tilde{w}}_2^2 = \\
% &=\lambda
% g_\xi(0)\mathbb{E}[v^2|\xi=0]\int_{\supp{g_\xi}}\mathbb{E}\left[
% \left(f_g^{*,\lambda''}\right)^2(\xi)\frac{v^2}{
% g_\xi(\xi)\mathbb{E}[v^2|\xi=x]^2}\Big{|} \xi = x\right]dx\\
% &=\lambda g_\xi(0) \mathbb{E}[v^2|\xi=0] \int_{\supp{g_xi}}
% \frac{\left(f_g^{*,\lambda''}\right)^2 (x)}{g_\xi(x)
% \mathbb{E}[v^2|\xi=x]} dx \\
% &=\lambda g(0) \int_{\supp{g_\xi}} \frac{\left(f_g^{*,\lambda''}\right)^2}{g(x)}dx.
% \end{align*}
\end{Proof}
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.14)]~\\\noindent
\label{proof:lem14}
The proof follows analogously after substituting $F_{+-}^{\lambda,
g}\left(f_{g,+}^{*,\lambda}, f_{g,-}^{*,\lambda}\right)$ by
$F^{\lambda,g}\left(f_g^{*,\lambda}\right)$.
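For orientation, the unsplit functional mirrors the split functional
written out in the proof of Lemma~A.19 below, carrying a single curvature
penalty instead of two; as it appears in the computations of this
appendix, it has the shape
\[
  F^{\lambda,g}(f) = \sum_{i = 1}^N \left(f(x_i^{\text{train}}) -
    y_i^{\text{train}}\right)^2 + \lambda g(0)
  \int_{\supp(g)} \frac{\left(f''(x)\right)^2}{g(x)}\, dx.
\]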
\end{Proof}
% \begin{Lemma}[Heiss, Teichmann, and
% Wutte (2019, Lemma A.13)]
% Using the notation of Definition .. and ... the following statement
% holds:
% $\forall \varepsilon \in \mathbb{R}_{>0} : \exists \delta \in
% \mathbb{R}_{>0} : \forall \omega \in \Omega : \forall l, l' \in
% \left\{1,\dots,N\right\} : \forall n \in \mathbb{N}$
% \[
% \left(\abs{\xi_l(\omega) - \xi_{l'}(\omega)} < \delta \wedge
% \text{sign}(v_l(\omega)) = \text{sign}(v_{l'}(\omega))\right)
% \implies \abs{\frac{w_l^{*, \tilde{\lambda}}(\omega)}{v_l(\omega)}
% - \frac{w_{l'}^{*, \tilde{\lambda}}(\omega)}{v_{l'}(\omega)}} <
% \frac{\varepsilon}{n},
% \]
% if we assume that $v_k$ is never zero.
% \proof given in ..
% \end{Lemma}
% \begin{Lemma}[$\frac{w^{*,\tilde{\lambda}}}{v} \approx
% \mathcal{O}(\frac{1}{n})$, Heiss, Teichmann, and
% Wutte (2019, Lemma A.14)]
% For any $\lambda > 0$ and data $(x_i^{\text{train}},
% y_i^{\text{train}}) \in \mathbb{R}^2, i\in
% \left\{1,\dots,\right\}$, we have
% \[
% \forall P \in (0,1) : \exists C \in \mathbb{R}_{>0} : \exists
% n_0 \in \mathbb{N} : \forall n > n_0 : \mathbb{P}
% \left[\max_{k\in \left\{1,\dots,n\right\}}
% \frac{w_k^{*,\tilde{\lambda}}}{v_k} < C
% \frac{1}{n}\right] > P
% % \max_{k\in \left\{1,\dots,n\right\}}
% % \frac{w_k^{*,\tilde{\lambda}}}{v_k} = \plimn
% \]
% \proof
% Let $k^*_+ \in \argmax_{k\in
% \left\{1,\dots,n\right\}}\frac{w^{*,\tilde{\lambda}}}{v_k} : v_k
% > 0$ and $k^*_- \in \argmax_{k\in
% \left\{1,\dots,n\right\}}\frac{w^{*,\tilde{\lambda}}}{v_k} : v_k
% < 0$. W.l.o.g. assume $\frac{w_{k_+^*}^2}{v_{k_+^*}^2} \geq
% \frac{w_{k_-^*}^2}{v_{k_-^*}^2}$
% \begin{align*}
% \frac{F^{\lambda,
% g}\left(f^{*,\lambda}_g\right)}{\tilde{\lambda}}
% \makebox[2cm][c]{$\stackrel{\mathbb{P}}{\geq}$}
% & \frac{1}{2 \tilde{\lambda}}
% F_n^{\tilde{\lambda}}\left(\mathcal{RN}^{*,\tilde{\lambda}}\right)
% = \frac{1}{2 \tilde{\lambda}}\left[\sum ... + \tilde{\lambda} \norm{w}_2^2\right]
% \\
% \makebox[2cm][c]{$\geq$}
% & \frac{1}{2}\left( \sum_{\substack{k: v_k
% > 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*}
% + \delta)}} \left(w_k^{*,\tilde{\lambda}}\right)^2 +
% \sum_{\substack{k: v_k < 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*}
% + \delta)}} \left(w_k^{*,\tilde{\lambda}}\right)^2\right) \\
% \makebox[2cm][c]{$\overset{\text{Lem. A.6}}{\underset{\delta \text{
% small enough}}{\geq}} $}
% &
% \frac{1}{4}\left(\left(\frac{w_{k_+^*}^{*,\tilde{\lambda}}}
% {v_{k_+^*}}\right)^2\sum_{\substack{k:
% v_k > 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*} + \delta)}}v_k^2 +
% \left(\frac{w_{k_-^*}^{*,\tilde{\lambda}}}{v_{k_-^*}}\right)^2
% \sum_{\substack{k:
% v_k < 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*} +
% \delta)}}v_k^2\right)\\
% \makebox[2cm][c]{$\stackrel{\mathbb{P}}{\geq}$}
% & \frac{1}{8}
% \left(\frac{w_{k_+^*}^{*,\tilde{\lambda}}}{v_{k^*}}\right)^2
% n \delta g_\xi(\xi_{k_+^*}) \mathbb{P}(v_k
% >0)\mathbb{E}[v_k^2|\xi_k = \xi_{k^*_+}]
% \end{align*}
% \end{Lemma}
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.15)]~\\\noindent
\label{proof:lem15}
Consider $\mathcal{RN}^{*,\tilde{\lambda}}$,
$f^{w^{*,\tilde{\lambda}}}$, and $\kappa = \left\{1, \dots, n
\right\}$ instead of $\mathcal{RN}_+^{*,\tilde{\lambda}}$,
$f_+^{w^{*,\tilde{\lambda}}}$, and $\kappa^+$.
Assuming w.l.o.g. $\max_{k \in
  \kappa^+}\abs{\frac{w_k^{*,\tilde{\lambda}}}{v_k}} \geq \max_{k \in
  \kappa^-}\abs{\frac{w_k^{*,\tilde{\lambda}}}{v_k}}$,
Lemma~\ref{lem:s3} follows analogously by multiplying (58b) by two.
\end{Proof}
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma
A.16)]~\\\noindent
\label{proof:lem16}
As we are considering $F^{\lambda,g}$ instead of
$F^{\lambda,g}_{+-}$, we need to substitute $2\lambda g(0)$ by
$\lambda g(0)$ and thus get
\[
  \left(f^{w^{*,\tilde{\lambda}}}\right)''(x) \approx
  \frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}} n g_\xi(x)
  \mathbb{E}\left[v_k^2|\xi_k = x\right] \stackrel{\mathbb{P}}{\pm} \varepsilon_3,
\]
which we use to obtain
\[
  \lambda g(0)
  \int_{\supp(g)}\hspace{-0.15cm}\frac{\left(\left(f^{w^{*,\tilde{\lambda}}}\right)''(x)\right)^2}{g(x)}dx
  \approx \tilde{\lambda} n
  \int_{\supp(g)}\left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2 \hspace{-0.1cm}
  g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k=x\right]dx.
\]
Analogously to the proof in \textcite{heiss2019}, and using that
$\mathbb{P}[v_k < 0] + \mathbb{P}[v_k > 0] = 1$ since $v_k$ is assumed to
be nonzero almost surely, we get
\begin{align*}
  \tilde{\lambda} \sum_{k \in \kappa}
  \left(w_k^{*,\tilde{\lambda}}\right)^2
  &= \tilde{\lambda} \sum_{k \in \kappa^+}
    \left(w_k^{*,\tilde{\lambda}}\right)^2 + \tilde{\lambda} \sum_{k \in \kappa^-}
    \left(w_k^{*,\tilde{\lambda}}\right)^2 \\
  &\approx \left(\mathbb{P}[v_k <0] + \mathbb{P}[v_k >0]\right)\\
  &\phantom{\approx}
  \int_{\supp(g_\xi)}
  \left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2
  g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k = x\right] dx
  \stackrel{\mathbb{P}}{\pm} \varepsilon_9 \\
  &= \int_{\supp(g_\xi)}
  \left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2
  g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k = x\right] dx
  \stackrel{\mathbb{P}}{\pm} \varepsilon_9.
\end{align*}
With these transformations, Lemma~\ref{lem:s4} follows analogously.
\end{Proof}
\begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.19)]~\\\noindent
\label{proof:lem19}
The proof works analogously if $F_{+-}^{\lambda,g}$ is substituted
by
\begin{align*}
F_{+-}^{\lambda,g '}(f_+, f_-) =
& \sum_{i =
1}^N \left((f_+ + f_-)(x_i^{\text{train}}) -
y_i^{\text{train}}\right)^2 \\
& + \lambda g(0) \left(\int_{\supp(g)}\frac{\left(f_+''(x)\right)^2}{g(x)}
dx + \int_{\supp(g)}\frac{\left(f''_-(x)\right)^2}{g(x)}
dx\right).
\end{align*}
Since for $f^n = f_+^n + f_-^n$ with $\supp(f_+^n) \cap \supp(f_-^n) =
\emptyset$ and $h = h_+ + h_-$ with $\supp(h_+) \cap \supp(h_-) =
\emptyset$ it holds that
\[
  \plimn F^{\lambda, g}(f^n) = F^{\lambda, g}(h) \implies
  \plimn F_{+-}^{\lambda,g '}(f_+^n,f_-^n) = F_{+-}^{\lambda,g '}(h_+,h_-),
\]
and since all functions can be split into two functions with disjoint
support, Lemma~\ref{lem:s7} follows.
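Heuristically, the displayed implication holds because for $f_+$ and $f_-$
with disjoint supports the cross term of the second derivatives vanishes,
so that the split penalty coincides with the unsplit one,
\[
  \int_{\supp(g)}\frac{\left(\left(f_+ + f_-\right)''(x)\right)^2}{g(x)}\,dx
  = \int_{\supp(g)}\frac{\left(f_+''(x)\right)^2}{g(x)}\,dx
  + \int_{\supp(g)}\frac{\left(f_-''(x)\right)^2}{g(x)}\,dx,
\]
and hence $F_{+-}^{\lambda,g '}(f_+, f_-) = F^{\lambda, g}(f_+ + f_-)$, so
that both sides of the implication express the same convergence.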
\end{Proof}
\input{Appendix_code.tex}
\end{appendices}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "main"
%%% End: