\newpage \begin{appendices} \counterwithin{lstfloat}{section} \section{Notes on Proofs of Lemmata in Section~\ref{sec:conv}} \label{appendix:proofs} In contrast to \textcite{heiss2019}, we do not distinguish between $f_+$ and $f_-$. This makes some alterations to the proofs necessary. In the following, the affected proofs and the required changes are given. % Because of that slight alterations are needed in the proofs of % .. auxiliary lemmata. % Alterations that go beyond substituting $F_{+-}^{}$ % As the proofs are ... for the most part only % the alterations needed are specified. % In the following there will be proofs for some important Lemmata in % Section~\ref{sec:theo38}. Further proofs not discussed here can be % found in \textcite{heiss2019} % The proofs in this section are based on \textcite{heiss2019}. Slight % alterations have been made to accommodate for not splitting $f$ into % $f_+$ and $f_-$. % \begin{Theorem}[Proof of Lemma~\ref{theo38}] % \end{Theorem} % \begin{Lemma}[$\frac{w^{*,\tilde{\lambda}}_k}{v_k}\approx\mathcal{O}(\frac{1}{n})$] % For any $\lambda > 0$ and training data $(x_i^{\text{train}}, % y_i^{\text{train}}) \in \mathbb{R}^2, \, i \in % \left\{1,\dots,N\right\}$, we have % \[ % \max_{k \in \left\{1,\dots,n\right\}} \frac{w^{*, % \tilde{\lambda}}_k}{v_k} = \po_{n\to\infty} % \] % \end{Lemma} \begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.9)]~\\\noindent \label{proof:lem9} With $\tilde{\lambda} \coloneqq \lambda n g(0)$ Lemma~\ref{lem:cnvh} follows analogously when considering $\tilde{w}$, $f_g^{*, \lambda}$, and $h_k$ instead of $\tilde{w}^+$, $f_{g,+}^{*, \lambda}$, and $\bar{h}_k$. Consider $\kappa = \left\{1, \dots, n \right\}$ for $n$ nodes instead of $\kappa^+$. 
Furthermore, use $h_k = \frac{1}{n g_\xi(\xi_k)}$ instead of $\bar{h}_k$, together with \[ \mathbb{E} \left[\abs{\left\{m \in \kappa : \xi_m \in [\delta l, \delta(l+1))\right\}}\right] = n \int_{\delta l}^{\delta(l+1)}g_\xi (x) dx \approx n (\delta g_\xi(\delta l) \pm \delta \tilde{\varepsilon}). \] % \[ % \sum_{k \in \kappa : \xi_k < T} \varphi(\xi_k, v_k) % h_{k,n} = \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta % (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}} % \left(\sum_{\substack{k \in \kappa \\ \xi_k \in % [\delta l , \delta(l+1))}} \varphi(\xi_k, v_k) % h_{k,n}\right) \approx % \] % \[ % \approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta % (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}} % \left(\sum_{\substack{k \in \kappa \\ \xi_k \in % [\delta l , \delta(l+1))}} \left(\varphi(\delta l, v_k) % \frac{1}{n g_\xi (\delta l)} \pm \frac{\varepsilon}{n}\right) % \frac{\abs{\left\{m \in \kappa : \xi_m \in [\delta l, % \delta(l+1))\right\}}}{\abs{\left\{m \in \kappa : \xi_m % \in [\delta l, \delta(l+1))\right\}}}\right) % \] % \[ % \approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta % (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T \}]}} % \left(\frac{\sum_{\substack{k \in \kappa \\ \xi_k \in % [\delta l , \delta(l+1))}}\varphi(\delta l, % v_k)}{\abs{\left\{m \in \kappa : \xi_m % \in [\delta l, \delta(l+1))\right\}}} % \frac{\abs{\left\{m \in \kappa : \xi_m \in [\delta l, % \delta(l+1))\right\}}}{n g_\xi (\delta l)}\right) \pm \varepsilon % \] % The amount of kinks in a given interval of length $\delta$ follows a % binomial distribution, % \[ % \mathbb{E} \left[\abs{\left\{m \in \kappa : \xi_m \in [\delta l, % \delta(l+1))\right\}}\right] = n \int_{\delta % l}^{\delta(l+1)}g_\xi (x) dx \approx n (\delta g_\xi(\delta l) % \pm \delta \tilde{\varepsilon}), % \] % for any $\delta \leq \delta(\varepsilon, \tilde{\varepsilon})$, since $g_\xi$ is uniformly continuous on its % support by Assumption.. 
% As the distribution of $v$ is continuous as well we get that % $\mathcal{L}(v_k) = \mathcal{L} v| \xi = \delta l) \forall k \in % \kappa : \xi_k \in [\delta l, \delta(l+1))$ for $\delta \leq % \delta(\varepsilon, \tilde{\varepsilon})$. Thus we get with the law of % large numbers % \begin{align*} % &\sum_{k \in \kappa : \xi_k < T} \varphi(\xi_k, v_k) % h_{k,n} \approx\\ % &\approx \sum_{\substack{l \in \mathbb{Z} \\ [\delta l, \delta % (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T % \}]}}\left(\mathbb{E}[\phi(\xi, v)|\xi=\delta l] % \stackrel{\mathbb{P}}{\pm}\right) \delta \left(1 \pm % \frac{\tilde{\varepsilon}}{g_\xi(\delta l)}\right) \pm \varepsilon % \\ % &\approx \left(\sum_{\substack{l \in \mathbb{Z} \\ [\delta % l, \delta % (l+1)) \in [C_{g_\xi}^l,\min\{C_{g_\xi}^u, T % \}]}}\mathbb{E}[\phi(\xi, v)|\xi=\delta l] \delta % \stackrel{\mathbb{P}}{\pm}\tilde{\tilde{\varepsilon}} % \abs{C_{g_\xi}^u - C_{g_\xi}^l} % \right)\\ % &\phantom{\approx}\cdot \left(1 \pm % \frac{\tilde{\varepsilon}}{g_\xi(\delta l)}\right) \pm \varepsilon % \end{align*} \end{Proof} % \begin{Lemma}[($L(f_n) \to L(f)$), Heiss, Teichmann, and % Wutte (2019, Lemma A.11)] % For any data $(x_i^{\text{train}}, y_i^{\text{train}}) \in % \mathbb{R}^2, i \in \left\{1,\dots,N\right\}$, let $(f_n)_{n \in % \mathbb{N}}$ be a sequence of functions that converges point-wise % in probability to a function $f : \mathbb{R}\to\mathbb{R}$, then the % loss $L$ of $f_n$ converges is probability to $L(f)$ as $n$ tends to % infinity, % \[ % \plimn L(f_n) = L(f). % \] % \proof Vgl. ... % \end{Lemma} \begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.12)]~\\\noindent \label{proof:lem12} With $\tilde{\lambda} \coloneqq \lambda n g(0)$ Lemma~\ref{lem:s2} follows analogously when considering $\tilde{w}$, $f_g^{*, \lambda}$, and $h_k$ instead of $\tilde{w}^+$, $f_{g,+}^{*, \lambda}$, and $\bar{h}_k$. 
% We start by showing that % \[ % \plimn \tilde{\lambda} \norm{\tilde{w}}_2^2 = \lambda g(0) % \left(\int \frac{\left(f_g^{*,\lambda''}\right)^2}{g(x)} dx\right) % \] % With the definitions of $\tilde{w}$, $\tilde{\lambda}$ and % $h$ we have % \begin{align*} % \tilde{\lambda} \norm{\tilde{w}}_2^2 % &= \tilde{\lambda} \sum_{k \in % \kappa}\left(f_g^{*,\lambda''}(\xi_k) \frac{h_k % v_k}{\mathbb{E}v^2|\xi = \xi_k]}\right)^2\\ % &= \tilde{\lambda} \sum_{k \in % \kappa}\left(\left(f_g^{*,\lambda''}\right)^2(\xi_k) \frac{h_k % v_k^2}{\mathbb{E}v^2|\xi = \xi_k]}\right) h_k\\ % & = \lambda g(0) \sum_{k \in % \kappa}\left(\left(f_g^{*,\lambda''}\right)^2(\xi_k)\frac{v_k^2}{g_\xi(\xi_k)\mathbb{E} % [v^2|\xi=\xi_k]}\right)h_k. % \end{align*} % By using Lemma~\ref{lem} with $\phi(x,y) = % \left(f_g^{*,\lambda''}\right)^2(x)\frac{y^2}{g_\xi(\xi)\mathbb{E}[v^2|\xi=y]}$ % this converges to % \begin{align*} % &\plimn \tilde{\lambda}\norm{\tilde{w}}_2^2 = \\ % &=\lambda % g_\xi(0)\mathbb{E}[v^2|\xi=0]\int_{\supp{g_\xi}}\mathbb{E}\left[ % \left(f_g^{*,\lambda''}\right)^2(\xi)\frac{v^2}{ % g_\xi(\xi)\mathbb{E}[v^2|\xi=x]^2}\Big{|} \xi = x\right]dx\\ % &=\lambda g_\xi(0) \mathbb{E}[v^2|\xi=0] \int_{\supp{g_xi}} % \frac{\left(f_g^{*,\lambda''}\right)^2 (x)}{g_\xi(x) % \mathbb{E}[v^2|\xi=x]} dx \\ % &=\lambda g(0) \int_{\supp{g_\xi}} \frac{\left(f_g^{*,\lambda''}\right)^2}{g(x)}dx. % \end{align*} \end{Proof} \begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.14)]~\\\noindent \label{proof:lem14} Substitute $F_{+-}^{\lambda, g}\left(f_{g,+}^{*,\lambda}, f_{g,-}^{*,\lambda}\right)$ with $F^{\lambda,g}\left(f_g^{*,\lambda}\right)$. \end{Proof} % \begin{Lemma}[Heiss, Teichmann, and % Wutte (2019, Lemma A.13)] % Using the notation of Definition .. and ... 
the following statement % holds: % $\forall \varepsilon \in \mathbb{R}_{>0} : \exists \delta \in % \mathbb{R}_{>0} : \forall \omega \in \Omega : \forall l, l' \in % \left\{1,\dots,N\right\} : \forall n \in \mathbb{N}$ % \[ % \left(\abs{\xi_l(\omega) - \xi_{l'}(\omega)} < \delta \wedge % \text{sign}(v_l(\omega)) = \text{sign}(v_{l'}(\omega))\right) % \implies \abs{\frac{w_l^{*, \tilde{\lambda}}(\omega)}{v_l(\omega)} % - \frac{w_{l'}^{*, \tilde{\lambda}}(\omega)}{v_{l'}(\omega)}} < % \frac{\varepsilon}{n}, % \] % if we assume that $v_k$ is never zero. % \proof given in .. % \end{Lemma} % \begin{Lemma}[$\frac{w^{*,\tilde{\lambda}}}{v} \approx % \mathcal{O}(\frac{1}{n})$, Heiss, Teichmann, and % Wutte (2019, Lemma A.14)] % For any $\lambda > 0$ and data $(x_i^{\text{train}}, % y_i^{\text{train}}) \in \mathbb{R}^2, i\in % \left\{1,\dots,\right\}$, we have % \[ % \forall P \in (0,1) : \exists C \in \mathbb{R}_{>0} : \exists % n_0 \in \mathbb{N} : \forall n > n_0 : \mathbb{P} % \left[\max_{k\in \left\{1,\dots,n\right\}} % \frac{w_k^{*,\tilde{\lambda}}}{v_k} < C % \frac{1}{n}\right] > P % % \max_{k\in \left\{1,\dots,n\right\}} % % \frac{w_k^{*,\tilde{\lambda}}}{v_k} = \plimn % \] % \proof % Let $k^*_+ \in \argmax_{k\in % \left\{1,\dots,n\right\}}\frac{w^{*,\tilde{\lambda}}}{v_k} : v_k % > 0$ and $k^*_- \in \argmax_{k\in % \left\{1,\dots,n\right\}}\frac{w^{*,\tilde{\lambda}}}{v_k} : v_k % < 0$. W.l.o.g. assume $\frac{w_{k_+^*}^2}{v_{k_+^*}^2} \geq % \frac{w_{k_-^*}^2}{v_{k_-^*}^2}$ % \begin{align*} % \frac{F^{\lambda, % g}\left(f^{*,\lambda}_g\right)}{\tilde{\lambda}} % \makebox[2cm][c]{$\stackrel{\mathbb{P}}{\geq}$} % & \frac{1}{2 \tilde{\lambda}} % F_n^{\tilde{\lambda}}\left(\mathcal{RN}^{*,\tilde{\lambda}}\right) % = \frac{1}{2 \tilde{\lambda}}\left[\sum ... 
+ \tilde{\lambda} \norm{w}_2^2\right] % \\ % \makebox[2cm][c]{$\geq$} % & \frac{1}{2}\left( \sum_{\substack{k: v_k % > 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*} % + \delta)}} \left(w_k^{*,\tilde{\lambda}}\right)^2 + % \sum_{\substack{k: v_k < 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*} % + \delta)}} \left(w_k^{*,\tilde{\lambda}}\right)^2\right) \\ % \makebox[2cm][c]{$\overset{\text{Lem. A.6}}{\underset{\delta \text{ % small enough}}{\geq}} $} % & % \frac{1}{4}\left(\left(\frac{w_{k_+^*}^{*,\tilde{\lambda}}} % {v_{k_+^*}}\right)^2\sum_{\substack{k: % v_k > 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*} + \delta)}}v_k^2 + % \left(\frac{w_{k_-^*}^{*,\tilde{\lambda}}}{v_{k_-^*}}\right)^2 % \sum_{\substack{k: % v_k < 0 \\\xi_k\in(\xi_{k^*}, \xi_{k^*} + % \delta)}}v_k^2\right)\\ % \makebox[2cm][c]{$\stackrel{\mathbb{P}}{\geq}$} % & \frac{1}{8} % \left(\frac{w_{k_+^*}^{*,\tilde{\lambda}}}{v_{k^*}}\right)^2 % n \delta g_\xi(\xi_{k_+^*}) \mathbb{P}(v_k % >0)\mathbb{E}[v_k^2|\xi_k = \xi_{k^*_+}] % \end{align*} % \end{Lemma} \begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.15)]~\\\noindent \label{proof:lem15} Consider $\mathcal{RN}^{*,\tilde{\lambda}}$, $f^{w^{*,\tilde{\lambda}}}$, and $\kappa = \left\{1, \dots, n \right\}$ instead of $\mathcal{RN}_+^{*,\tilde{\lambda}}$, $f_+^{w^{*,\tilde{\lambda}}}$, and $\kappa^+$. Assuming w.l.o.g.\ $\max_{k \in \kappa^+}\abs{\frac{w_k^{*,\tilde{\lambda}}}{v_k}} \geq \max_{k \in \kappa^-}\abs{\frac{w_k^{*,\tilde{\lambda}}}{v_k}}$, Lemma~\ref{lem:s3} follows analogously by multiplying (58b) by two. 
\end{Proof} \begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.16)]~\\\noindent \label{proof:lem16} As we are considering $F^{\lambda,g}$ instead of $F^{\lambda,g}_{+-}$ we need to substitute $2\lambda g(0)$ with $\lambda g(0)$ and thus get \[ \left(f^{w^{*,\tilde{\lambda}}}\right)''(x) \approx \frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}} n g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k = x\right] \stackrel{\mathbb{P}}{\pm} \varepsilon_3 \] and use this to conclude \[ \lambda g(0) \int_{\supp(g)}\hspace{-0.15cm}\frac{\left(\left(f^{w^{*,\tilde{\lambda}}}\right)''(x)\right)^2}{g(x)}dx \approx \tilde{\lambda} n \int_{\supp(g)}\left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2 \hspace{-0.1cm} g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k=x\right]dx. \] Analogous to the proof of \textcite{heiss2019} we get \begin{align*} \tilde{\lambda} \sum_{k \in \kappa} \left(w_k^{*,\tilde{\lambda}}\right)^2 &= \tilde{\lambda} \sum_{k \in \kappa^+} \left(w_k^{*,\tilde{\lambda}}\right)^2 + \tilde{\lambda} \sum_{k \in \kappa^-} \left(w_k^{*,\tilde{\lambda}}\right)^2 \\ &\approx \tilde{\lambda} n \left(\mathbb{P}[v_k <0] + \mathbb{P}[v_k >0]\right)\\ &\phantom{=} \int_{\supp(g_\xi)} \left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2 g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k = x\right] dx \stackrel{\mathbb{P}}{\pm} \varepsilon_9 \\ &= \tilde{\lambda} n \int_{\supp(g_\xi)} \left(\frac{w_{l_x}^{*,\tilde{\lambda}}}{v_{l_x}}\right)^2 g_\xi(x) \mathbb{E}\left[v_k^2|\xi_k = x\right] dx \stackrel{\mathbb{P}}{\pm} \varepsilon_9. \end{align*} With these transformations Lemma~\ref{lem:s4} follows analogously. 
\end{Proof} \begin{Proof}[Heiss, Teichmann, and Wutte (2019, Lemma A.19)]~\\\noindent \label{proof:lem19} The proof works analogously if $F_{+-}^{\lambda,g}$ is substituted by \begin{align*} F_{+-}^{\lambda,g '}(f_+, f_-) = & \sum_{i = 1}^N \left(f_+(x_i^{\text{train}}) + f_-(x_i^{\text{train}}) - y_i^{\text{train}}\right)^2 \\ & + \lambda g(0) \left(\int_{\supp(g)}\frac{\left(f_+''(x)\right)^2}{g(x)} dx + \int_{\supp(g)}\frac{\left(f''_-(x)\right)^2}{g(x)} dx\right). \end{align*} Since for $f^n = f_+^n + f_-^n$ such that $\supp(f_+^n) \cap \supp(f_-^n) = \emptyset$ and $h = h_+ + h_-$ such that $\supp(h_+) \cap \supp(h_-) = \emptyset$ it holds that \[ \plimn F^{\lambda, g}(f^n) = F^{\lambda, g}(h) \implies \plimn F_{+-}^{\lambda,g '}(f_+^n,f_-^n) = F_{+-}^{\lambda,g '}(h_+,h_-), \] and all functions can be split into two functions with disjoint support, Lemma~\ref{lem:s7} follows. \end{Proof} \input{Appendix_code.tex} \end{appendices} %%% Local Variables: %%% mode: latex %%% TeX-master: "main" %%% End: