diff options
author | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2025-02-11 12:23:44 -0300 |
---|---|---|
committer | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2025-02-11 12:23:44 -0300 |
commit | 01a22225f2d207f04df595290e0e5c742a29ccee (patch) | |
tree | c7a885772aeaacd77978775bc1ebe9d50f9e9668 /ictp-saifr_colloquium.tex | |
parent | 64a3acf60804cfa2e504f695526c75c625640973 (diff) | |
download | ictp-saifr_colloquium-01a22225f2d207f04df595290e0e5c742a29ccee.tar.gz ictp-saifr_colloquium-01a22225f2d207f04df595290e0e5c742a29ccee.tar.bz2 ictp-saifr_colloquium-01a22225f2d207f04df595290e0e5c742a29ccee.zip |
More work on presentation
Diffstat (limited to 'ictp-saifr_colloquium.tex')
-rw-r--r-- | ictp-saifr_colloquium.tex | 150 |
1 file changed, 146 insertions, 4 deletions
diff --git a/ictp-saifr_colloquium.tex b/ictp-saifr_colloquium.tex index 2d621f2..dc152f8 100644 --- a/ictp-saifr_colloquium.tex +++ b/ictp-saifr_colloquium.tex @@ -227,7 +227,7 @@ \frametitle{Machine learning is just curve fitting} \begin{columns} - \begin{column}{0.8\textwidth} + \begin{column}{0.9\textwidth} Number of data points $M$ is big: all images on the internet \medskip @@ -243,15 +243,157 @@ \medskip - $\chi^2(\pmb a\mid\text{data})$ is called \emph{cost} or \emph{objective function} + $\chi^2(\pmb a\mid\text{data})$ is called the \emph{cost} or \emph{loss function} \medskip - $\chi^2(\pmb a^*\mid\text{data})$ is call the \emph{training error} + $\chi^2(\pmb a^*\mid\text{data})$ is called the \emph{training error} \medskip - MSE is called \emph{test} or \emph{generalization error} + MSE is called the \emph{test} or \emph{generalization error} + + \bigskip + + \textbf{BUT:} machine learning uses many more parameters $N$ than data points $M$ + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Curve fitting: the bad, the good, the ugly, and the weird} + \begin{columns} + \begin{column}{0.25\textwidth} + \centering + $M=40$, $N=2$ + + \bigskip + + \includegraphics[width=\columnwidth]{figs/fit_underfit_poly.pdf} + + \bigskip + + Underfit + + \smallskip + + $\chi^2$ is large + + \smallskip + + Best fit has \emph{high bias} \phantom{variance} + \end{column} + \begin{column}{0.25\textwidth} + \centering + $M=40$, $N=7$ + + \bigskip + + \includegraphics[width=\columnwidth]{figs/fit_goodfit_poly.pdf} + + \bigskip + + Good fit! + + \smallskip + + $\chi^2$ is moderate + + \smallskip + + Best fit has \emph{low variance} and \emph{low bias} + \end{column} + \begin{column}{0.25\textwidth} + \centering + $M=40$, $N=40$ + + \bigskip + + \includegraphics[width=\columnwidth]{figs/fit_overfit_poly.pdf} + + \bigskip + + Overfit + + \smallskip + + $\chi^2$ is zero + + \smallskip + + Best fit has \emph{high variance} + \end{column} + \begin{column}{0.25\textwidth} + \centering + $M=40$, $N=80$ + + \bigskip + + \includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly.pdf} + + \bigskip + + Good fit? + + \smallskip + + $\chi^2$ is zero + + \smallskip + + Best fit has \emph{low variance} and \emph{low bias} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Curve fitting: the bad, the good, the ugly, and the weird} + + \begin{columns} + \begin{column}{0.5\textwidth} + \centering + $M=40$, $N=80$ + + \bigskip + + \includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly.pdf} + \end{column} + \begin{column}{0.5\textwidth} + \centering + \includegraphics[width=\textwidth]{figs/fit_bias-variance2_poly.pdf} + + \bigskip + + Bias--variance trade-off is blown up! + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Overparamaterized curve fitting} + \begin{columns} + \begin{column}{0.5\textwidth} + Underparameterized fitting ($M>N$) has a unique minimizing solution + + \medskip + + Overparameterized fits are not unique: $M$ constraints + \[ + 0=y_i-\hat f(x_i\mid\pmb a)\qquad\text{for all $1\leq i\leq M$} + \] + plus $N$ unknowns $\pmb a=[a_1,\ldots, a_N]$ gives a manifold of $N-M$ dimensions + + \medskip + + What leads to the `good' solutions instead of `bad' ones? + \end{column} + \begin{column}{0.5\textwidth} + \centering + $M=40$, $N=80$ + + \bigskip + + \includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly.pdf} \end{column} \end{columns} \end{frame} |