Diffstat (limited to 'ictp-saifr_colloquium.tex')
-rw-r--r-- | ictp-saifr_colloquium.tex | 183
1 file changed, 175 insertions, 8 deletions
diff --git a/ictp-saifr_colloquium.tex b/ictp-saifr_colloquium.tex
index 83c3226..8611f15 100644
--- a/ictp-saifr_colloquium.tex
+++ b/ictp-saifr_colloquium.tex
@@ -1,4 +1,4 @@
-\documentclass[aspectratio=169,usenames,dvipsnames,fleqn]{beamer}
+\documentclass[aspectratio=169,usenames,dvipsnames]{beamer}
 
 \setbeamerfont{title}{family=\bf}
 \setbeamerfont{frametitle}{family=\bf}
@@ -59,27 +59,194 @@
 \end{frame}
 
 \begin{frame}
-  \frametitle{Curve fitting: the good, the bad, and the weird}
+  \frametitle{Curve fitting: the bad, the good, and the ugly}
 
   \begin{columns}
-    \begin{column}{0.5\textwidth}
+    \begin{column}{0.4\textwidth}
+      You have $M$ data points $(x_1,y_1),\ldots,(x_M,y_M)$
+
+      \bigskip
+
+      Perhaps a noisy sample of a ground truth function
+      $y_i=f(x_i)+\xi$
+    \end{column}
+    \begin{column}{0.6\textwidth}
+      \begin{overprint}
+        \onslide<1>\includegraphics[width=\columnwidth]{figs/fit_data.pdf}
+        \onslide<2>\includegraphics[width=\columnwidth]{figs/fit_data_truth.pdf}
+        \onslide<3>\includegraphics[width=\columnwidth]{figs/fit_data.pdf}
+      \end{overprint}
     \end{column}
   \end{columns}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Curve fitting: the bad, the good, and the ugly}
+
   \begin{columns}
-    \begin{column}{0.5\textwidth}
+    \begin{column}{0.4\textwidth}
+      Pick a basis of $N$ functions $b_1(x), \ldots, b_N(x)$
+
+      \bigskip
+
+      Approximate the ground truth
+      \[
+        \hat f(x\mid a_1,\ldots, a_N)=\sum_{j=1}^Na_jb_j(x)
+      \]
+
+      Find $a_1, \ldots, a_N$ minimizing
+      \[
+        \chi^2
+        =\sum_{i=1}^M\left(y_i-\sum_{j=1}^Na_jb_j(x_i)\right)^2
+      \]
+    \end{column}
+    \begin{column}{0.6\textwidth}
+      \begin{overprint}
+        \onslide<1>\includegraphics[width=\columnwidth]{figs/fit_basis_poly.pdf}
+      \end{overprint}
+    \end{column}
+  \end{columns}
+\end{frame}
+\begin{frame}
+  \frametitle{Curve fitting: the bad, the good, and the ugly}
+
+  \begin{columns}
+    \begin{column}{0.333\textwidth}
+      \centering
+      $M=40$, $N=2$
+
+      \bigskip
+
+      \includegraphics[width=\columnwidth]{figs/fit_underfit_poly.pdf}
+
+      \bigskip
+
+      Underfit
+
+      \smallskip
+
+      Too few parameters
+
+      \smallskip
+
+      $\chi^2$ is large
+
+      \smallskip
+
+      Best fit is \emph{biased}
+    \end{column}
+    \begin{column}{0.333\textwidth}
+      \centering
+      $M=40$, $N=7$
+
+      \bigskip
+
+      \includegraphics[width=\columnwidth]{figs/fit_goodfit_poly.pdf}
+
+      \bigskip
+
+      Good fit!
+
+      \smallskip
+
+      Right number of parameters
+
+      \smallskip
+
+      $\chi^2$ is moderate
+
+      \smallskip
+
+      \vphantom{Best fit}
+    \end{column}
+    \begin{column}{0.333\textwidth}
+      \centering
+      $M=40$, $N=40$
+
+      \bigskip
+
+      \includegraphics[width=\columnwidth]{figs/fit_overfit_poly.pdf}
+
+      \bigskip
+
+      Overfit
+
+      \smallskip
+
+      Too many parameters
+
+      \smallskip
+
+      $\chi^2$ is zero
+
+      \smallskip
+
+      Best fit has \emph{high variance}
     \end{column}
   \end{columns}
 \end{frame}
 
 \begin{frame}
-  \frametitle{Curve fitting: the good, the bad, and the weird}
+  \frametitle{Curve fitting: the bad, the good, and the ugly}
 
   \begin{columns}
-    \begin{column}{0.33\textwidth}
+    \begin{column}{0.5\textwidth}
+      Knowing the ground truth, fit error is
+      \[
+        \text{MSE}=\int dx\left(f(x)-\sum_{j=1}^Na_jb_j(x)\right)^2
+      \]
+
+      \smallskip
+
+      Trade-off between \emph{bias} and \emph{variance}:
+      \begin{itemize}
+        \item \textbf{Bias} reflects missing qualitative features of the data
+        \item \textbf{Variance} reflects strong dependence on the noise
+      \end{itemize}
     \end{column}
-    \begin{column}{0.33\textwidth}
+    \begin{column}{0.5\textwidth}
+      \includegraphics[width=\columnwidth]{figs/fit_bias-variance_poly.pdf}
+
+      \medskip
+
+      \includegraphics[width=0.32\columnwidth]{figs/fit_underfit_poly.pdf}
+      \hfill
+      \includegraphics[width=0.32\columnwidth]{figs/fit_goodfit_poly.pdf}
+      \hfill
+      \includegraphics[width=0.32\columnwidth]{figs/fit_overfit_poly.pdf}
+
+      \smallskip\small
+
+      \hspace{2em}$N=2$ \hfill $N=7$ \hfill $N=40$ \hspace{1.2em}
     \end{column}
-    \begin{column}{0.33\textwidth}
+  \end{columns}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Machine learning is just curve fitting}
+
+  \begin{columns}
+    \begin{column}{0.8\textwidth}
+      Number of data points $M$ is big: all images on the internet
+
+      \medskip
+
+      Ground truth function is unknown: probability the image contains a cat
+
+      \medskip
+
+      Fit function is a neural network:
+      \[
+        \hat f(\mathbf x\mid B_1,\ldots,B_L)=\sigma\left(B_L \sigma\left( B_{L-1}\cdots\sigma\left(B_2\sigma (B_1\mathbf x)\right)\cdots\right)\right)
+      \]
+
+      \medskip
+
+      $\chi^2$ is called \emph{training error}
+
+      \medskip
+
+      MSE is called \emph{test} or \emph{generalization error}
     \end{column}
   \end{columns}
 \end{frame}
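
A minimal numpy sketch of the fit these slides set up: M = 40 noisy samples, a polynomial basis, and coefficients chosen to minimize chi^2. The ground-truth function sin(3x), the noise level 0.1, and the sampling interval [-1, 1] are illustrative assumptions, not taken from the deck (its figures are prebuilt PDFs under figs/).

import numpy as np

rng = np.random.default_rng(0)

def f(x):
    return np.sin(3 * x)                  # hypothetical stand-in for the ground truth

M = 40                                    # number of data points, as on the slides
x = np.sort(rng.uniform(-1, 1, M))
y = f(x) + 0.1 * rng.standard_normal(M)   # y_i = f(x_i) + xi

for N in (2, 7, 40):                      # underfit, good fit, overfit
    B = np.vander(x, N, increasing=True)      # M x N design matrix, b_j(x) = x^(j-1)
    a, *_ = np.linalg.lstsq(B, y, rcond=None) # coefficients minimizing chi^2 = |y - B a|^2
    chi2 = np.sum((y - B @ a) ** 2)
    print(f"N = {N:2d}   chi^2 = {chi2:.3e}")

With N = M = 40 the design matrix is square and generically invertible, so the best fit essentially interpolates the data and chi^2 drops to numerical zero, as the third panel claims.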
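The bias-variance slide scores each fit against the known ground truth via the MSE integral. Under the same illustrative assumptions as the sketch above, this evaluates each fit on a dense grid, with the grid average standing in for the integral over x (up to the interval length).

import numpy as np

rng = np.random.default_rng(0)

def f(x):                                 # assumed ground truth, as above
    return np.sin(3 * x)

x = np.sort(rng.uniform(-1, 1, 40))
y = f(x) + 0.1 * rng.standard_normal(40)

xs = np.linspace(-1, 1, 1000)             # dense evaluation grid
for N in (2, 7, 40):
    B = np.vander(x, N, increasing=True)
    a, *_ = np.linalg.lstsq(B, y, rcond=None)
    fhat = np.vander(xs, N, increasing=True) @ a
    mse = np.mean((f(xs) - fhat) ** 2)    # grid estimate of the MSE integral
    print(f"N = {N:2d}   MSE = {mse:.3e}")

The small-N fit misses the shape of the curve (bias), the N = 40 fit chases the noise and swings wildly between samples (variance), and an intermediate N minimizes their sum, which is the trade-off plotted in figs/fit_bias-variance_poly.pdf.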
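The last frame's fit function is a chain of linear maps and elementwise nonlinearities. Here is a tiny forward pass in the same nested form, sigma(B_L sigma(... sigma(B_1 x))); the layer widths and the choice sigma = tanh are assumptions for illustration, since the slide leaves sigma and the B_l unspecified (and real networks usually also carry bias vectors, omitted here).

import numpy as np

rng = np.random.default_rng(1)

def sigma(z):
    return np.tanh(z)                     # assumed elementwise nonlinearity

# Hypothetical widths: input dimension 4, two hidden layers, scalar output.
widths = [4, 8, 8, 1]
Bs = [rng.standard_normal((m, n)) for n, m in zip(widths, widths[1:])]

def fhat(x, Bs):
    # sigma(B_L sigma(B_{L-1} ... sigma(B_1 x) ...))
    for B in Bs:
        x = sigma(B @ x)
    return x

print(fhat(rng.standard_normal(4), Bs))   # output of shape (1,)

Training then means minimizing the training error (the slides' chi^2) over the entries of the B_l, while the quantity one actually cares about is the test or generalization error, the analogue of the MSE above, measured on data the fit has not seen.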