\documentclass[aspectratio=169,usenames,dvipsnames]{beamer} \setbeamerfont{title}{family=\bf} \setbeamerfont{frametitle}{family=\bf} \setbeamerfont{normal text}{family=\rm} \setbeamertemplate{navigation symbols}{} \setbeamercolor{titlelike}{parent=structure,fg=cyan} \usepackage{enumitem} \usepackage[utf8]{inputenc} \usepackage[T1]{fontenc} \usepackage{pifont} \usepackage{graphicx} \usepackage{xcolor} \usepackage{tikz} \definecolor{ictpblue}{HTML}{0471b9} \definecolor{ictpgreen}{HTML}{0c8636} \definecolor{mb}{HTML}{5e81b5} \definecolor{my}{HTML}{e19c24} \definecolor{mg}{HTML}{8fb032} \definecolor{mr}{HTML}{eb6235} \setbeamercolor{titlelike}{parent=structure,fg=ictpblue} \setbeamercolor{itemize item}{fg=ictpblue} \usepackage[ style=phys, eprint=true, maxnames = 100, terseinits=true ]{biblatex} \addbibresource{ictp-saifr_colloquium.bib} \title{ Fitting with more parameters than data points\\\normalsize Topology of the solutions to overparameterized problems } \author{\textbf{Jaron Kent-Dobias}\\Simons--FAPESP Young Investigator} \date{19 February 2025} \begin{document} \begin{frame} \maketitle \vspace{-6pc} \begin{minipage}[c]{10pc} \centering \includegraphics[height=6pc]{figs/ift-unesp.png} \end{minipage} \hfill\begin{minipage}[c]{10pc} \centering \includegraphics[height=6pc]{figs/logo-ictp-saifr.jpg} \end{minipage} \vspace{2pc} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, and the ugly} \begin{columns} \begin{column}{0.4\textwidth} You have $M$ data points $(x_1,y_1),\ldots,(x_M,y_M)$ \bigskip Perhaps a noisy sample of a ground truth function $y_i=f(x_i)+\xi$ \end{column} \begin{column}{0.6\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\columnwidth]{figs/fit_data.pdf} \onslide<2>\includegraphics[width=\columnwidth]{figs/fit_data_truth.pdf} \onslide<3>\includegraphics[width=\columnwidth]{figs/fit_data.pdf} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, and the ugly} \begin{columns} \begin{column}{0.4\textwidth} Pick a basis of $N$ functions \[b_1(x), \ldots, b_N(x)\] \smallskip Approximate the ground truth \[ \hat f(x\mid a_1,\ldots, a_N)=\sum_{j=1}^Na_jb_j(x) \] Find $\pmb a=[a_1, \ldots, a_N]$ minimizing \[ \chi^2(\pmb a\mid\pmb x,\pmb y) =\sum_{i=1}^M\left(y_i-\hat f(x_i\mid\pmb a)\right)^2 \] \end{column} \begin{column}{0.6\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\columnwidth]{figs/fit_basis_poly.pdf} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, and the ugly} \begin{columns} \begin{column}{0.333\textwidth} \centering $M=40$, $N=2$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_underfit_poly.pdf} \bigskip Underfit \smallskip Too few parameters \smallskip $\chi^2$ is large \smallskip Best fit is \emph{biased} \end{column} \begin{column}{0.333\textwidth} \centering $M=40$, $N=7$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_goodfit_poly.pdf} \bigskip Good fit! 
\smallskip Right number of parameters \smallskip $\chi^2$ is moderate \smallskip \vphantom{Best fit} \end{column}
\begin{column}{0.333\textwidth} \centering $M=40$, $N=40$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overfit_poly.pdf} \bigskip Overfit \smallskip Too many parameters \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{high variance} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, and the ugly} \begin{columns} \begin{column}{0.5\textwidth} Knowing the ground truth, the fit error is \[ \text{MSE}=\int dx\left(f(x)-\hat f(x\mid\pmb a)\right)^2 \] \smallskip Trade-off between \emph{bias} and \emph{variance}: \begin{itemize} \item \textbf{Bias} reflects missing qualitative features of the data \item \textbf{Variance} reflects strong dependence on the noise \end{itemize} \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\columnwidth]{figs/fit_bias-variance_poly.pdf} \medskip \includegraphics[width=0.32\columnwidth]{figs/fit_underfit_poly.pdf} \hfill \includegraphics[width=0.32\columnwidth]{figs/fit_goodfit_poly.pdf} \hfill \includegraphics[width=0.32\columnwidth]{figs/fit_overfit_poly.pdf} \smallskip\small \hspace{2em}$N=2$ \hfill $N=7$ \hfill $N=40$ \hspace{1.2em} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Machine learning is just \emph{non}linear least squares} \begin{columns} \begin{column}{0.9\textwidth} Number of data points $M$ is big: all images on the internet \medskip Ground truth function is unknown: probability the image contains a cat \medskip Fit function is a neural network: \[ \hat f(\pmb x\mid B_1,\ldots,B_L)=\sigma\left(B_L \sigma\left( B_{L-1}\cdots\sigma\left(B_2\sigma (B_1\pmb x)\right)\cdots\right)\right) \] \medskip $\chi^2(\pmb a\mid\text{data})$ is called the \emph{cost} or \emph{loss function} \medskip $\chi^2(\pmb a^*\mid\text{data})$ is called the \emph{training error} \medskip MSE is called the \emph{test} or \emph{generalization error} \bigskip \textbf{BUT:} machine learning uses many more parameters $N$ than data points $M$ \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, the ugly, and the weird} \begin{columns} \begin{column}{0.25\textwidth} \centering $M=40$, $N=2$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_underfit_poly.pdf} \bigskip Underfit \smallskip $\chi^2$ is large \smallskip Best fit has \emph{high bias} \phantom{variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=7$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_goodfit_poly.pdf} \bigskip Good fit! \smallskip $\chi^2$ is moderate \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=40$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overfit_poly.pdf} \bigskip Overfit \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{high variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=80$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly.pdf} \bigskip Good fit?
\smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, the ugly, and the weird} \begin{columns} \begin{column}{0.5\textwidth} \centering $M=40$, $N=80$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly.pdf} \end{column} \begin{column}{0.5\textwidth} \centering \includegraphics[width=\textwidth]{figs/fit_bias-variance2_poly.pdf} \bigskip Bias--variance trade-off is blown up! \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{Overparameterized solutions are not unique} \begin{columns} \begin{column}{0.5\textwidth} Underparameterized fitting ($M>N$) has a unique minimizing solution \medskip Overparameterized fits are not unique: $\chi^2=0$ imposes $M$ constraints \[ 0=y_i-\hat f(x_i\mid\pmb a)\qquad\text{for all $1\leq i\leq M$} \] on the $N$ unknowns $\pmb a=[a_1,\ldots, a_N]$, leaving a manifold of solutions of dimension $N-M$ \medskip What leads to the `good' solutions instead of `bad' ones? \end{column} \begin{column}{0.5\textwidth} \centering $M=40$, $N=80$ \bigskip \begin{overprint} \onslide<1>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly.pdf} \onslide<2>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_1.pdf} \onslide<3>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_2.pdf} \onslide<4>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_3.pdf} \onslide<5>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_6.pdf} \onslide<6>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_7.pdf} \onslide<7>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_8.pdf} \onslide<8>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_9.pdf} \end{overprint} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{Gradient descent and implicit regularization} \begin{columns} \begin{column}{0.5\textwidth} Overparameterized fits are found with the gradient descent algorithm: take small steps in the direction $-\nabla\chi^2$ until $\|\nabla\chi^2\|<\epsilon$ \medskip The result of descent depends on the initial condition: what $\pmb a$ to start with?
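\medskip One common form of the update, with initialization $\pmb a_0$ and step size $\eta$: \[ \pmb a_{t+1}=\pmb a_t-\eta\,\nabla\chi^2(\pmb a_t\mid\text{data}) \]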
\medskip \textbf{Unexpected fact:} gradient descent with small initialization equivalent to unique optimum in \emph{regularized} problem: \[ \chi^2_\text{eff}(\pmb a\mid\text{data})=\chi^2(\pmb a\mid\text{data})+\lambda\|\pmb a\|^2 \] \end{column} \begin{column}{0.5\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\columnwidth]{figs/fit_gradient_1.pdf} \onslide<2>\includegraphics[width=\columnwidth]{figs/fit_gradient_2.pdf} \onslide<3>\includegraphics[width=\columnwidth]{figs/fit_gradient_3.pdf} \onslide<4>\includegraphics[width=\columnwidth]{figs/fit_gradient_4.pdf} \onslide<5>\includegraphics[width=\columnwidth]{figs/fit_gradient_5.pdf} \onslide<6>\includegraphics[width=\columnwidth]{figs/fit_gradient_6.pdf} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Choice of basis} \begin{columns} \begin{column}{0.5\textwidth} \centering \textbf{Polynomial basis} \bigskip \includegraphics[width=\columnwidth]{figs/fit_basis_poly.pdf} \end{column} \begin{column}{0.5\textwidth} \centering \textbf{Absolute value basis} \bigskip \includegraphics[width=\columnwidth]{figs/fit_basis_abs.pdf} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Choice of basis} \centering \Large\textbf{Polynomial basis}\normalsize \bigskip \begin{columns} \begin{column}{0.25\textwidth} \centering $M=40$, $N=2$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_underfit_poly.pdf} \bigskip Underfit \smallskip $\chi^2$ is large \smallskip Best fit has \emph{high bias} \phantom{variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=7$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_goodfit_poly.pdf} \bigskip Good fit! \smallskip $\chi^2$ is moderate \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=40$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overfit_poly.pdf} \bigskip Overfit \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{high variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=80$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly.pdf} \bigskip Good fit? \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Choice of basis} \centering \Large\textbf{Absolute value basis\vphantom{y}}\normalsize \bigskip \begin{columns} \begin{column}{0.25\textwidth} \centering $M=40$, $N=2$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_underfit_abs.pdf} \bigskip Underfit \smallskip $\chi^2$ is large \smallskip Best fit has \emph{high bias} \phantom{variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=7$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_goodfit_abs.pdf} \bigskip Good fit! \smallskip $\chi^2$ is moderate \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=40$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overfit_abs.pdf} \bigskip Good fit? \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=80$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_abs.pdf} \bigskip Good fit? 
\smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Choice of basis} \begin{columns} \begin{column}{0.5\textwidth} \centering \textbf{Polynomial basis} \bigskip \includegraphics[width=\columnwidth]{figs/fit_bias-variance2_poly.pdf} \end{column} \begin{column}{0.5\textwidth} \centering \textbf{Absolute value basis} \bigskip \includegraphics[width=\columnwidth]{figs/fit_bias-variance_abs.pdf} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Sparseness and level of noise} \begin{columns} \begin{column}{0.5\textwidth} \includegraphics[width=\columnwidth]{figs/fit_data.pdf} \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\columnwidth]{figs/fit_data_abs2.pdf} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Sparseness and level of noise} \begin{columns} \begin{column}{0.25\textwidth} \centering $M=10$, $N=4$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_underfit_abs2.pdf} \bigskip Underfit \smallskip $\chi^2$ is large \smallskip Best fit has \emph{high bias} \phantom{variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=10$, $N=6$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_goodfit_abs2.pdf} \bigskip Good fit! \smallskip $\chi^2$ is moderate \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=10$, $N=10$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overfit_abs2.pdf} \bigskip Good fit? \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=10$, $N=15$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_abs2.pdf} \bigskip Better fit?! 
\smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{Sparseness and level of noise} \begin{columns} \begin{column}{0.5\textwidth} \centering $M=10$, $N=15$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_abs2.pdf} \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\columnwidth]{figs/fit_bias-variance_abs2.pdf} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Machine learning is just \emph{non}linear least squares} \begin{columns} \begin{column}{0.5\textwidth} Gradient descent produces poor solutions to many machine learning problems \bigskip \textbf{BUT:} no one uses gradient descent \bigskip \emph{Stochastic} gradient descent (SGD): follow an approximate gradient of $\chi^2$ computed from small subsets (batches) of the data \bigskip The approximate gradient takes \emph{fewer} steps to find \emph{better} solutions \end{column} \begin{column}{0.5\textwidth} \end{column} \end{columns} \end{frame}
\begin{frame} \begin{columns} \begin{column}{0.9\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\columnwidth]{figs/gradient_vs_sgd_1.png} \onslide<2>\includegraphics[width=\columnwidth]{figs/gradient_vs_sgd_2.png} \onslide<3>\includegraphics[width=\columnwidth]{figs/gradient_vs_sgd_3.png} \onslide<4>\includegraphics[width=\columnwidth]{figs/gradient_vs_sgd_4.png} \onslide<5>\includegraphics[width=\columnwidth]{figs/gradient_vs_sgd_5.png} \end{overprint} \end{column} \end{columns} \end{frame}
\begin{frame} \centering \begin{tikzpicture} \draw (0,0) node[align=center] {Overparameterization works\\\includegraphics[width=3cm]{figs/fit_overparamfit_abs2.pdf}}; \draw (4,2) node[align=center] {Gradient descent\\implicitly regularizes\\\includegraphics[height=2cm]{figs/fit_gradient_5.pdf}}; \draw (-4,2) node[align=center] {Neural networks\\are good bases\\\includegraphics[height=2cm]{figs/fit_basis_abs.pdf}}; \draw (-4,-2) node[align=center] {Data is sparse\\and high-dimensional\\\includegraphics[height=2cm]{figs/fit_data_abs2.pdf}}; \draw (4,-2) node[align=center] {SGD finds\\high-entropy solutions\\\includegraphics[height=2cm]{figs/gradient_vs_sgd_4.png}}; \end{tikzpicture} \end{frame}
\begin{frame} \frametitle{Machine learning is just \emph{non}linear least squares} \begin{columns} \begin{column}{0.5\textwidth} The structure and geometry of the manifold of ``perfect'' solutions is integral to understanding overparameterized fits \medskip \textbf{BUT:} extremely little is known outside of the linear case \end{column} \begin{column}{0.5\textwidth} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{The Euler characteristic \boldmath{$\chi$}} \begin{columns} \begin{column}{0.5\textwidth} The Euler characteristic $\chi(\Omega)$ is a topological invariant of a manifold $\Omega$ \medskip Defined by tiling the manifold, then taking the alternating sum \begin{align*} \chi(\Omega_{\text{cow}}) &= {\only<2,5->{\color{Red}}\#_\text{vertices}} &&\hspace{-1em}- {\only<3,5->{\color{ictpgreen}}\#_\text{edges}} &&\hspace{-1em}+ {\only<4,5->{\color{ictpblue}}\#_\text{faces}} \\ &\color{White}\only<2->{\color{Black}}= {\only<2,5->{\color{Red}}2904} &&\hspace{-1em}\color{White}\only<3->{\color{Black}}- {\only<3,5->{\color{ictpgreen}}8706} &&\hspace{-1em}\color{White}\only<4->{\color{Black}}+ {\only<4,5->{\color{ictpblue}}5804} \\ &\color{White}\only<5->{\color{Black}}=2 \end{align*} \[
\color{White}\only<6->{\color{Black}}\chi(\Omega_\text{football}) ={\only<6->{\color{Red}}60}-{\only<6->{\color{ictpgreen}}90}+{\only<6->{\color{ictpblue}}32}=2 \] \color{White}\only<7>{\color{Black}}Cow is homeomorphic to a sphere \end{column} \begin{column}{0.5\textwidth} \begin{overprint} \onslide<1,5>\includegraphics[width=\textwidth]{figs/cow.png} \onslide<2>\includegraphics[width=\textwidth]{figs/cow_vert.png} \onslide<3>\includegraphics[width=\textwidth]{figs/cow_edge.png} \onslide<4>\includegraphics[width=\textwidth]{figs/cow_face.png} \onslide<6->\hspace{2em}\includegraphics{figs/Football_Pallo_valmiina-cropped.jpg} \end{overprint} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Characteristics of the Euler characteristic} \begin{columns} \begin{column}{0.5\textwidth} For closed, connected 2-dimensional manifolds, related to genus $g$ by $\chi=2-2g$ \medskip General properties: \vspace{-0.5em} \[ \chi(\Omega)=0 \text{ for odd-dimensional $\Omega$} \] \vspace{-1.6em} \[ \chi(S^D)=2\text{ for even }D \] \[ \chi(\Omega_1\sqcup\Omega_2)=\chi(\Omega_1)+\chi(\Omega_2) \] \[ \chi(\Omega_1\times\Omega_2)=\chi(\Omega_1)\times\chi(\Omega_2) \] \smallskip Examples: \vspace{-0.5em} \[\chi(M\text{ even-$D$ spheres})=2M\] \vspace{-1.6em} \[\chi(S^1\times\text{anything})=0\] \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\textwidth]{figs/genus.png} \end{column} \end{columns} \end{frame}
\begin{frame} \begin{columns} \begin{column}{0.16\textwidth} \Large \textbf{\color{ictpblue}\boldmath{$\chi$} for constant energy level sets} \vspace{11em} \end{column} \begin{column}{0.7\textwidth} \begin{overprint} \onslide<1>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-0.png}} \onslide<2>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-1.png}} \onslide<3>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-2.png}} \end{overprint} \end{column} \begin{column}{0.16\textwidth} \begin{overprint} \onslide<2>\centering High energy \vspace{0.5em} $\chi(\Omega)\ll0$ \vspace{0.5em} hole\\ dominated \onslide<3>\centering Low energy \vspace{0.5em} $\chi(\Omega)\gg0$ \vspace{0.5em} component\\ dominated \end{overprint} \vspace{15em} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Computing the Euler characteristic} \begin{columns} \begin{column}{0.5\textwidth} Morse theory: gradient flow on an arbitrary ``height'' function $h$ makes a complex \begin{align*} \chi(\Omega) &= {\only<2,5>{\color{Red}}\#_\text{vertices}} - {\only<3,5>{\color{ictpgreen}}\#_\text{edges}} + {\only<4,5>{\color{ictpblue}}\#_\text{faces}} +\cdots \\ &= {\only<6>{\color{ictpblue}}\#_\text{index 0}} - {\only<6>{\color{ictpgreen}}\#_\text{index 1}} + {\only<6>{\color{Red}}\#_\text{index 2}} +\cdots \\ &=\sum_{i=0}^D(-1)^i\#_{\text{index }i} \end{align*} \[ \hspace{-1em}\operatorname{sgn}\big(\det\operatorname{Hess}h(\pmb x)\big) = \operatorname{sgn}\left(\prod_{i=1}^D\lambda_i\right) =(-1)^{\text{index}} \] \[ \chi(\Omega) =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big) \,\det\operatorname{Hess}h(\pmb x) \] \emph{Kac--Rice without the absolute value!} \end{column} \begin{column}{0.5\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\textwidth]{figs/other_sphere.png} \onslide<2>\includegraphics[width=\textwidth]{figs/other_sphere_vert.png} \onslide<3>\includegraphics[width=\textwidth]{figs/other_sphere_edge.png} \onslide<4>\includegraphics[width=\textwidth]{figs/other_sphere_face.png}
\onslide<5>\includegraphics[width=\textwidth]{figs/other_sphere_all.png} \onslide<6>\includegraphics[width=\textwidth]{figs/other_sphere_crit.png} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Computing the Euler characteristic of level sets} \begin{columns} \begin{column}{0.6\textwidth} Pick whatever height function $h:\Omega\to\mathbb R$ you like: $h(\pmb x)=\frac1N\pmb x_0\cdot\pmb x$ for arbitrary $\pmb x_0$. \[ \chi(\Omega) =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)\,\det\operatorname{Hess}h(\pmb x) \] Level set $\Omega$ defined by $H(\pmb x)=EN$ and $\|\pmb x\|^2=N$ \bigskip Lagrange multipliers replace differential geometry: \[ L(\pmb x,\pmb\omega)=h(\pmb x)+\omega_0(\|\pmb x\|^2-N)+\omega_1(H(\pmb x)-EN) \] \end{column} \begin{column}{0.4\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\textwidth]{figs/function-0.png} \onslide<2>\includegraphics[width=\textwidth]{figs/function-1.png} \onslide<3>\includegraphics[width=\textwidth]{figs/function-2.png} \end{overprint} \end{column} \end{columns} \[ \chi(\Omega) =\int_{\mathbb R^{N+2}} d\pmb x\,d\pmb\omega\,\delta\big(\begin{bmatrix}\frac{\partial L}{\partial\pmb x}&\frac{\partial L}{\partial\pmb\omega}\end{bmatrix}\big) \,\det\begin{bmatrix}\frac{\partial^2L}{\partial\pmb x^2}&\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}\\\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}&\frac{\partial^2L}{\partial\pmb\omega^2}\end{bmatrix} \] \end{frame} \begin{frame} \begin{columns} \begin{column}{\textwidth} \includegraphics[width=\textwidth]{figs/slice.png} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Results: \boldmath{$3+s$} mixed spherical models} \begin{columns} \begin{column}{0.5\textwidth} \begin{align*} H(\pmb x)=\lambda_s\sum_{i_1,i_2,i_3}^NJ_{i_1,i_2,i_3}x_{i_1}x_{i_2}x_{i_3} \hspace{4em} \\ +(1-\lambda_s)\sum_{i_1,\ldots,i_s}^NJ_{i_1,\ldots,i_s}x_{i_1}\cdots x_{i_s} \end{align*} \textcolor{mb}{\textbf{\boldmath{$E_\text{gs}$:}} ground state, energy of lowest minima} \smallskip \textcolor{mg}{\textbf{\boldmath{$E_\text{alg}$:}} algorithmic bound, set by OGP} \smallskip \textcolor{my}{\textbf{\boldmath{$E_\text{th}$:}} `threshold', marginal minima dominate} \smallskip \textcolor{mr}{\textbf{\boldmath{$E_\text{sh}$:}} `shattering', $\chi$ changes sign} \bigskip \tiny \fullcite{Kent-Dobias_2024_On} \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\textwidth]{figs/folena_new.pdf} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Preliminary results: other models?} \begin{columns} \begin{column}{0.5\textwidth} Example: non-Gaussian landscapes \[ H(\pmb x)=\frac12\sum_{i=1}^{\alpha N}V_i(\pmb x)^2 \] for spherical $\pmb x$ and Gaussian functions $V_i$ \medskip $E_\text{sh}$ consistent with gradient descent? More work needed... 
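\smallskip (Cf.\ the fitting problems above: under the illustrative identification $V_i(\pmb a)=y_i-\hat f(x_i\mid\pmb a)$ with $\alpha N=M$, this $H$ is just $\frac12\chi^2(\pmb a\mid\text{data})$.)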
\bigskip\tiny \fullcite{Kent-Dobias_2024_Algorithm-independent} \smallskip \fullcite{Kent-Dobias_2024_On} \end{column} \begin{column}{0.5\textwidth} \vspace{-1em} \begin{overprint} \onslide<1>\includegraphics[width=\textwidth]{figs/most_squares_nonzoom.pdf} \onslide<2>\includegraphics[width=\textwidth]{figs/extrapolation.pdf} \end{overprint} \vspace{-0.4em} \includegraphics[width=\textwidth]{figs/most_squares_zoom_2.pdf} \vspace{1em} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Outlook, other applications, future directions} \begin{columns} \begin{column}{0.5\textwidth} Euler characteristic reveals structure of problems with no energy function:\\ e.g., the set of $\pmb x$ such that \[ V_i(\pmb x)=\sqrt NV_0 \qquad i=1,\ldots,\alpha N \] for independent Gaussian $V_i$ \medskip \tiny \fullcite{Kent-Dobias_2024_On} \bigskip\normalsize \textcolor{ictpgreen}{\textbf{To Do:}} Resolve GD question: better DMFT, direct reasoning for relationship to topology \medskip Extend topological arguments beyond GD \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\textwidth]{figs/spheres.png} \medskip \includegraphics[width=\textwidth]{figs/phases.png} \end{column} \end{columns} \end{frame} \end{document}