\documentclass[aspectratio=169,usenames,dvipsnames]{beamer} \setbeamerfont{title}{family=\bf} \setbeamerfont{frametitle}{family=\bf} \setbeamerfont{normal text}{family=\rm} \setbeamertemplate{navigation symbols}{} \setbeamercolor{titlelike}{parent=structure,fg=cyan} \usepackage{enumitem} \usepackage[utf8]{inputenc} \usepackage[T1]{fontenc} \usepackage{pifont} \usepackage{graphicx} \usepackage{xcolor} \usepackage{tikz} \definecolor{ictpblue}{HTML}{0471b9} \definecolor{ictpgreen}{HTML}{0c8636} \definecolor{mb}{HTML}{5e81b5} \definecolor{my}{HTML}{e19c24} \definecolor{mg}{HTML}{8fb032} \definecolor{mr}{HTML}{eb6235} \setbeamercolor{titlelike}{parent=structure,fg=ictpblue} \setbeamercolor{itemize item}{fg=ictpblue} \usepackage[ style=phys, eprint=true, maxnames = 100, terseinits=true ]{biblatex} \addbibresource{ictp-saifr_colloquium.bib} \title{ Fitting with more parameters than data points\\\normalsize Topology of the solutions to overparameterized problems } \author{\textbf{Jaron Kent-Dobias}\\Simons--FAPESP Young Investigator} \date{19 February 2025} \begin{document} \begin{frame} \maketitle \vspace{-6pc} \begin{minipage}[c]{10pc} \centering \includegraphics[height=6pc]{figs/ift-unesp.png} \end{minipage} \hfill\begin{minipage}[c]{10pc} \centering \includegraphics[height=6pc]{figs/logo-ictp-saifr.jpg} \end{minipage} \vspace{2pc} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, and the ugly} \begin{columns} \begin{column}{0.4\textwidth} You have $M$ data points $(x_1,y_1),\ldots,(x_M,y_M)$ \bigskip Perhaps a noisy sample of a ground truth function $y_i=f(x_i)+\xi$ \end{column} \begin{column}{0.6\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\columnwidth]{figs/fit_data.pdf} \onslide<2>\includegraphics[width=\columnwidth]{figs/fit_data_truth.pdf} \onslide<3>\includegraphics[width=\columnwidth]{figs/fit_data.pdf} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, and the ugly} \begin{columns} \begin{column}{0.4\textwidth} Pick a basis of $N$ functions \[b_1(x), \ldots, b_N(x)\] \smallskip Approximate the ground truth \[ \hat f(x\mid a_1,\ldots, a_N)=\sum_{j=1}^Na_jb_j(x) \] Find $\pmb a=[a_1, \ldots, a_N]$ minimizing \[ \chi^2(\pmb a\mid\pmb x,\pmb y) =\sum_{i=1}^M\left(y_i-\hat f(x_i\mid\pmb a)\right)^2 \] \end{column} \begin{column}{0.6\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\columnwidth]{figs/fit_basis_poly.pdf} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, and the ugly} \begin{columns} \begin{column}{0.333\textwidth} \centering $M=40$, $N=2$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_underfit_poly.pdf} \bigskip Underfit \smallskip Too few parameters \smallskip $\chi^2$ is large \smallskip Best fit is \emph{biased} \end{column} \begin{column}{0.333\textwidth} \centering $M=40$, $N=7$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_goodfit_poly.pdf} \bigskip Good fit! 
\smallskip Right number of parameters \smallskip $\chi^2$ is moderate \smallskip \vphantom{Best fit} \end{column}
\begin{column}{0.333\textwidth} \centering $M=40$, $N=40$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overfit_poly.pdf} \bigskip Overfit \smallskip Too many parameters \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{high variance} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, and the ugly} \begin{columns} \begin{column}{0.5\textwidth} Knowing the ground truth, the fit error is \[ \text{MSE}=\int dx\left(f(x)-\hat f(x\mid\pmb a)\right)^2 \] \smallskip Trade-off between \emph{bias} and \emph{variance}: \begin{itemize} \item \textbf{Bias} reflects missing qualitative features of the data \item \textbf{Variance} reflects strong dependence on the noise \end{itemize} \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\columnwidth]{figs/fit_bias-variance_poly.pdf} \medskip \includegraphics[width=0.32\columnwidth]{figs/fit_underfit_poly.pdf} \hfill \includegraphics[width=0.32\columnwidth]{figs/fit_goodfit_poly.pdf} \hfill \includegraphics[width=0.32\columnwidth]{figs/fit_overfit_poly.pdf} \smallskip\small \hspace{2em}$N=2$ \hfill $N=7$ \hfill $N=40$ \hspace{1.2em} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Machine learning is just \emph{non}linear least squares} \begin{columns} \begin{column}{0.9\textwidth} Number of data points $M$ is big: all images on the internet \medskip Ground truth function is unknown: probability the image contains a cat \medskip Fit function is a neural network: \[ \hat f(\pmb x\mid B_1,\ldots,B_L)=\sigma\left(B_L \sigma\left( B_{L-1}\cdots\sigma\left(B_2\sigma (B_1\pmb x)\right)\cdots\right)\right) \] \medskip $\chi^2(\pmb a\mid\text{data})$ is called the \emph{cost} or \emph{loss function} \medskip $\chi^2(\pmb a^*\mid\text{data})$ is called the \emph{training error} \medskip MSE is called the \emph{test} or \emph{generalization error} \bigskip \textbf{BUT:} machine learning uses many more parameters $N$ than data points $M$ \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, the ugly, and the weird} \begin{columns} \begin{column}{0.25\textwidth} \centering $M=40$, $N=2$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_underfit_poly.pdf} \bigskip Underfit \smallskip $\chi^2$ is large \smallskip Best fit has \emph{high bias} \phantom{variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=7$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_goodfit_poly.pdf} \bigskip Good fit! \smallskip $\chi^2$ is moderate \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=40$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overfit_poly.pdf} \bigskip Overfit \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{high variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=80$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly.pdf} \bigskip Good fit?
\smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{The bad, the good, the ugly, and the weird} \begin{columns} \begin{column}{0.5\textwidth} \centering $M=40$, $N=80$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly.pdf} \end{column} \begin{column}{0.5\textwidth} \centering \includegraphics[width=\textwidth]{figs/fit_bias-variance2_poly.pdf} \bigskip Bias--variance trade-off is blown up! \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{Overparameterized solutions are not unique} \begin{columns} \begin{column}{0.5\textwidth} Underparameterized fitting ($M>N$) has a unique minimizing solution \medskip Overparameterized fits are not unique: $\chi^2=0$ imposes $M$ constraints \[ 0=y_i-\hat f(x_i\mid\pmb a)\qquad\text{for all $1\leq i\leq M$} \] on the $N$ unknowns $\pmb a=[a_1,\ldots, a_N]$, leaving a manifold of solutions of dimension $N-M$ \medskip What leads to the `good' solutions instead of `bad' ones? \end{column} \begin{column}{0.5\textwidth} \centering $M=40$, $N=80$ \bigskip \begin{overprint} \onslide<1>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly.pdf} \onslide<2>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_1.pdf} \onslide<3>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_2.pdf} \onslide<4>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_3.pdf} \onslide<5>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_6.pdf} \onslide<6>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_7.pdf} \onslide<7>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_8.pdf} \onslide<8>\includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly_9.pdf} \end{overprint} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{Gradient descent and implicit regularization} \begin{columns} \begin{column}{0.5\textwidth} Overparameterized fits are found with the gradient descent algorithm: take small steps in the direction $-\nabla\chi^2$ until $\|\nabla\chi^2\|<\epsilon$ \medskip The result of descent depends on the initial condition: what $\pmb a$ to start with?
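\medskip One common form of the update, with initialization $\pmb a_0$ and step size $\eta$: \[ \pmb a_{t+1}=\pmb a_t-\eta\,\nabla\chi^2(\pmb a_t\mid\text{data}) \]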
\medskip \textbf{Unexpected fact:} gradient descent with small initialization equivalent to unique optimum in \emph{regularized} problem: \[ \chi^2_\text{eff}(\pmb a\mid\text{data})=\chi^2(\pmb a\mid\text{data})+\lambda\|\pmb a\|^2 \] \end{column} \begin{column}{0.5\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\columnwidth]{figs/fit_gradient_1.pdf} \onslide<2>\includegraphics[width=\columnwidth]{figs/fit_gradient_2.pdf} \onslide<3>\includegraphics[width=\columnwidth]{figs/fit_gradient_3.pdf} \onslide<4>\includegraphics[width=\columnwidth]{figs/fit_gradient_4.pdf} \onslide<5>\includegraphics[width=\columnwidth]{figs/fit_gradient_5.pdf} \onslide<6>\includegraphics[width=\columnwidth]{figs/fit_gradient_6.pdf} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Choice of basis} \begin{columns} \begin{column}{0.5\textwidth} \centering \textbf{Polynomial basis} \bigskip \includegraphics[width=\columnwidth]{figs/fit_basis_poly.pdf} \end{column} \begin{column}{0.5\textwidth} \centering \textbf{Absolute value basis} \bigskip \includegraphics[width=\columnwidth]{figs/fit_basis_abs.pdf} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Choice of basis} \centering \Large\textbf{Polynomial basis}\normalsize \bigskip \begin{columns} \begin{column}{0.25\textwidth} \centering $M=40$, $N=2$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_underfit_poly.pdf} \bigskip Underfit \smallskip $\chi^2$ is large \smallskip Best fit has \emph{high bias} \phantom{variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=7$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_goodfit_poly.pdf} \bigskip Good fit! \smallskip $\chi^2$ is moderate \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=40$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overfit_poly.pdf} \bigskip Overfit \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{high variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=80$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_poly.pdf} \bigskip Good fit? \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Choice of basis} \centering \Large\textbf{Absolute value basis\vphantom{y}}\normalsize \bigskip \begin{columns} \begin{column}{0.25\textwidth} \centering $M=40$, $N=2$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_underfit_abs.pdf} \bigskip Underfit \smallskip $\chi^2$ is large \smallskip Best fit has \emph{high bias} \phantom{variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=7$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_goodfit_abs.pdf} \bigskip Good fit! \smallskip $\chi^2$ is moderate \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=40$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overfit_abs.pdf} \bigskip Good fit? \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=40$, $N=80$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_abs.pdf} \bigskip Good fit? 
\smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Choice of basis} \begin{columns} \begin{column}{0.5\textwidth} \centering \textbf{Polynomial basis} \bigskip \includegraphics[width=\columnwidth]{figs/fit_bias-variance2_poly.pdf} \end{column} \begin{column}{0.5\textwidth} \centering \textbf{Absolute value basis} \bigskip \includegraphics[width=\columnwidth]{figs/fit_bias-variance_abs.pdf} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Sparseness and level of noise} \begin{columns} \begin{column}{0.5\textwidth} \includegraphics[width=\columnwidth]{figs/fit_data.pdf} \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\columnwidth]{figs/fit_data_abs2.pdf} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Linear least squares} \framesubtitle{Sparseness and level of noise} \begin{columns} \begin{column}{0.25\textwidth} \centering $M=10$, $N=4$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_underfit_abs2.pdf} \bigskip Underfit \smallskip $\chi^2$ is large \smallskip Best fit has \emph{high bias} \phantom{variance} \end{column} \begin{column}{0.25\textwidth} \centering $M=10$, $N=6$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_goodfit_abs2.pdf} \bigskip Good fit! \smallskip $\chi^2$ is moderate \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=10$, $N=10$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overfit_abs2.pdf} \bigskip Good fit? \smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \begin{column}{0.25\textwidth} \centering $M=10$, $N=15$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_abs2.pdf} \bigskip Better fit?! 
\smallskip $\chi^2$ is zero \smallskip Best fit has \emph{low variance} and \emph{low bias} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Linear least squares} \framesubtitle{Sparseness and level of noise} \begin{columns} \begin{column}{0.5\textwidth} \centering $M=10$, $N=15$ \bigskip \includegraphics[width=\columnwidth]{figs/fit_overparamfit_abs2.pdf} \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\columnwidth]{figs/fit_bias-variance_abs2.pdf} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Machine learning is just \emph{non}linear least squares} \begin{columns} \begin{column}{0.5\textwidth} Gradient descent produces poor solutions to many machine learning problems \bigskip \textbf{BUT:} no one uses gradient descent \bigskip \emph{Stochastic} gradient descent (SGD): follow an approximate gradient of $\chi^2$ computed from small subsets (batches) of the data \bigskip The approximate gradient takes \emph{fewer} steps to find \emph{better} solutions \end{column} \begin{column}{0.5\textwidth} \end{column} \end{columns} \end{frame}
\begin{frame} \begin{columns} \begin{column}{0.9\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\columnwidth]{figs/gradient_vs_sgd_1.png} \onslide<2>\includegraphics[width=\columnwidth]{figs/gradient_vs_sgd_2.png} \onslide<3>\includegraphics[width=\columnwidth]{figs/gradient_vs_sgd_3.png} \onslide<4>\includegraphics[width=\columnwidth]{figs/gradient_vs_sgd_4.png} \onslide<5>\includegraphics[width=\columnwidth]{figs/gradient_vs_sgd_5.png} \end{overprint} \end{column} \end{columns} \end{frame}
\begin{frame} \centering \begin{tikzpicture} \draw (0,0) node[align=center] {Overparameterization works\\\includegraphics[width=3cm]{figs/fit_overparamfit_abs2.pdf}}; \draw (4,2) node[align=center] {Gradient descent\\implicitly regularizes\\\includegraphics[height=2cm]{figs/fit_gradient_5.pdf}}; \draw (-4,2) node[align=center] {Neural networks\\are good bases\\\includegraphics[height=2cm]{figs/fit_basis_abs.pdf}}; \draw (-4,-2) node[align=center] {Data is sparse\\and high-dimensional\\\includegraphics[height=2cm]{figs/fit_data_abs2.pdf}}; \draw (4,-2) node[align=center] {SGD finds\\high-entropy solutions\\\includegraphics[height=2cm]{figs/gradient_vs_sgd_4.png}}; \end{tikzpicture} \end{frame}
\begin{frame} \frametitle{Machine learning is just \emph{non}linear least squares} \begin{columns} \begin{column}{0.5\textwidth} The structure and geometry of the manifold of ``perfect'' solutions is integral to understanding overparameterized fits \medskip \textbf{BUT:} extremely little is known outside of the linear case \end{column} \begin{column}{0.5\textwidth} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{The Euler characteristic \boldmath{$\chi$}} \begin{columns} \begin{column}{0.5\textwidth} The Euler characteristic $\chi(\Omega)$ is a topological invariant of a manifold $\Omega$ \medskip Defined by tiling the manifold, then taking the alternating sum \begin{align*} \chi(\Omega_{\text{cow}}) &= {\only<2,5->{\color{Red}}\#_\text{vertices}} &&\hspace{-1em}- {\only<3,5->{\color{ictpgreen}}\#_\text{edges}} &&\hspace{-1em}+ {\only<4,5->{\color{ictpblue}}\#_\text{faces}} \\ &\color{White}\only<2->{\color{Black}}= {\only<2,5->{\color{Red}}2904} &&\hspace{-1em}\color{White}\only<3->{\color{Black}}- {\only<3,5->{\color{ictpgreen}}8706} &&\hspace{-1em}\color{White}\only<4->{\color{Black}}+ {\only<4,5->{\color{ictpblue}}5804} \\ &\color{White}\only<5->{\color{Black}}=2 \end{align*} \[
\color{White}\only<6->{\color{Black}}\chi(\Omega_\text{football}) ={\only<6->{\color{Red}}60}-{\only<6->{\color{ictpgreen}}90}+{\only<6->{\color{ictpblue}}32}=2 \] \color{White}\only<7>{\color{Black}}Cow is homeomorphic to a sphere \end{column} \begin{column}{0.5\textwidth} \begin{overprint} \onslide<1,5>\includegraphics[width=\textwidth]{figs/cow.png} \onslide<2>\includegraphics[width=\textwidth]{figs/cow_vert.png} \onslide<3>\includegraphics[width=\textwidth]{figs/cow_edge.png} \onslide<4>\includegraphics[width=\textwidth]{figs/cow_face.png} \onslide<6->\hspace{2em}\includegraphics{figs/Football_Pallo_valmiina-cropped.jpg} \end{overprint} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Characteristics of the Euler characteristic} \begin{columns} \begin{column}{0.5\textwidth} For closed, connected 2-dimensional manifolds, related to genus $g$ by $\chi=2-2g$ \medskip General properties: \vspace{-0.5em} \[ \chi(\Omega)=0 \text{ for odd-dimensional $\Omega$} \] \vspace{-1.6em} \[ \chi(S^D)=2\text{ for even }D \] \[ \chi(\Omega_1\sqcup\Omega_2)=\chi(\Omega_1)+\chi(\Omega_2) \] \[ \chi(\Omega_1\times\Omega_2)=\chi(\Omega_1)\times\chi(\Omega_2) \] \smallskip Examples: \vspace{-0.5em} \[\chi(M\text{ even-$D$ spheres})=2M\] \vspace{-1.6em} \[\chi(S^1\times\text{anything})=0\] \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\textwidth]{figs/genus.png} \end{column} \end{columns} \end{frame}
\begin{frame} \begin{columns} \begin{column}{0.16\textwidth} \Large \textbf{\color{ictpblue}\boldmath{$\chi$} for constant energy level sets} \vspace{11em} \end{column} \begin{column}{0.7\textwidth} \begin{overprint} \onslide<1>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-0.png}} \onslide<2>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-1.png}} \onslide<3>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-2.png}} \end{overprint} \end{column} \begin{column}{0.16\textwidth} \begin{overprint} \onslide<2>\centering High energy \vspace{0.5em} $\chi(\Omega)\ll0$ \vspace{0.5em} hole\\ dominated \onslide<3>\centering Low energy \vspace{0.5em} $\chi(\Omega)\gg0$ \vspace{0.5em} component\\ dominated \end{overprint} \vspace{15em} \end{column} \end{columns} \end{frame}
\begin{frame} \frametitle{Computing the Euler characteristic} \begin{columns} \begin{column}{0.5\textwidth} Morse theory: gradient flow on an arbitrary ``height'' function $h$ makes a complex \begin{align*} \chi(\Omega) &= {\only<2,5>{\color{Red}}\#_\text{vertices}} - {\only<3,5>{\color{ictpgreen}}\#_\text{edges}} + {\only<4,5>{\color{ictpblue}}\#_\text{faces}} +\cdots \\ &= {\only<6>{\color{ictpblue}}\#_\text{index 0}} - {\only<6>{\color{ictpgreen}}\#_\text{index 1}} + {\only<6>{\color{Red}}\#_\text{index 2}} +\cdots \\ &=\sum_{i=0}^D(-1)^i\#_{\text{index }i} \end{align*} \[ \hspace{-1em}\operatorname{sgn}\big(\det\operatorname{Hess}h(\pmb x)\big) = \operatorname{sgn}\left(\prod_{i=1}^D\lambda_i\right) =(-1)^{\text{index}} \] \[ \chi(\Omega) =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big) \,\det\operatorname{Hess}h(\pmb x) \] \emph{Kac--Rice without the absolute value!} \end{column} \begin{column}{0.5\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\textwidth]{figs/other_sphere.png} \onslide<2>\includegraphics[width=\textwidth]{figs/other_sphere_vert.png} \onslide<3>\includegraphics[width=\textwidth]{figs/other_sphere_edge.png} \onslide<4>\includegraphics[width=\textwidth]{figs/other_sphere_face.png}
\onslide<5>\includegraphics[width=\textwidth]{figs/other_sphere_all.png} \onslide<6>\includegraphics[width=\textwidth]{figs/other_sphere_crit.png} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Computing the Euler characteristic of level sets} \begin{columns} \begin{column}{0.6\textwidth} Pick whatever height function $h:\Omega\to\mathbb R$ you like: $h(\pmb x)=\frac1N\pmb x_0\cdot\pmb x$ for arbitrary $\pmb x_0$. \[ \chi(\Omega) =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)\,\det\operatorname{Hess}h(\pmb x) \] Level set $\Omega$ defined by $H(\pmb x)=EN$ and $\|\pmb x\|^2=N$ \bigskip Lagrange multipliers replace differential geometry: \[ L(\pmb x,\pmb\omega)=h(\pmb x)+\omega_0(\|\pmb x\|^2-N)+\omega_1(H(\pmb x)-EN) \] \end{column} \begin{column}{0.4\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\textwidth]{figs/function-0.png} \onslide<2>\includegraphics[width=\textwidth]{figs/function-1.png} \onslide<3>\includegraphics[width=\textwidth]{figs/function-2.png} \end{overprint} \end{column} \end{columns} \[ \chi(\Omega) =\int_{\mathbb R^{N+2}} d\pmb x\,d\pmb\omega\,\delta\big(\begin{bmatrix}\frac{\partial L}{\partial\pmb x}&\frac{\partial L}{\partial\pmb\omega}\end{bmatrix}\big) \,\det\begin{bmatrix}\frac{\partial^2L}{\partial\pmb x^2}&\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}\\\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}&\frac{\partial^2L}{\partial\pmb\omega^2}\end{bmatrix} \] \end{frame} \begin{frame} \begin{columns} \begin{column}{\textwidth} \includegraphics[width=\textwidth]{figs/slice.png} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Results: \boldmath{$3+s$} mixed spherical models} \begin{columns} \begin{column}{0.5\textwidth} \begin{align*} H(\pmb x)=\lambda_s\sum_{i_1,i_2,i_3}^NJ_{i_1,i_2,i_3}x_{i_1}x_{i_2}x_{i_3} \hspace{4em} \\ +(1-\lambda_s)\sum_{i_1,\ldots,i_s}^NJ_{i_1,\ldots,i_s}x_{i_1}\cdots x_{i_s} \end{align*} \textcolor{mb}{\textbf{\boldmath{$E_\text{gs}$:}} ground state, energy of lowest minima} \smallskip \textcolor{mg}{\textbf{\boldmath{$E_\text{alg}$:}} algorithmic bound, set by OGP} \smallskip \textcolor{my}{\textbf{\boldmath{$E_\text{th}$:}} `threshold', marginal minima dominate} \smallskip \textcolor{mr}{\textbf{\boldmath{$E_\text{sh}$:}} `shattering', $\chi$ changes sign} \bigskip \tiny \fullcite{Kent-Dobias_2024_On} \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\textwidth]{figs/folena_new.pdf} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Preliminary results: other models?} \begin{columns} \begin{column}{0.5\textwidth} Example: non-Gaussian landscapes \[ H(\pmb x)=\frac12\sum_{i=1}^{\alpha N}V_i(\pmb x)^2 \] for spherical $\pmb x$ and Gaussian functions $V_i$ \medskip $E_\text{sh}$ consistent with gradient descent? More work needed... 
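\smallskip (Cf.\ the fitting problems above: under the illustrative identification $V_i(\pmb a)=y_i-\hat f(x_i\mid\pmb a)$ with $\alpha N=M$, this $H$ is just $\frac12\chi^2(\pmb a\mid\text{data})$.)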
\bigskip\tiny \fullcite{Kent-Dobias_2024_Algorithm-independent} \smallskip \fullcite{Kent-Dobias_2024_On} \end{column} \begin{column}{0.5\textwidth} \vspace{-1em} \begin{overprint} \onslide<1>\includegraphics[width=\textwidth]{figs/most_squares_nonzoom.pdf} \onslide<2>\includegraphics[width=\textwidth]{figs/extrapolation.pdf} \end{overprint} \vspace{-0.4em} \includegraphics[width=\textwidth]{figs/most_squares_zoom_2.pdf} \vspace{1em} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Outlook, other applications, future directions} \begin{columns} \begin{column}{0.5\textwidth} Euler characteristic reveals structure of problems with no energy function:\\ e.g., the set of $\pmb x$ such that \[ V_i(\pmb x)=\sqrt NV_0 \qquad i=1,\ldots,\alpha N \] for independent Gaussian $V_i$ \medskip \tiny \fullcite{Kent-Dobias_2024_On} \bigskip\normalsize \textcolor{ictpgreen}{\textbf{To Do:}} Resolve GD question: better DMFT, direct reasoning for relationship to topology \medskip Extend topological arguments beyond GD \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\textwidth]{figs/spheres.png} \medskip \includegraphics[width=\textwidth]{figs/phases.png} \end{column} \end{columns} \end{frame} \end{document}