\documentclass[aspectratio=169,usenames,dvipsnames,fleqn]{beamer} \setbeamerfont{title}{family=\bf} \setbeamerfont{frametitle}{family=\bf} \setbeamerfont{normal text}{family=\rm} \setbeamertemplate{navigation symbols}{} \setbeamercolor{titlelike}{parent=structure,fg=cyan} \usepackage{enumitem} \usepackage[utf8]{inputenc} \usepackage[T1]{fontenc} \usepackage{pifont} \usepackage{graphicx} \usepackage{xcolor} \definecolor{ictpblue}{HTML}{0471b9} \definecolor{ictpgreen}{HTML}{0c8636} \definecolor{mb}{HTML}{5e81b5} \definecolor{my}{HTML}{e19c24} \definecolor{mg}{HTML}{8fb032} \definecolor{mr}{HTML}{eb6235} \setbeamercolor{titlelike}{parent=structure,fg=ictpblue} \setbeamercolor{itemize item}{fg=ictpblue} \usepackage[ style=phys, eprint=true, maxnames = 100, terseinits=true ]{biblatex} \addbibresource{ictp-saifr_colloquium.bib} \title{ Structural barriers to random optimization } \author{\textbf{Jaron Kent-Dobias}\\Simons--FAPESP Young Investigator} \date{19 February 2025} \begin{document} \begin{frame} \maketitle \vspace{-6pc} \begin{minipage}[c]{10pc} \centering \includegraphics[height=6pc]{figs/ift-unesp.png} \end{minipage} \hfill\begin{minipage}[c]{10pc} \centering \includegraphics[height=6pc]{figs/logo-ictp-saifr.jpg} \end{minipage} \vspace{2pc} \end{frame} \begin{frame} \frametitle{Overview} \begin{columns} \begin{column}{\textwidth} \huge \color{ictpgreen}{\textbf{Introduction}} \bigskip \color{ictpgreen}{\textbf{Complexity \& marginal complexity}} \bigskip \color{ictpgreen}{\textbf{Level set topology}} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Complexity of random landscapes} \begin{columns} \begin{column}{0.4\textwidth} Complexity $\Sigma=\frac1N\overline{\log\#_\text{points}}$ describes typical number of stationary points \bigskip Complexity of marginal minima crucial for understanding dynamics in mixed $p$-spin models \bigskip Lucky accident: natural parameter sets type of stationary point \end{column} \begin{column}{0.6\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\textwidth]{figs/folena_2020.png} \onslide<2>\includegraphics[width=\textwidth]{figs/folena_2020_2.png} \end{overprint} \smallskip \tiny\fullcite{Folena_2020_Rethinking} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{How to count: Kac--Rice} Number of stationary points with $\nabla H(\pmb x)=0$ given by integral over Kac--Rice measure \begin{align*} \#_\text{points} &=\int_\Omega d\pmb x\,\delta\big(\nabla H(\pmb x)\big)\,\big|\det\operatorname{Hess}H(\pmb x)\big| \end{align*} Note absolute value of the determinant: want to account for curvature but not add $-1$ \bigskip Can specify properties of points by inserting $\delta$-functions: \begin{align*} \#_\text{points}\alert<2>{(E)} &=\int_\Omega d\pmb x\,\delta\big(\nabla H(\pmb x)\big)\,\big|\det\operatorname{Hess}H(\pmb x)\big| \alert<2>{\,\delta\big(H(\pmb x)-NE\big)} \end{align*} How can \emph{marginality} be specified? \end{frame} \begin{frame} \frametitle{Hessian shifts and stationary point stability} \begin{columns} \begin{column}{0.5\textwidth} In spherical spin glasses, all points have the same Hessian spectral density: semicircle with radius $\mu_\text m$, but different shifts \[ \mu=\frac1N\operatorname{Tr}\operatorname{Hess} H(\pmb x) \] \bigskip Condition on marginal minima by inserting \[ \delta\big(\operatorname{Tr}\operatorname{Hess}H(\pmb x)-N\mu_\text{m}\big) \] \medskip \alert<7>{In generic models, spectral density depends on stationarity, energy, etc!} \end{column} \begin{column}{0.5\textwidth} \begin{overlayarea}{\textwidth}{14.5em} \only<1-2>{\includegraphics[width=\columnwidth]{figs/mu_0.75.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_0.75.pdf}} \only<3>{\includegraphics[width=\columnwidth]{figs/mu_1.5.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_1.5.pdf}} \only<4>{\includegraphics[width=\columnwidth]{figs/mu_2.25.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_2.25.pdf}} \only<5>{\includegraphics[width=\columnwidth]{figs/mu_3.5.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_3.5.pdf}} \only<6>{\includegraphics[width=\columnwidth]{figs/mu_2.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_2.pdf}} \only<7>{\includegraphics[width=0.9\columnwidth]{figs/msg_marg_spectra.pdf}} \end{overlayarea} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Towards generic marginal complexity} \begin{columns} \begin{column}{0.5\textwidth} \begin{itemize}[leftmargin=4em] \item[\color{ictpgreen}\bf Trick \#1:] condition stationary points on \emph{value of smallest eigenvalue} \end{itemize} { \small \begin{align*} \hspace{-3em}&\delta(\lambda_\text{min}(A)) \\ \hspace{-3em}&=\lim_{\beta\to\infty}\int \frac{d\pmb s\,\delta(N-\|\pmb s\|^2)e^{-\beta\pmb s^TA\pmb s}} {\int d\pmb s'\,\delta(N-\|\pmb s'\|^2)e^{-\beta\pmb s'^TA\pmb s'}} \delta\left(\frac{\pmb s^TA\pmb s}N\right) \end{align*} } \medskip \begin{itemize}[leftmargin=4em] \item[\color{ictpgreen}\bf Trick \#2:] adjust $\mu\propto\operatorname{Tr}\operatorname{Hess}H$ until order-$N$ large deviation breaks \end{itemize} \bigskip \tiny \fullcite{Kent-Dobias_2024_Conditioning} \end{column} \begin{column}{0.5\textwidth} \hspace{0.9em} \includegraphics[scale=0.8]{figs/spectrum_less.pdf} \hspace{-1.6em} \includegraphics[scale=0.8]{figs/spectrum_eq.pdf} \hspace{-1.6em} \includegraphics[scale=0.8]{figs/spectrum_more.pdf} \\ \includegraphics[scale=0.8]{figs/large_deviation.pdf} \vspace{-1em} \small \begin{align*} \hspace{-0.5em}G_0(\mu)=\frac 1N\log\Big\langle\delta\big(\lambda_\text{min}(A-\lambda\mu)\big)\Big\rangle_{A\in\text{GOE}(N)} \end{align*} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Marginal complexity: example} \begin{columns} \begin{column}{0.5\textwidth} Example: non-Gaussian landscapes \[ H(\pmb x)=\frac12\sum_{i=1}^{\alpha N}V_i(\pmb x)^2 \] for spherical $\pmb x$ and Gaussian functions $V_i$ \[ \overline{V_i(\pmb x)V_j(\pmb x')}=\delta_{ij}f\bigg(\frac{\pmb x\cdot\pmb x'}N\bigg) \] \vspace{-2em} \begin{overprint} \onslide<1-2>\[ f(q)=\tfrac12q^2+\tfrac12q^3 \] \onslide<3>\[ f(q)=\kappa q+(1-\kappa)q^2 \] \end{overprint} \bigskip \tiny \fullcite{Kent-Dobias_2024_Conditioning} \smallskip \fullcite{Kent-Dobias_2024_Algorithm-independent} \end{column} \begin{column}{0.5\textwidth} \begin{overprint} \onslide<1>\vspace{4em}\includegraphics[width=\textwidth]{figs/most_squares_complex.pdf} \onslide<2>\vspace{-1.75em}\includegraphics[width=\textwidth]{figs/most_squares_complexity.pdf} \vspace{-1.95em} \hspace{-0.25em}\colorbox{white}{\includegraphics[width=\textwidth]{figs/most_squares_stability.pdf}} \onslide<3>\vspace{-1em} \includegraphics[width=\textwidth]{figs/most_squares_nonzoom.pdf} \vspace{-0.4em} \includegraphics[width=\textwidth]{figs/most_squares_zoom.pdf} \vspace{1em} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Which marginal minima attract the dynamics?} \begin{columns} \begin{column}{0.5\textwidth} `Best case' performance: lowest marginal minima without the \emph{Overlap Gap Property} \smallskip \tiny \fullcite{Gamarnik_2021-10_The} \normalsize \medskip `Worst case' performance: ??? \smallskip \tiny\fullcite{Folena_2023_On} \normalsize \medskip \textcolor{mb}{\textbf{\boldmath{$E_\text{gs}$:}} ground state, energy of lowest minima} \smallskip \textcolor{mg}{\textbf{\boldmath{$E_\text{alg}$:}} algorithmic bound, set by OGP} \smallskip \textcolor{my}{\textbf{\boldmath{$E_\text{th}$:}} `threshold', marginal minima dominate} \medskip Gradient descent destination depends on \emph{global} property: basin of attraction size; stationary point analysis is only \emph{local} \medskip \end{column} \begin{column}{0.5\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\textwidth]{figs/folena_2023.png} \onslide<2>\includegraphics[width=\textwidth]{figs/folena_new_2.pdf} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{The Euler characteristic \boldmath{$\chi$}} \begin{columns} \begin{column}{0.5\textwidth} The Euler characteristic $\chi(\Omega)$ is a topological invariant of a manifold $\Omega$ \medskip Defined by tiling the manifold, then taking the alternating sum \begin{align*} \chi(\Omega_{\text{cow}}) &= {\only<2,5->{\color{Red}}\#_\text{vertices}} &&\hspace{-1em}- {\only<3,5->{\color{ictpgreen}}\#_\text{edges}} &&\hspace{-1em}+ {\only<4,5->{\color{ictpblue}}\#_\text{faces}} \\ &\color{White}\only<2->{\color{Black}}= {\only<2,5->{\color{Red}}2904} &&\hspace{-1em}\color{White}\only<3->{\color{Black}}- {\only<3,5->{\color{ictpgreen}}8706} &&\hspace{-1em}\color{White}\only<4->{\color{Black}}+ {\only<4,5->{\color{ictpblue}}5804} \\ &\color{White}\only<5->{\color{Black}}=2 \end{align*} \[ \color{White}\only<6->{\color{Black}}\chi(\Omega_\text{football}) ={\only<6->{\color{Red}}60}-{\only<6->{\color{ictpgreen}}90}+{\only<6->{\color{ictpblue}}32}=2 \] \color{White}\only<7>{\color{Black}}Cow is homeomorphic to a sphere \end{column} \begin{column}{0.5\textwidth} \begin{overprint} \onslide<1,5>\includegraphics[width=\textwidth]{figs/cow.png} \onslide<2>\includegraphics[width=\textwidth]{figs/cow_vert.png} \onslide<3>\includegraphics[width=\textwidth]{figs/cow_edge.png} \onslide<4>\includegraphics[width=\textwidth]{figs/cow_face.png} \onslide<6->\hspace{2em}\includegraphics{figs/Football_Pallo_valmiina-cropped.jpg} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Characteristics of the Euler characteristic } \begin{columns} \begin{column}{0.5\textwidth} For closed, connected 2-dimensional manifolds, related to genus $g$ by $\chi=2-2g$ \medskip General properties: \vspace{-0.5em} \[ \chi(\Omega)=0 \text{ for odd-dimensional $\Omega$} \] \vspace{-1.6em} \[ \chi(S^D)=2\text{ for even }D \] \[ \chi(\Omega_1\sqcup\Omega_2)=\chi(\Omega_1)+\chi(\Omega_2) \] \[ \chi(\Omega_1\times\Omega_2)=\chi(\Omega_1)\times\chi(\Omega_2) \] \smallskip Examples: \vspace{-0.5em} \[\chi(M\text{ even-$D$ spheres})=2M\] \vspace{-1.6em} \[\chi(S^1\times\text{anything})=0\] \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\textwidth]{figs/genus.png} \end{column} \end{columns} \end{frame} \begin{frame} \begin{columns} \begin{column}{0.16\textwidth} \Large \textbf{\color{ictpblue}\boldmath{$\chi$} for constant energy level sets} \vspace{11em} \end{column} \begin{column}{0.7\textwidth} \begin{overprint} \onslide<1>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-0.png}} \onslide<2>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-1.png}} \onslide<3>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-2.png}} \end{overprint} \end{column} \begin{column}{0.16\textwidth} \begin{overprint} \onslide<2>\centering High energy \vspace{0.5em} $\chi(\Omega)\ll0$ \vspace{0.5em} hole\\ dominated \onslide<3>\centering Low energy \vspace{0.5em} $\chi(\Omega)\gg0$ \vspace{0.5em} component\\ dominated \end{overprint} \vspace{15em} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Computing the Euler characteristic} \begin{columns} \begin{column}{0.5\textwidth} Morse theory: gradient flow on an arbitrary ``height'' function $h$ makes a complex \begin{align*} \chi(\Omega) &= {\only<2,5>{\color{Red}}\#_\text{vertices}} - {\only<3,5>{\color{ictpgreen}}\#_\text{edges}} + {\only<4,5>{\color{ictpblue}}\#_\text{faces}} +\cdots \\ &= {\only<6>{\color{ictpblue}}\#_\text{index 0}} - {\only<6>{\color{ictpgreen}}\#_\text{index 1}} + {\only<6>{\color{Red}}\#_\text{index 2}} +\cdots \\ &=\sum_{i=0}^D(-1)^i\#_\text{index i} \end{align*} \[ \hspace{-1em}\operatorname{sgn}\big(\det\operatorname{Hess}(\pmb x)\big) = \operatorname{sgn}\left(\prod_{i=1}^D\lambda_i\right) =(-1)^{\text{index}} \] \[ \chi(\Omega) =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big) \,\det\operatorname{Hess}h(\pmb x) \] \emph{Kac--Rice without the absolute value!} \end{column} \begin{column}{0.5\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\textwidth]{figs/other_sphere.png} \onslide<2>\includegraphics[width=\textwidth]{figs/other_sphere_vert.png} \onslide<3>\includegraphics[width=\textwidth]{figs/other_sphere_edge.png} \onslide<4>\includegraphics[width=\textwidth]{figs/other_sphere_face.png} \onslide<5>\includegraphics[width=\textwidth]{figs/other_sphere_all.png} \onslide<6>\includegraphics[width=\textwidth]{figs/other_sphere_crit.png} \end{overprint} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Computing the Euler characteristic of level sets} \begin{columns} \begin{column}{0.6\textwidth} Pick whatever height function $h:\Omega\to\mathbb R$ you like: $h(\pmb x)=\frac1N\pmb x_0\cdot\pmb x$ for arbitrary $\pmb x_0$. \[ \chi(\Omega) =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)\,\det\operatorname{Hess}h(\pmb x) \] Level set $\Omega$ defined by $H(\pmb x)=EN$ and $\|\pmb x\|^2=N$ \bigskip Lagrange multipliers replace differential geometry: \[ L(\pmb x,\pmb\omega)=h(\pmb x)+\omega_0(\|\pmb x\|^2-N)+\omega_1(H(\pmb x)-EN) \] \end{column} \begin{column}{0.4\textwidth} \begin{overprint} \onslide<1>\includegraphics[width=\textwidth]{figs/function-0.png} \onslide<2>\includegraphics[width=\textwidth]{figs/function-1.png} \onslide<3>\includegraphics[width=\textwidth]{figs/function-2.png} \end{overprint} \end{column} \end{columns} \[ \chi(\Omega) =\int_{\mathbb R^{N+2}} d\pmb x\,d\pmb\omega\,\delta\big(\begin{bmatrix}\frac{\partial L}{\partial\pmb x}&\frac{\partial L}{\partial\pmb\omega}\end{bmatrix}\big) \,\det\begin{bmatrix}\frac{\partial^2L}{\partial\pmb x^2}&\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}\\\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}&\frac{\partial^2L}{\partial\pmb\omega^2}\end{bmatrix} \] \end{frame} \begin{frame} \begin{columns} \begin{column}{\textwidth} \includegraphics[width=\textwidth]{figs/slice.png} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Results: \boldmath{$3+s$} mixed spherical models} \begin{columns} \begin{column}{0.5\textwidth} \begin{align*} H(\pmb x)=\lambda_s\sum_{i_1,i_2,i_3}^NJ_{i_1,i_2,i_3}x_{i_1}x_{i_2}x_{i_3} \hspace{4em} \\ +(1-\lambda_s)\sum_{i_1,\ldots,i_s}^NJ_{i_1,\ldots,i_s}x_{i_1}\cdots x_{i_s} \end{align*} \textcolor{mb}{\textbf{\boldmath{$E_\text{gs}$:}} ground state, energy of lowest minima} \smallskip \textcolor{mg}{\textbf{\boldmath{$E_\text{alg}$:}} algorithmic bound, set by OGP} \smallskip \textcolor{my}{\textbf{\boldmath{$E_\text{th}$:}} `threshold', marginal minima dominate} \smallskip \textcolor{mr}{\textbf{\boldmath{$E_\text{sh}$:}} `shattering', $\chi$ changes sign} \bigskip \tiny \fullcite{Kent-Dobias_2024_On} \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\textwidth]{figs/folena_new.pdf} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Preliminary results: other models?} \begin{columns} \begin{column}{0.5\textwidth} Example: non-Gaussian landscapes \[ H(\pmb x)=\frac12\sum_{i=1}^{\alpha N}V_i(\pmb x)^2 \] for spherical $\pmb x$ and Gaussian functions $V_i$ \medskip $E_\text{sh}$ consistent with gradient descent? More work needed... \bigskip\tiny \fullcite{Kent-Dobias_2024_Algorithm-independent} \smallskip \fullcite{Kent-Dobias_2024_On} \end{column} \begin{column}{0.5\textwidth} \vspace{-1em} \begin{overprint} \onslide<1>\includegraphics[width=\textwidth]{figs/most_squares_nonzoom.pdf} \onslide<2>\includegraphics[width=\textwidth]{figs/extrapolation.pdf} \end{overprint} \vspace{-0.4em} \includegraphics[width=\textwidth]{figs/most_squares_zoom_2.pdf} \vspace{1em} \end{column} \end{columns} \end{frame} \begin{frame} \frametitle{Outlook, other applications, future directions} \begin{columns} \begin{column}{0.5\textwidth} Euler characteristic reveals structure of problems with no energy function:\\ e.g., the set of $\pmb x$ such that \[ V_i(\pmb x)=\sqrt NV_0 \qquad i=1,\ldots,\alpha N \] for independent Gaussian $V_i$ \medskip \tiny \fullcite{Kent-Dobias_2024_On} \bigskip\normalsize \textcolor{ictpgreen}{\textbf{To Do:}} Resolve GD question: better DMFT, direct reasoning for relationship to topology \medskip Extend topological arguments beyond GD \end{column} \begin{column}{0.5\textwidth} \includegraphics[width=\textwidth]{figs/spheres.png} \medskip \includegraphics[width=\textwidth]{figs/phases.png} \end{column} \end{columns} \end{frame} \end{document}