diff options
Diffstat (limited to 'ictp-saifr_colloquium.tex')
-rw-r--r-- | ictp-saifr_colloquium.tex | 644 |
1 files changed, 644 insertions, 0 deletions
diff --git a/ictp-saifr_colloquium.tex b/ictp-saifr_colloquium.tex new file mode 100644 index 0000000..359bf08 --- /dev/null +++ b/ictp-saifr_colloquium.tex @@ -0,0 +1,644 @@ +\documentclass[aspectratio=169,usenames,dvipsnames,fleqn]{beamer} + +\setbeamerfont{title}{family=\bf} +\setbeamerfont{frametitle}{family=\bf} +\setbeamerfont{normal text}{family=\rm} +\setbeamertemplate{navigation symbols}{} +\setbeamercolor{titlelike}{parent=structure,fg=cyan} + +\usepackage{enumitem} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{pifont} +\usepackage{graphicx} +\usepackage{xcolor} + +\definecolor{ictpblue}{HTML}{0471b9} +\definecolor{ictpgreen}{HTML}{0c8636} + +\definecolor{mb}{HTML}{5e81b5} +\definecolor{my}{HTML}{e19c24} +\definecolor{mg}{HTML}{8fb032} +\definecolor{mr}{HTML}{eb6235} + +\setbeamercolor{titlelike}{parent=structure,fg=ictpblue} +\setbeamercolor{itemize item}{fg=ictpblue} + +\usepackage[ + style=phys, + eprint=true, + maxnames = 100, + terseinits=true +]{biblatex} + + +\addbibresource{ictp-saifr_colloquium.bib} + +\title{ + Structural barriers to random optimization +} +\author{\textbf{Jaron Kent-Dobias}\\Simons--FAPESP Young Investigator} +\date{19 February 2025} + +\begin{document} + +\begin{frame} + \maketitle + + \vspace{-6pc} + \begin{minipage}[c]{10pc} + \centering + \includegraphics[height=6pc]{figs/ift-unesp.png} + \end{minipage} + \hfill\begin{minipage}[c]{10pc} + \centering + \includegraphics[height=6pc]{figs/logo-ictp-saifr.jpg} + \end{minipage} + \vspace{2pc} +\end{frame} + +\begin{frame} + \frametitle{Overview} + + \begin{columns} + \begin{column}{\textwidth} + \huge + + \color{ictpgreen}{\textbf{Introduction}} + + \bigskip + + \color{ictpgreen}{\textbf{Complexity \& marginal complexity}} + + \bigskip + + \color{ictpgreen}{\textbf{Level set topology}} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Complexity of random landscapes} + \begin{columns} + \begin{column}{0.4\textwidth} + Complexity $\Sigma=\frac1N\overline{\log\#_\text{points}}$ describes typical number of stationary points + + \bigskip + + Complexity of marginal minima crucial for understanding dynamics in mixed $p$-spin models + + \bigskip + + Lucky accident: natural parameter + sets type of stationary point + \end{column} + \begin{column}{0.6\textwidth} + \begin{overprint} + \onslide<1>\includegraphics[width=\textwidth]{figs/folena_2020.png} + \onslide<2>\includegraphics[width=\textwidth]{figs/folena_2020_2.png} + \end{overprint} + + \smallskip + + \tiny\fullcite{Folena_2020_Rethinking} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{How to count: Kac--Rice} + + Number of stationary points with $\nabla H(\pmb x)=0$ given by integral + over Kac--Rice measure + \begin{align*} + \#_\text{points} + &=\int_\Omega d\pmb x\,\delta\big(\nabla H(\pmb x)\big)\,\big|\det\operatorname{Hess}H(\pmb x)\big| + \end{align*} + Note absolute value of the determinant: want to account for curvature but not add $-1$ + + \bigskip + + Can specify properties of points by inserting $\delta$-functions: + \begin{align*} + \#_\text{points}\alert<2>{(E)} + &=\int_\Omega d\pmb x\,\delta\big(\nabla H(\pmb x)\big)\,\big|\det\operatorname{Hess}H(\pmb x)\big| + \alert<2>{\,\delta\big(H(\pmb x)-NE\big)} + \end{align*} + How can \emph{marginality} be specified? +\end{frame} + +\begin{frame} + \frametitle{Hessian shifts and stationary point stability} + \begin{columns} + \begin{column}{0.5\textwidth} + In spherical spin glasses, all points have the same Hessian spectral density: semicircle with radius $\mu_\text m$, but different shifts + \[ + \mu=\frac1N\operatorname{Tr}\operatorname{Hess} H(\pmb x) + \] + + \bigskip + + Condition on marginal minima by inserting + \[ + \delta\big(\operatorname{Tr}\operatorname{Hess}H(\pmb x)-N\mu_\text{m}\big) + \] + + \medskip + + \alert<7>{In generic models, spectral density depends on stationarity, energy, etc!} + \end{column} + \begin{column}{0.5\textwidth} + \begin{overlayarea}{\textwidth}{14.5em} + \only<1-2>{\includegraphics[width=\columnwidth]{figs/mu_0.75.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_0.75.pdf}} + \only<3>{\includegraphics[width=\columnwidth]{figs/mu_1.5.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_1.5.pdf}} + \only<4>{\includegraphics[width=\columnwidth]{figs/mu_2.25.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_2.25.pdf}} + \only<5>{\includegraphics[width=\columnwidth]{figs/mu_3.5.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_3.5.pdf}} + \only<6>{\includegraphics[width=\columnwidth]{figs/mu_2.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_2.pdf}} + \only<7>{\includegraphics[width=0.9\columnwidth]{figs/msg_marg_spectra.pdf}} + \end{overlayarea} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Towards generic marginal complexity} + \begin{columns} + \begin{column}{0.5\textwidth} + \begin{itemize}[leftmargin=4em] + \item[\color{ictpgreen}\bf Trick \#1:] condition stationary points on \emph{value of smallest eigenvalue} + \end{itemize} + { + \small + \begin{align*} + \hspace{-3em}&\delta(\lambda_\text{min}(A)) \\ + \hspace{-3em}&=\lim_{\beta\to\infty}\int + \frac{d\pmb s\,\delta(N-\|\pmb s\|^2)e^{-\beta\pmb s^TA\pmb s}} + {\int d\pmb s'\,\delta(N-\|\pmb s'\|^2)e^{-\beta\pmb s'^TA\pmb s'}} + \delta\left(\frac{\pmb s^TA\pmb s}N\right) + \end{align*} + } + + \medskip + + \begin{itemize}[leftmargin=4em] + \item[\color{ictpgreen}\bf Trick \#2:] adjust $\mu\propto\operatorname{Tr}\operatorname{Hess}H$ until order-$N$ large deviation breaks + \end{itemize} + + \bigskip + + \tiny + \fullcite{Kent-Dobias_2024_Conditioning} + + \end{column} + \begin{column}{0.5\textwidth} + \hspace{0.9em} + \includegraphics[scale=0.8]{figs/spectrum_less.pdf} + \hspace{-1.6em} + \includegraphics[scale=0.8]{figs/spectrum_eq.pdf} + \hspace{-1.6em} + \includegraphics[scale=0.8]{figs/spectrum_more.pdf} + \\ + \includegraphics[scale=0.8]{figs/large_deviation.pdf} + + \vspace{-1em} + + \small + \begin{align*} + \hspace{-0.5em}G_0(\mu)=\frac 1N\log\Big\langle\delta\big(\lambda_\text{min}(A-\lambda\mu)\big)\Big\rangle_{A\in\text{GOE}(N)} + \end{align*} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Marginal complexity: example} + \begin{columns} + \begin{column}{0.5\textwidth} + Example: non-Gaussian landscapes + \[ + H(\pmb x)=\frac12\sum_{i=1}^{\alpha N}V_i(\pmb x)^2 + \] + for spherical $\pmb x$ and Gaussian functions $V_i$ + \[ + \overline{V_i(\pmb x)V_j(\pmb x')}=\delta_{ij}f\bigg(\frac{\pmb x\cdot\pmb x'}N\bigg) + \] + + \vspace{-2em} + + \begin{overprint} + \onslide<1-2>\[ + f(q)=\tfrac12q^2+\tfrac12q^3 + \] + \onslide<3>\[ + f(q)=\kappa q+(1-\kappa)q^2 + \] + \end{overprint} + + \bigskip + + \tiny + \fullcite{Kent-Dobias_2024_Conditioning} + + \smallskip + + \fullcite{Kent-Dobias_2024_Algorithm-independent} + \end{column} + \begin{column}{0.5\textwidth} + \begin{overprint} + \onslide<1>\vspace{4em}\includegraphics[width=\textwidth]{figs/most_squares_complex.pdf} + \onslide<2>\vspace{-1.75em}\includegraphics[width=\textwidth]{figs/most_squares_complexity.pdf} + + \vspace{-1.95em} + + \hspace{-0.25em}\colorbox{white}{\includegraphics[width=\textwidth]{figs/most_squares_stability.pdf}} + \onslide<3>\vspace{-1em} + + \includegraphics[width=\textwidth]{figs/most_squares_nonzoom.pdf} + + \vspace{-0.4em} + + \includegraphics[width=\textwidth]{figs/most_squares_zoom.pdf} + + \vspace{1em} + \end{overprint} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Which marginal minima attract the dynamics?} + \begin{columns} + \begin{column}{0.5\textwidth} + `Best case' performance: lowest marginal minima without the \emph{Overlap Gap Property} + + \smallskip + + \tiny + \fullcite{Gamarnik_2021-10_The} + + \normalsize + \medskip + + `Worst case' performance: ??? + + \smallskip + \tiny\fullcite{Folena_2023_On} + + \normalsize + \medskip + + \textcolor{mb}{\textbf{\boldmath{$E_\text{gs}$:}} ground state, energy of lowest minima} + + \smallskip + + \textcolor{mg}{\textbf{\boldmath{$E_\text{alg}$:}} algorithmic bound, set by OGP} + + \smallskip + + \textcolor{my}{\textbf{\boldmath{$E_\text{th}$:}} `threshold', marginal minima dominate} + + \medskip + + Gradient descent destination depends on \emph{global} property: basin of attraction size; + stationary point analysis is only \emph{local} + + \medskip + + \end{column} + \begin{column}{0.5\textwidth} + \begin{overprint} + \onslide<1>\includegraphics[width=\textwidth]{figs/folena_2023.png} + \onslide<2>\includegraphics[width=\textwidth]{figs/folena_new_2.pdf} + \end{overprint} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{The Euler characteristic \boldmath{$\chi$}} + \begin{columns} + \begin{column}{0.5\textwidth} + The Euler characteristic $\chi(\Omega)$ is a topological invariant of a manifold $\Omega$ + + \medskip + + Defined by tiling the manifold, then taking the alternating sum + \begin{align*} + \chi(\Omega_{\text{cow}}) + &= + {\only<2,5->{\color{Red}}\#_\text{vertices}} + &&\hspace{-1em}- + {\only<3,5->{\color{ictpgreen}}\#_\text{edges}} + &&\hspace{-1em}+ + {\only<4,5->{\color{ictpblue}}\#_\text{faces}} + \\ + &\color{White}\only<2->{\color{Black}}= + {\only<2,5->{\color{Red}}2904} + &&\hspace{-1em}\color{White}\only<3->{\color{Black}}- + {\only<3,5->{\color{ictpgreen}}8706} + &&\hspace{-1em}\color{White}\only<4->{\color{Black}}+ + {\only<4,5->{\color{ictpblue}}5804} \\ + &\color{White}\only<5->{\color{Black}}=2 + \end{align*} + \[ + \color{White}\only<6->{\color{Black}}\chi(\Omega_\text{football}) + ={\only<6->{\color{Red}}60}-{\only<6->{\color{ictpgreen}}90}+{\only<6->{\color{ictpblue}}32}=2 + \] + + \color{White}\only<7>{\color{Black}}Cow is homeomorphic to a sphere + \end{column} + \begin{column}{0.5\textwidth} + \begin{overprint} + \onslide<1,5>\includegraphics[width=\textwidth]{figs/cow.png} + \onslide<2>\includegraphics[width=\textwidth]{figs/cow_vert.png} + \onslide<3>\includegraphics[width=\textwidth]{figs/cow_edge.png} + \onslide<4>\includegraphics[width=\textwidth]{figs/cow_face.png} + \onslide<6->\hspace{2em}\includegraphics{figs/Football_Pallo_valmiina-cropped.jpg} + \end{overprint} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Characteristics of the Euler characteristic } + + \begin{columns} + \begin{column}{0.5\textwidth} + For closed, connected 2-dimensional manifolds, related to genus $g$ by + $\chi=2-2g$ + + \medskip + + General properties: + \vspace{-0.5em} + \[ + \chi(\Omega)=0 \text{ for odd-dimensional $\Omega$} + \] + \vspace{-1.6em} + \[ + \chi(S^D)=2\text{ for even }D + \] + \[ + \chi(\Omega_1\sqcup\Omega_2)=\chi(\Omega_1)+\chi(\Omega_2) + \] + \[ + \chi(\Omega_1\times\Omega_2)=\chi(\Omega_1)\times\chi(\Omega_2) + \] + + \smallskip + + Examples: + \vspace{-0.5em} + \[\chi(M\text{ even-$D$ spheres})=2M\] + \vspace{-1.6em} + \[\chi(S^1\times\text{anything})=0\] + \end{column} + \begin{column}{0.5\textwidth} + \includegraphics[width=\textwidth]{figs/genus.png} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \begin{columns} + \begin{column}{0.16\textwidth} + \Large + \textbf{\color{ictpblue}\boldmath{$\chi$} for constant energy level sets} + \vspace{11em} + \end{column} + \begin{column}{0.7\textwidth} + \begin{overprint} + \onslide<1>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-0.png}} + \onslide<2>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-1.png}} + \onslide<3>\centering\rotatebox{90}{\includegraphics[height=\textwidth]{figs/Stillinger-2.png}} + \end{overprint} + \end{column} + \begin{column}{0.16\textwidth} + \begin{overprint} + \onslide<2>\centering High energy + + \vspace{0.5em} + + $\chi(\Omega)\ll0$ + + \vspace{0.5em} + + hole\\ + dominated + \onslide<3>\centering Low energy + + \vspace{0.5em} + + $\chi(\Omega)\gg0$ + + \vspace{0.5em} + + component\\ + dominated + \end{overprint} + \vspace{15em} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Computing the Euler characteristic} + \begin{columns} + \begin{column}{0.5\textwidth} + Morse theory: gradient flow on an arbitrary ``height'' function $h$ makes a complex + \begin{align*} + \chi(\Omega) + &= + {\only<2,5>{\color{Red}}\#_\text{vertices}} + - + {\only<3,5>{\color{ictpgreen}}\#_\text{edges}} + + + {\only<4,5>{\color{ictpblue}}\#_\text{faces}} + +\cdots \\ + &= + {\only<6>{\color{ictpblue}}\#_\text{index 0}} + - + {\only<6>{\color{ictpgreen}}\#_\text{index 1}} + + + {\only<6>{\color{Red}}\#_\text{index 2}} + +\cdots \\ + &=\sum_{i=0}^D(-1)^i\#_\text{index i} + \end{align*} + \[ + \hspace{-1em}\operatorname{sgn}\big(\det\operatorname{Hess}(\pmb x)\big) + = + \operatorname{sgn}\left(\prod_{i=1}^D\lambda_i\right) + =(-1)^{\text{index}} + \] + \[ + \chi(\Omega) + =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big) + \,\det\operatorname{Hess}h(\pmb x) + \] + \emph{Kac--Rice without the absolute value!} + \end{column} + \begin{column}{0.5\textwidth} + \begin{overprint} + \onslide<1>\includegraphics[width=\textwidth]{figs/other_sphere.png} + \onslide<2>\includegraphics[width=\textwidth]{figs/other_sphere_vert.png} + \onslide<3>\includegraphics[width=\textwidth]{figs/other_sphere_edge.png} + \onslide<4>\includegraphics[width=\textwidth]{figs/other_sphere_face.png} + \onslide<5>\includegraphics[width=\textwidth]{figs/other_sphere_all.png} + \onslide<6>\includegraphics[width=\textwidth]{figs/other_sphere_crit.png} + \end{overprint} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Computing the Euler characteristic of level sets} + + \begin{columns} + \begin{column}{0.6\textwidth} + Pick whatever height function $h:\Omega\to\mathbb R$ you like: $h(\pmb x)=\frac1N\pmb + x_0\cdot\pmb x$ for arbitrary $\pmb x_0$. + \[ + \chi(\Omega) + =\int_\Omega d\pmb x\,\delta\big(\nabla h(\pmb x)\big)\,\det\operatorname{Hess}h(\pmb x) + \] + Level set $\Omega$ defined by $H(\pmb x)=EN$ and $\|\pmb x\|^2=N$ + + \bigskip + + Lagrange multipliers replace differential geometry: + \[ + L(\pmb x,\pmb\omega)=h(\pmb x)+\omega_0(\|\pmb x\|^2-N)+\omega_1(H(\pmb x)-EN) + \] + \end{column} + \begin{column}{0.4\textwidth} + \begin{overprint} + \onslide<1>\includegraphics[width=\textwidth]{figs/function-0.png} + \onslide<2>\includegraphics[width=\textwidth]{figs/function-1.png} + \onslide<3>\includegraphics[width=\textwidth]{figs/function-2.png} + \end{overprint} + \end{column} + \end{columns} + \[ + \chi(\Omega) + =\int_{\mathbb R^{N+2}} d\pmb x\,d\pmb\omega\,\delta\big(\begin{bmatrix}\frac{\partial L}{\partial\pmb x}&\frac{\partial L}{\partial\pmb\omega}\end{bmatrix}\big) + \,\det\begin{bmatrix}\frac{\partial^2L}{\partial\pmb x^2}&\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}\\\frac{\partial^2L}{\partial\pmb x\partial\pmb\omega}&\frac{\partial^2L}{\partial\pmb\omega^2}\end{bmatrix} + \] +\end{frame} + +\begin{frame} + \begin{columns} + \begin{column}{\textwidth} + \includegraphics[width=\textwidth]{figs/slice.png} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Results: \boldmath{$3+s$} mixed spherical models} + \begin{columns} + \begin{column}{0.5\textwidth} + \begin{align*} + H(\pmb x)=\lambda_s\sum_{i_1,i_2,i_3}^NJ_{i_1,i_2,i_3}x_{i_1}x_{i_2}x_{i_3} \hspace{4em} \\ + +(1-\lambda_s)\sum_{i_1,\ldots,i_s}^NJ_{i_1,\ldots,i_s}x_{i_1}\cdots x_{i_s} + \end{align*} + + \textcolor{mb}{\textbf{\boldmath{$E_\text{gs}$:}} ground state, energy of lowest minima} + + \smallskip + + \textcolor{mg}{\textbf{\boldmath{$E_\text{alg}$:}} algorithmic bound, set by OGP} + + \smallskip + + \textcolor{my}{\textbf{\boldmath{$E_\text{th}$:}} `threshold', marginal minima dominate} + + \smallskip + + \textcolor{mr}{\textbf{\boldmath{$E_\text{sh}$:}} `shattering', $\chi$ changes sign} + + \bigskip + + \tiny + \fullcite{Kent-Dobias_2024_On} + \end{column} + \begin{column}{0.5\textwidth} + \includegraphics[width=\textwidth]{figs/folena_new.pdf} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Preliminary results: other models?} + \begin{columns} + \begin{column}{0.5\textwidth} + Example: non-Gaussian landscapes + \[ + H(\pmb x)=\frac12\sum_{i=1}^{\alpha N}V_i(\pmb x)^2 + \] + for spherical $\pmb x$ and Gaussian functions $V_i$ + + \medskip + + $E_\text{sh}$ consistent with gradient descent? More work needed... + + \bigskip\tiny + + \fullcite{Kent-Dobias_2024_Algorithm-independent} + + \smallskip + + \fullcite{Kent-Dobias_2024_On} + \end{column} + \begin{column}{0.5\textwidth} + \vspace{-1em} + + \begin{overprint} + \onslide<1>\includegraphics[width=\textwidth]{figs/most_squares_nonzoom.pdf} + \onslide<2>\includegraphics[width=\textwidth]{figs/extrapolation.pdf} + \end{overprint} + + \vspace{-0.4em} + + \includegraphics[width=\textwidth]{figs/most_squares_zoom_2.pdf} + + \vspace{1em} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Outlook, other applications, future directions} + \begin{columns} + \begin{column}{0.5\textwidth} + Euler characteristic reveals structure of problems with no energy function:\\ e.g., the set of $\pmb x$ such that + \[ + V_i(\pmb x)=\sqrt NV_0 \qquad i=1,\ldots,\alpha N + \] + for independent Gaussian $V_i$ + + \medskip + + \tiny + \fullcite{Kent-Dobias_2024_On} + + \bigskip\normalsize + + \textcolor{ictpgreen}{\textbf{To Do:}} + + Resolve GD question: better DMFT, direct reasoning for relationship to topology + + \medskip + + Extend topological arguments beyond GD + \end{column} + \begin{column}{0.5\textwidth} + \includegraphics[width=\textwidth]{figs/spheres.png} + + \medskip + + \includegraphics[width=\textwidth]{figs/phases.png} + \end{column} + \end{columns} +\end{frame} + +\end{document} |