diff options
Diffstat (limited to 'ictp-saifr_colloquium.tex')
-rw-r--r-- | ictp-saifr_colloquium.tex | 273 |
1 files changed, 3 insertions, 270 deletions
diff --git a/ictp-saifr_colloquium.tex b/ictp-saifr_colloquium.tex index d07ecd3..5df9672 100644 --- a/ictp-saifr_colloquium.tex +++ b/ictp-saifr_colloquium.tex @@ -35,7 +35,8 @@ \addbibresource{ictp-saifr_colloquium.bib} \title{ - Structural barriers to random optimization + Fitting with more parameters than data points\\\normalsize + Topology of the solutions to overparameterized problems } \author{\textbf{Jaron Kent-Dobias}\\Simons--FAPESP Young Investigator} \date{19 February 2025} @@ -58,280 +59,12 @@ \end{frame} \begin{frame} - \frametitle{Overview} + \frametitle{Curve fitting: the good, the bad, and the weird} \begin{columns} - \begin{column}{\textwidth} - - \Large\color{ictpblue}{\textbf{Introduction and background of existing work}} - \large - - \medskip - - \hspace{1em}\color{ictpgreen}{\textbf{Introduction to random optimization problems}} - - \bigskip - - \hspace{1em}\color{ictpgreen}{\textbf{Worst-case versus typical-case performance}} - - \bigskip - - \hspace{1em}\color{ictpgreen}{\textbf{Structural barriers to typical-case performance of the best algorithms}} - - \bigskip - - \Large\color{ictpblue}{\textbf{Original contributions}} - \large - - \medskip - - \hspace{1em}\color{ictpgreen}{\textbf{Structural barriers to performance of common algorithms}} - - \bigskip - - \hspace{1em}\color{ictpgreen}{\textbf{Structural barriers to performance of the most common algorithm}} - - \bigskip - - \hspace{1em}\color{ictpgreen}{\textbf{Connections to other problems}} - - \end{column} - \end{columns} -\end{frame} - -\begin{frame} - \frametitle{Complexity of random landscapes} - \begin{columns} - \begin{column}{0.4\textwidth} - Complexity $\Sigma=\frac1N\overline{\log\#_\text{points}}$ describes typical number of stationary points - - \bigskip - - Complexity of marginal minima crucial for understanding dynamics in mixed $p$-spin models - - \bigskip - - Lucky accident: natural parameter - sets type of stationary point - \end{column} - \begin{column}{0.6\textwidth} - \begin{overprint} - \onslide<1>\includegraphics[width=\textwidth]{figs/folena_2020.png} - \onslide<2>\includegraphics[width=\textwidth]{figs/folena_2020_2.png} - \end{overprint} - - \smallskip - - \tiny\fullcite{Folena_2020_Rethinking} - \end{column} - \end{columns} -\end{frame} - -\begin{frame} - \frametitle{How to count: Kac--Rice} - - Number of stationary points with $\nabla H(\pmb x)=0$ given by integral - over Kac--Rice measure - \begin{align*} - \#_\text{points} - &=\int_\Omega d\pmb x\,\delta\big(\nabla H(\pmb x)\big)\,\big|\det\operatorname{Hess}H(\pmb x)\big| - \end{align*} - Note absolute value of the determinant: want to account for curvature but not add $-1$ - - \bigskip - - Can specify properties of points by inserting $\delta$-functions: - \begin{align*} - \#_\text{points}\alert<2>{(E)} - &=\int_\Omega d\pmb x\,\delta\big(\nabla H(\pmb x)\big)\,\big|\det\operatorname{Hess}H(\pmb x)\big| - \alert<2>{\,\delta\big(H(\pmb x)-NE\big)} - \end{align*} - How can \emph{marginality} be specified? -\end{frame} - -\begin{frame} - \frametitle{Hessian shifts and stationary point stability} - \begin{columns} - \begin{column}{0.5\textwidth} - In spherical spin glasses, all points have the same Hessian spectral density: semicircle with radius $\mu_\text m$, but different shifts - \[ - \mu=\frac1N\operatorname{Tr}\operatorname{Hess} H(\pmb x) - \] - - \bigskip - - Condition on marginal minima by inserting - \[ - \delta\big(\operatorname{Tr}\operatorname{Hess}H(\pmb x)-N\mu_\text{m}\big) - \] - - \medskip - - \alert<7>{In generic models, spectral density depends on stationarity, energy, etc!} - \end{column} \begin{column}{0.5\textwidth} - \begin{overlayarea}{\textwidth}{14.5em} - \only<1-2>{\includegraphics[width=\columnwidth]{figs/mu_0.75.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_0.75.pdf}} - \only<3>{\includegraphics[width=\columnwidth]{figs/mu_1.5.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_1.5.pdf}} - \only<4>{\includegraphics[width=\columnwidth]{figs/mu_2.25.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_2.25.pdf}} - \only<5>{\includegraphics[width=\columnwidth]{figs/mu_3.5.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_3.5.pdf}} - \only<6>{\includegraphics[width=\columnwidth]{figs/mu_2.pdf}\\\hphantom{ello}\includegraphics[width=0.8\columnwidth]{figs/land_2.pdf}} - \only<7>{\includegraphics[width=0.9\columnwidth]{figs/msg_marg_spectra.pdf}} - \end{overlayarea} \end{column} - \end{columns} -\end{frame} - -\begin{frame} - \frametitle{Towards generic marginal complexity} - \begin{columns} \begin{column}{0.5\textwidth} - \begin{itemize}[leftmargin=4em] - \item[\color{ictpgreen}\bf Trick \#1:] condition stationary points on \emph{value of smallest eigenvalue} - \end{itemize} - { - \small - \begin{align*} - \hspace{-3em}&\delta(\lambda_\text{min}(A)) \\ - \hspace{-3em}&=\lim_{\beta\to\infty}\int - \frac{d\pmb s\,\delta(N-\|\pmb s\|^2)e^{-\beta\pmb s^TA\pmb s}} - {\int d\pmb s'\,\delta(N-\|\pmb s'\|^2)e^{-\beta\pmb s'^TA\pmb s'}} - \delta\left(\frac{\pmb s^TA\pmb s}N\right) - \end{align*} - } - - \medskip - - \begin{itemize}[leftmargin=4em] - \item[\color{ictpgreen}\bf Trick \#2:] adjust $\mu\propto\operatorname{Tr}\operatorname{Hess}H$ until order-$N$ large deviation breaks - \end{itemize} - - \bigskip - - \tiny - \fullcite{Kent-Dobias_2024_Conditioning} - - \end{column} - \begin{column}{0.5\textwidth} - \hspace{0.9em} - \includegraphics[scale=0.8]{figs/spectrum_less.pdf} - \hspace{-1.6em} - \includegraphics[scale=0.8]{figs/spectrum_eq.pdf} - \hspace{-1.6em} - \includegraphics[scale=0.8]{figs/spectrum_more.pdf} - \\ - \includegraphics[scale=0.8]{figs/large_deviation.pdf} - - \vspace{-1em} - - \small - \begin{align*} - \hspace{-0.5em}G_0(\mu)=\frac 1N\log\Big\langle\delta\big(\lambda_\text{min}(A-\lambda\mu)\big)\Big\rangle_{A\in\text{GOE}(N)} - \end{align*} - \end{column} - \end{columns} -\end{frame} - -\begin{frame} - \frametitle{Marginal complexity: example} - \begin{columns} - \begin{column}{0.5\textwidth} - Example: non-Gaussian landscapes - \[ - H(\pmb x)=\frac12\sum_{i=1}^{\alpha N}V_i(\pmb x)^2 - \] - for spherical $\pmb x$ and Gaussian functions $V_i$ - \[ - \overline{V_i(\pmb x)V_j(\pmb x')}=\delta_{ij}f\bigg(\frac{\pmb x\cdot\pmb x'}N\bigg) - \] - - \vspace{-2em} - - \begin{overprint} - \onslide<1-2>\[ - f(q)=\tfrac12q^2+\tfrac12q^3 - \] - \onslide<3>\[ - f(q)=\kappa q+(1-\kappa)q^2 - \] - \end{overprint} - - \bigskip - - \tiny - \fullcite{Kent-Dobias_2024_Conditioning} - - \smallskip - - \fullcite{Kent-Dobias_2024_Algorithm-independent} - \end{column} - \begin{column}{0.5\textwidth} - \begin{overprint} - \onslide<1>\vspace{4em}\includegraphics[width=\textwidth]{figs/most_squares_complex.pdf} - \onslide<2>\vspace{-1.75em}\includegraphics[width=\textwidth]{figs/most_squares_complexity.pdf} - - \vspace{-1.95em} - - \hspace{-0.25em}\colorbox{white}{\includegraphics[width=\textwidth]{figs/most_squares_stability.pdf}} - \onslide<3>\vspace{-1em} - - \includegraphics[width=\textwidth]{figs/most_squares_nonzoom.pdf} - - \vspace{-0.4em} - - \includegraphics[width=\textwidth]{figs/most_squares_zoom.pdf} - - \vspace{1em} - \end{overprint} - \end{column} - \end{columns} -\end{frame} - -\begin{frame} - \frametitle{Which marginal minima attract the dynamics?} - \begin{columns} - \begin{column}{0.5\textwidth} - `Best case' performance: lowest marginal minima without the \emph{Overlap Gap Property} - - \smallskip - - \tiny - \fullcite{Gamarnik_2021-10_The} - - \normalsize - \medskip - - `Worst case' performance: ??? - - \smallskip - \tiny\fullcite{Folena_2023_On} - - \normalsize - \medskip - - \textcolor{mb}{\textbf{\boldmath{$E_\text{gs}$:}} ground state, energy of lowest minima} - - \smallskip - - \textcolor{mg}{\textbf{\boldmath{$E_\text{alg}$:}} algorithmic bound, set by OGP} - - \smallskip - - \textcolor{my}{\textbf{\boldmath{$E_\text{th}$:}} `threshold', marginal minima dominate} - - \medskip - - Gradient descent destination depends on \emph{global} property: basin of attraction size; - stationary point analysis is only \emph{local} - - \medskip - - \end{column} - \begin{column}{0.5\textwidth} - \begin{overprint} - \onslide<1>\includegraphics[width=\textwidth]{figs/folena_2023.png} - \onslide<2>\includegraphics[width=\textwidth]{figs/folena_new_2.pdf} - \end{overprint} \end{column} \end{columns} \end{frame} |