From eafb8806951c5a097114d351b541699cf75d1e8a Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Sat, 17 Jun 2023 17:15:13 +0200 Subject: Lots of polishing. --- figs/phases_34.pdf | Bin 37589 -> 37679 bytes when_annealed.bib | 14 +++++ when_annealed.tex | 153 +++++++++++++++++++++++++++-------------------------- 3 files changed, 93 insertions(+), 74 deletions(-) diff --git a/figs/phases_34.pdf b/figs/phases_34.pdf index ee6afae..ad8deb9 100644 Binary files a/figs/phases_34.pdf and b/figs/phases_34.pdf differ diff --git a/when_annealed.bib b/when_annealed.bib index 64dde6b..42c2e31 100644 --- a/when_annealed.bib +++ b/when_annealed.bib @@ -66,6 +66,20 @@ doi = {10.1103/physrevb.57.11251} } +@article{Crisanti_1992_The, + author = {Crisanti, A. and Sommers, H.-J.}, + title = {The spherical $p$-spin interaction spin glass model: the statics}, + journal = {Zeitschrift für Physik B Condensed Matter}, + publisher = {Springer Science and Business Media LLC}, + year = {1992}, + month = {10}, + number = {3}, + volume = {87}, + pages = {341--354}, + url = {https://doi.org/10.1007%2Fbf01309287}, + doi = {10.1007/bf01309287} +} + @article{Crisanti_2004_Spherical, author = {Crisanti, A. and Leuzzi, L.}, title = {Spherical $2+p$ Spin-Glass Model: An Exactly Solvable Model for Glass to Spin-Glass Transition}, diff --git a/when_annealed.tex b/when_annealed.tex index c723035..2d50dd2 100644 --- a/when_annealed.tex +++ b/when_annealed.tex @@ -36,21 +36,20 @@ A common measure of a function's complexity is the count of its stationary points. For complicated functions, this count grows exponentially with the volume and dimension of their domain. In practice, the count is averaged over - a class of such functions (the annealed average), but the large numbers - involved can result in averages biased by extremely rare samples. Typical + a class of functions (the annealed average), but the large numbers + involved can produce averages biased by extremely rare samples. 
Typical counts are reliably found by taking the average of the logarithm (the quenched average), which is more difficult and not often done in practice. When most stationary points are uncorrelated with each other, quenched and - anneals averages are equal. There are heuristics from equilibrium - calculations that guarantee when most of the lowest minima will be - uncorrelated. We show that these equilibrium heuristics cannot be used - to draw conclusions about other minima and saddles. We produce examples among - Gaussian-correlated functions on the hypersphere where the count of certain - saddles and minima has different quenched and annealed averages, despite - being guaranteed `safe' in the equilibrium setting. We determine conditions - for the emergence of nontrivial correlations between saddles, and discuss the - implications for the geometry of those functions and what out-of-equilibrium - settings might be affected. + annealed averages are equal. Equilibrium heuristics can guarantee when most of + the lowest minima will be uncorrelated. We show that these equilibrium + heuristics cannot be used to draw conclusions about other minima and saddles + by producing examples among Gaussian-correlated functions on the hypersphere + where the count of certain saddles and minima has different quenched and + annealed averages, despite being guaranteed `safe' in the equilibrium + setting. We determine conditions for the emergence of nontrivial correlations + between saddles, and discuss the implications for the geometry of those + functions and what out-of-equilibrium settings might be affected. \end{abstract} Random high-dimensional energies, cost functions, and interaction networks are @@ -72,10 +71,10 @@ approximation takes this average anyway, risking a systematic bias by rare and atypical samples. 
The annealed approximation is known to be exact for certain models and in certain circumstances, but it is used outside those circumstances without much reflection \cite{Wainrib_2013_Topological, Kent-Dobias_2021_Complex, -Gershenzon_2023_On-Site}. In a few cases researches have made instead the +Gershenzon_2023_On-Site}. In a few cases researchers have instead made the better-controlled quenched average, which averages the logarithm of the number of stationary points, and find deviations from the annealed approximation with -important implications for the system's behavior \cite{Muller_2006_Marginal, +important implications for the behavior \cite{Muller_2006_Marginal, Ros_2019_Complex, Kent-Dobias_2023_How, Ros_2023_Quenched}. Generically, the annealed approximation to the complexity is wrong when a nonvanishing fraction of pairs of stationary points have nontrivial correlations in their @@ -84,11 +83,11 @@ mutual position. A heuristic line of reasoning for the appropriateness of the annealed approximation is sometimes made when the approximation is correct for an equilibrium calculation on the same system. The argument goes like this: since -the limit of zero temperature or noise in an equilibrium calculation -concentrates the measure onto the lowest set of minima, the equilibrium free -energy in the limit to zero temperature should be governed by the same -statistics as the count of that lowest set of minima. This argument is valid, -but only for the lowest set of minima, which at least in glassy problems are +the limit of zero temperature in an equilibrium calculation +concentrates the Boltzmann measure onto the lowest set of minima, the equilibrium free +energy in the limit to zero temperature will be governed by the same +statistics as the count of that lowest set of minima. This argument is strictly +valid only for the lowest minima, which at least in glassy problems are rarely relevant to dynamical behavior. 
What about the \emph{rest} of the stationary points? @@ -104,7 +103,7 @@ population of saddle points is nevertheless correlated. We study the mixed spherical models, which are models of Gaussian-correlated random functions with isotropic statistics on the $(N-1)$-sphere. Each model consists of a class of functions $H:S^{N-1}\to\mathbb R$ defined by the -covariance between the function evaluated at two different points +covariance between the functions evaluated at two different points $\pmb\sigma_1,\pmb\sigma_2\in S^{N-1}$, which is a function of the scalar product (or overlap) between the two configurations: \begin{equation} \label{eq:covariance} @@ -123,10 +122,10 @@ literature and host a zoo of complex orders and phase transitions Crisanti_2011_Statistical}. There are several well-established results on the equilibrium of this model. -First, if the function $f$ is convex then it is not possible for the +First, if the function $\chi(q)=f''(q)^{-1/2}$ is convex then it is not possible for the equilibrium solution to have nontrivial correlations between states at any -temperature.\footnote{ - More specifically, convex $f$ cannot have an equilibrium order with more than +temperature \cite{Crisanti_1992_The}.\footnote{ + More specifically, convex $\chi$ cannot have an equilibrium order with more than {\oldstylenums1\textsc{rsb}} order among the configurations. In equilibrium, {\oldstylenums1\textsc{rsb}} corresponds to trivial correlations between thermodynamic states, but nontrivial correlations exist \emph{within} a state @@ -137,33 +136,35 @@ temperature.\footnote{ equilibrium trivial because it does not imply any nontrivial correlations between states. } -This is a very strong condition on the form of equilibrium order. Note that -non-convex $f$ does not imply that you will see nontrivial correlations between +This is a strong condition on the form of equilibrium order. 
Note that +non-convex $\chi$ does not imply that you will see nontrivial correlations between states at some temperature. In the $3+s$ models we consider here, models with -$s>8$ have non-convex $f$ and those with $s\leq8$ have convex $f$ independent +$s>8$ have non-convex $\chi$ and those with $s\leq8$ have convex $\chi$ independent of $\lambda$. Second, the characterization of the ground state has been made \cite{Crisanti_2004_Spherical, Crisanti_2006_Spherical, Crisanti_2011_Statistical, Auffinger_2022_The}. In the $3+s$ models we consider, for $s>12.430...$ nontrivial ground state configurations appear in -certain ranges of $\lambda$. These bounds on equilibrium order are shown in -Fig.~\ref{fig:phases}, along with our result in this paper for where the -complexity has nontrivial \textsc{rsb}. As evidenced in that figure, -\textsc{rsb} among saddles is possible well outside the bounds from +a range of $\lambda$. These bounds on equilibrium order are shown in +Fig.~\ref{fig:phases}, along with our result for where the complexity has +nontrivial correlations between some stationary points. As evidenced in that +figure, \textsc{rsb} among saddles is possible well outside the bounds from equilibrium. There are two important features which differentiate stationary points $\pmb\sigma^*$ in the spherical models: their \emph{energy density} $E=\frac1NH(\pmb\sigma^*)$ and their \emph{stability} $\mu=\frac1N\operatorname{\mathrm{Tr}}\operatorname{\mathrm{Hess}}H(\pmb\sigma^*)$. -The energy density should be familiar, as the `height' in the landscape. The -stability is so-called because it governs the spectrum of the stationary point. +The energy density gives the `height' in the landscape, while the +stability governs the spectrum of the stationary point. In each spherical model, the spectrum of every stationary point is a Wigner semicircle of the same width $\mu_\mathrm m=\sqrt{4f''(1)}$, but shifted by constant. The stability $\mu$ sets this constant shift. 
When $\mu<\mu_\mathrm m$, the spectrum still has support over zero and we have saddles with an extensive number of downward directions. When $\mu>\mu_\mathrm m$ the spectrum -has support only over positive eigenvalues, and we have stable minima. When -$\mu=\mu_\mathrm m$, the spectrum has a pseudogap, and we have marginal minima. +has support only over positive eigenvalues, and we have stable minima.\footnote{ + Saddle points with a subextensive number of downward directions also exist + via large deviations of some number of eigenvalues from the average spectrum. +} When $\mu=\mu_\mathrm m$, the spectrum has a pseudogap, and we have marginal minima. \begin{figure} \centering @@ -171,25 +172,28 @@ $\mu=\mu_\mathrm m$, the spectrum has a pseudogap, and we have marginal minima. \caption{ A phase diagram of the boundaries we discuss in this paper for the $3+s$ model with $f=\frac12\big(\lambda q^3+(1-\lambda)q^s\big)$. The blue region - shows models which have some stationary points with nontrivial - {\oldstylenums1}\textsc{rsb} structure, and is given by $G_f>0$ where $G_f$ - is found in \eqref{eq:condition}. The yellow region shows where $f$ is not - convex and therefore nontrivial \textsc{rsb} solutions are possible in - equilibrium. The green region shows where \textsc{rsb} solutions are - correct at the ground state, adapted from \cite{Auffinger_2022_The}. + shows models which have some stationary points with nontrivial correlated + (\textsc{rsb}) structure, and is given by $G_f>0$ where $G_f$ is found in + \eqref{eq:condition}. The yellow region shows where $\chi(q)=f''(q)^{-1/2}$ + is not convex and therefore nontrivial correlations between states are + possible in equilibrium. The green region shows where nontrivial + solutions are correct at the ground state, adapted from + \cite{Auffinger_2022_The}. We find that models where correlations between + equilibrium states are forbidden can nonetheless harbor + correlated stationary points. 
} \label{fig:phases} \end{figure} The number $\mathcal N(E,\mu)$ of stationary points with energy density $E$ and -stability $\mu$ is exponential in $N$ for these models. Their complexity -$\Sigma(E,\mu)$ is defined by the average of the logarithm of their number, or +stability $\mu$ is exponential in $N$. Their complexity +$\Sigma(E,\mu)$ is defined by the average of the logarithm of their number: $\Sigma(E,\mu)=\frac1N\overline{\log\mathcal N(E,\mu)}$. More often the annealed complexity is calculated, where the average is taken before the logarithm: $\Sigma_\mathrm a(E,\mu)=\frac1N\log\overline{\mathcal N(E,\mu)}$. The annealed complexity has been computed for these models \cite{BenArous_2019_Geometry, Folena_2020_Rethinking}, and the quenched -complexity has been computed for a couple examples which have correlations -among ground-state minima \cite{Kent-Dobias_2023_How}. +complexity has been computed for a couple examples which have nontrivial ground +states \cite{Kent-Dobias_2023_How}. In these models, trivial correlations between stationary points correspond with zero overlap: almost all stationary points are orthogonal to each other. This @@ -217,7 +221,7 @@ by extremizing an effective action, =\lim_{n\to0}\int dq_1\,dx\,\mathcal S_{\oldstylenums1\textsc{rsb}}(q_1,x\mid E,\mu)e^{nN\mathcal S_{\oldstylenums1\textsc{rsb}}(q_1,x\mid E,\mu)} =\mathop{\mathrm{extremum}}_{q_1,x}\mathcal S_{\oldstylenums1\textsc{rsb}}(q_1,x\mid E,\mu) \end{equation} -for the action $\mathcal S$ given by +for the action $\mathcal S_{\oldstylenums1\textsc{rsb}}$ given by \begin{equation} \begin{aligned} &\mathcal S_{\oldstylenums1\textsc{rsb}}(q_1,x\mid E,\mu) @@ -251,14 +255,13 @@ where $\Delta x=1-x$ and -\log\left(\left|\frac{\mu}{\mu_\text m}\right|-\sqrt{\big(\frac\mu{\mu_\text m}\big)^2-1}\right) & \mu^2>\mu_\text m^2 \end{cases} \end{equation} +The details of the derivation of these expressions can be found in \cite{Kent-Dobias_2023_How}. 
The extremal problem in $\hat\beta$, $r_\mathrm d$, $r_1$, $d_\mathrm d$, and $d_1$ has a unique solution and can be found explicitly, but the resulting -formula is much more complicated so we do not include it here. There can be -multiple extrema at which to evaluate $\mathcal -S_{\oldstylenums1\textsc{rsb}}$, in this case the one for which $\Sigma$ is +formula is unwieldy. The action can have multiple extrema, but the one for which the complexity is \emph{smallest} gives the correct solution. There is always a solution for -$x=1$ which is independent of $q_1$, which corresponds to the replica symmetric -case and which is equal to the annealed calculation, so $\Sigma_\mathrm +$x=1$ which is independent of $q_1$, corresponding to the replica symmetric +case, and with $\Sigma_\mathrm a(E,\mu)=\mathcal S_{\oldstylenums1\textsc{rsb}}(E,\mu\mid q_1,1)$. The crux of this paper will be to determine when this solution is not the global one. @@ -267,11 +270,10 @@ It isn't accurate to say that a solution to the saddle point equations is not a variational problem, so there is nothing to be maximized or minimized, and in general even global solutions are not even local minima of the action. However, the stability of the action can still tell us something about the -emergence of new solutions: when another solution bifurcates from an existing -one, the action will have a flat direction. Unfortunately this is a difficult -procedure to apply in general, since one must know the parameters of the new -solution, and some parameters, e.g., $q_1$, are unconstrained and can take any -value in the old solution. +emergence of new solutions: when a new solution bifurcates from an existing +one, the action will have a flat direction. Unfortunately this is difficult to +search out, since one must know the parameters of the new solution, and $q_1$ +is unconstrained and can take any value in the old solution. 
There is one place where we can consistently search for a bifurcating solution to the saddle point equations: along the zero complexity line $\Sigma_\mathrm @@ -279,7 +281,7 @@ a(E,\mu)=0$. Going along this line in the replica symmetric solution, the {\oldstylenums1}\textsc{rsb} complexity transitions at a critical point where $x=q_1=1$ \cite{Kent-Dobias_2023_How}. Since all the parameters in the bifurcating solution are known at this point, we can search for it by looking -for a flat direction in the way described above. In the annealed solution for +for a flat direction. In the annealed solution for points describing saddles ($\mu<\mu_\mathrm m$), this line is \begin{equation} \label{eq:extremal.line} \mu_0=-\frac1{z_f}\left(2Ef'f''+\sqrt{2f''u_f\bigg(\log\frac{f''}{f'}z_f-E^2(f''-f')\bigg)}\right) @@ -325,9 +327,9 @@ by \qquad d=\frac{w_f}{f'f''} \end{equation} -Changing variables to $y$ from $\mu$ is a convenient choice because the branch +Changing variables from $\mu$ to $y$ is convenient because the branch of \eqref{eq:extremal.line} is chosen by the sign of $y$ (the lower-energy -branch we are interested in corresponds with $y>0$) and the relationship +branch we are interested in corresponds with $y>0$). The relationship between $y$ and $E$ on the extremal line is $g=2hy^2+eE^2$, where the constants $e$, $g$, and $h$ are given by \begin{equation} @@ -345,8 +347,8 @@ $e$, $g$, and $h$ are given by \caption{ Stationary point statistics as a function of energy density $E$ and stability $\mu$ for a $3+5$ model with $\lambda=\frac12$. The dashed black - line shows the line of zero complexity, where stationary points vanish, and - enclosed inside they are found in exponential number. The red region (blown + line shows the line of zero annealed complexity, where stationary points vanish, and + enclosed inside they are found in exponential number. The solid black line (only visible in the inset) gives the line of zero {\oldstylenums1\textsc{rsb}} complexity. 
The red region (blown up in the inset) shows where the annealed complexity gives the wrong count and a {\oldstylenums1}\textsc{rsb} complexity in necessary. The red points show where $\det M=0$. The left point, which is only an upper bound on the @@ -368,7 +370,7 @@ energies that satisfy \end{equation} This predicts two points where a {\oldstylenums1}\textsc{rsb} solution can bifurcate from the annealed one. The remainder of the transition line can be -found by trying to solve the extremal problem for the action very close to one +found by solving the extremal problem for the action very close to one of these solutions, and then taking small steps in the parameters $E$ and $\mu$ until it terminates. In many cases considered here, the line of transitions in the complexity that begins at $E_{\oldstylenums1\textsc{rsb}}^+$, the higher @@ -427,7 +429,7 @@ $G_f$ is positive is shown in Fig.~\ref{fig:phases}. } \label{fig:energy_ranges} \end{figure} -Fig.~\ref{fig:energy_ranges} shows the range of energies at which nontrivial +Fig.~\ref{fig:energy_ranges} shows the range of energies where nontrivial correlations are found between stationary points in several $3+s$ models as $\lambda$ is varied. For models with smaller $s$, such correlations are found only among saddles, with the boundary never dipping beneath the minimum energy @@ -439,7 +441,7 @@ $s$, the range passes into minima, which is excepted as these models have nontrivial complexity of their ground states. This also seems to correspond with the decoupling of the \textsc{rsb} solutions connected to $E_{\oldstylenums1\textsc{rsb}}^+$ and $E_{\oldstylenums1\textsc{rsb}}^-$, with -the two phase boundaries not corresponding, as in Fig.~\ref{fig:order}. In +the two phase boundaries no longer corresponding, as in Fig.~\ref{fig:order}. 
In these cases, $E_{\oldstylenums1\textsc{rsb}}^-$ sometimes gives the lower bound, but sometimes it is given by the termination of the phase boundary extended from $E_{\oldstylenums1\textsc{rsb}}^+$. @@ -480,8 +482,7 @@ Consider a specific $H$ with +\frac{\sqrt{1-\lambda}}{s!}\sum_{i_1\cdots i_s}J^{(s)}_{i_1\cdots i_s}\sigma_{i_1}\cdots\sigma_{i_s} \end{equation} where the interaction tensors $J$ are drawn from zero-mean normal distributions -with $\overline{(J^{(p)})^2}=p!/2N^{p-1}$ and likewise for $J^{(s)}$. It is -straightforward to confirm that $H$ defined this way has the covariance +with $\overline{(J^{(p)})^2}=p!/2N^{p-1}$ and likewise for $J^{(s)}$. Functions $H$ defined this way have the covariance property \eqref{eq:covariance} with $f(q)=\frac12\big(\lambda q^p+(1-\lambda)q^s\big)$. With the $J$s drawn in this way and fixed for $p=3$ and $s=14$, we can vary $\lambda$, and according to Fig.~\ref{fig:phases} we @@ -494,10 +495,10 @@ uncorrelated low-lying states splitting apart into correlated clusters. Where existing stationary points do appear to split apart, when $\lambda$ is decreased from large values, is among saddles, not minima. -Similar reasoning can be made for other mixed models, like the $2+s$, which +A similar analysis can be made for other mixed models, like the $2+s$, which should see complexities with other forms of \textsc{rsb}. For instance, in \cite{Kent-Dobias_2023_How} we show that the complexity transitions from -\textsc{rs} to full \textsc{rsb} when +\textsc{rs} to full \textsc{rsb} (\textsc{frsb}) along the line \begin{equation} \mu =-\frac{(f'+f''(0))u_f}{(2f-f')f'f''(0)^{1/2}} @@ -505,23 +506,23 @@ should see complexities with other forms of \textsc{rsb}. For instance, in \end{equation} which can only be realized when $f''(0)\neq0$, as in the $2+s$ models. 
For $s>2$, this transition line \emph{always} intersects the extremal line -\eqref{eq:extremal.line}, and so \textsc{rsb} complexity should always be found +\eqref{eq:extremal.line}, and so \textsc{rsb} complexity will always be found among some population of stationary points. However, it is likely that for much of the parameter space the so-called one-full \textsc{rsb} -({\oldstylenums1\textsc{frsb}}) is the correct solution, as it likely is for -large $s$ in the $3+s$ model at hand. Further work to find the conditions for -transitions of the complexity to these forms of order is necessary. For values +({\oldstylenums1\textsc{frsb}}), rather than \textsc{frsb}, is the correct solution, as it likely is for +large $s$ and certain $\lambda$ in the $3+s$ models studied here. Further work to find the conditions for +transitions of the complexity to {\oldstylenums1\textsc{frsb}} and {\oldstylenums2\textsc{frsb}} is necessary. For values of $s$ where there is no \textsc{rsb} of any kind in the ground state, we expect that the {\oldstylenums1\textsc{rsb}} complexity is correct. What are the implications for dynamics? We find that nontrivial correlations -tend to exist among saddle points with the maximum or minimum index possible at +tend to exist among saddle points with the largest or smallest possible index at a given energy density, which are quite atypical in the landscape. However, these strangely correlated saddle points must descend to uncorrelated minima, which raises questions about whether structure on the boundary of a basin of attraction is influential to the dynamics that descends into that basin. These saddles might act as early-time separatrices for descent trajectories. 
With -large open problems in even the gradient decent dynamics on these models, it +open problems in even the gradient descent dynamics on these models (itself attracted to an atypical subset of marginal minima), it remains to be seen whether such structures could be influential \cite{Folena_2020_Rethinking, Folena_2021_Gradient, Folena_2023_On}. This structure among saddles cannot be the only influence, since it seems that the $3+4$ model is `safe' @@ -532,7 +533,11 @@ spherical models has different quenched and annealed averages, as the result of nontrivial correlations between stationary points. We saw that these conditions can arise among certain populations of saddle points even when the model is guaranteed to lack such correlations between equilibrium states, and exist for -saddle points at a wide range of energies. +saddle points at a wide range of energies. This suggests that studies using +complexity calculations cannot reliably use equilibrium behavior to defend +making the annealed approximation. Our result has direct implications for the +geometry of these landscapes, and perhaps could be influential to certain +out-of-equilibrium dynamics. \paragraph{Funding information} -- cgit v1.2.3-70-g09d2