Lots of writing.

author: Jaron Kent-Dobias <jaron@kent-dobias.com> 2024-09-07 19:48:36 +0200
committer: Jaron Kent-Dobias <jaron@kent-dobias.com> 2024-09-07 19:48:36 +0200
commit: 4d6c19367c5eff3eaf29376d663369730d36cd74 (patch)
tree: 5de52e430c5c351c6b850694fd3c4aecfcdf6f76
parent: fb98534843c67fa994657a9628f3f315856f3ece (diff)
download: SciPostPhys_18_158-4d6c19367c5eff3eaf29376d663369730d36cd74.tar.gz
SciPostPhys_18_158-4d6c19367c5eff3eaf29376d663369730d36cd74.tar.bz2
SciPostPhys_18_158-4d6c19367c5eff3eaf29376d663369730d36cd74.zip
2 files changed, 247 insertions, 92 deletions
diff --git a/topology.bib b/topology.bib
index 6d09b4e..81d2df8 100644
--- a/topology.bib
+++ b/topology.bib
@@ -39,6 +39,20 @@
  translator = {Erné, Reinie}
 }
 
+@unpublished{Auffinger_2022_The,
+ author = {Auffinger, Antonio and Zhou, Yuxin},
+ title = {The Spherical $p+s$ Spin Glass At Zero Temperature},
+ year = {2022},
+ month = {9},
+ url = {http://arxiv.org/abs/2209.03866v1},
+ doi = {10.48550/arXiv.2209.03866},
+ note = {arXiv preprint},
+ date = {2022-09-08T15:08:26Z},
+ eprint = {2209.03866v1},
+ eprintclass = {math.PR},
+ eprinttype = {arxiv}
+}
+
 @article{Baldassi_2016_Unreasonable,
  author = {Baldassi, Carlo and Borgs, Christian and Chayes, Jennifer T. and Ingrosso, Alessandro and Lucibello, Carlo and Saglietti, Luca and Zecchina, Riccardo},
  title = {Unreasonable effectiveness of learning neural networks: From accessible states and robust ensembles to basic algorithmic schemes},
@@ -409,6 +423,15 @@
  doi = {10.2307/2371510}
 }
 
+@phdthesis{Tublin_2022_A,
+ author = {Tublin, Rashel},
+ title = {A Few Results in Random Matrix Theory and Random Optimization},
+ year = {2022},
+ month = {September},
+ url = {https://kclpure.kcl.ac.uk/portal/en/studentTheses/a-few-results-in-random-matrix-theory-and-random-optimization},
+ school = {King's College London}
+}
+
 @article{Urbani_2023_A,
  author = {Urbani, Pierfrancesco},
  title = {A continuous constraint satisfaction problem for the rigidity transition in confluent tissues},
diff --git a/topology.tex b/topology.tex
index 6252c96..4354db3 100644
--- a/topology.tex
+++ b/topology.tex
@@ -126,7 +126,7 @@ zero-gradient solutions to overparameterized smooth neural networks and in verte
 In such problems, there is great interest in characterizing structure in the
 set of solutions, which can be influential in how algorithms behave when trying
 to solve them \cite{Baldassi_2016_Unreasonable, Baldassi_2019_Properties,
-Beneventano_2023_On}. Here, we show how \emph{topological} information about
+Beneventano_2023_On}. Here, we show how topological information about
 the set of solutions can be calculated in a simple model of satisfying random
 nonlinear equalities. This allows us to reason about the connectivity of this
 solution set. The topological properties revealed by this calculation yield
@@ -157,13 +157,13 @@ with the elements of the tensors $J^{(k,p)}$ as independently distributed
 unit normal random variables satisfies \eqref{eq:covariance}. The size of the
 series coefficients of $f$ therefore control the variances in the coefficients
 of random polynomial constraints. When $M=1$, this problem corresponds to the
-level set of a spherical spin glass with energy density $E=\sqrt{N}V_0$.
+level set of a spherical spin glass with energy density $E=V_0/\sqrt{N}$.
 
 
 This problem or small variations thereof have attracted attention recently for
 their resemblance to encryption, optimization, and vertex models of confluent
 tissues \cite{Fyodorov_2019_A, Fyodorov_2020_Counting,
-  Fyodorov_2022_Optimization, Urbani_2023_A, Kamali_2023_Dynamical,
+  Fyodorov_2022_Optimization, Tublin_2022_A, Vivo_2024_Random, Urbani_2023_A, Kamali_2023_Dynamical,
   Kamali_2023_Stochastic, Urbani_2024_Statistical, Montanari_2023_Solving,
 Montanari_2024_On, Kent-Dobias_2024_Conditioning, Kent-Dobias_2024_Algorithm-independent}. In each of these cases, the authors studied properties of
 the cost function
@@ -277,12 +277,12 @@ The result is the reduction of the average Euler characteristic to an expression
 form
 \begin{equation} \label{eq:pre-saddle.characteristic}
   \overline{\chi(\Omega)}
-  =\left(\frac N{2\pi}\right)^2\int dR\,dD\,dm\,d\hat m\,g(R,D,m,\hat m)\,e^{N\mathcal S_\Omega(R,D,m,\hat m)}
+  =\left(\frac N{2\pi}\right)^2\int dR\,dD\,dm\,d\hat m\,g(R,D,m,\hat m)\,e^{N\mathcal S_\chi(R,D,m,\hat m)}
 \end{equation}
-where $g$ is a prefactor of $o(N^0)$, and $\mathcal S_\Omega$ is an effective action defined by
+where $g$ is a prefactor of $o(N^0)$, and $\mathcal S_\chi$ is an effective action defined by
 \begin{equation} \label{eq:euler.action}
   \begin{aligned}
-    \mathcal S_\Omega(R,D,m,\hat m\mid\alpha,V_0)
+    \mathcal S_\chi(R,D,m,\hat m\mid\alpha,V_0)
     &=\hat m-\frac\alpha2\left[
       \log\left(1+\frac{f(1)D}{f'(1)R^2}\right)
       +\frac{V_0^2}{f(1)}\left(1+\frac{f'(1)R^2}{f(1)D}\right)^{-1}
@@ -311,30 +311,32 @@ new effective action of $m$ alone. This can be solved to give
 \end{equation}
 \begin{equation}
   \begin{aligned}
-    R^*(m)
-    &=\frac{-m(1-m^2)}{2[f(1)-(1-m^2)f'(1)]^2}
+    R^*
+    =\frac{-m(1-m^2)}{2[f(1)-(1-m^2)f'(1)]^2}
     \Bigg[
       \alpha V_0^2f'(1)
-      +(2-\alpha)f(1)\left(\frac{f(1)}{1-m^2}-f'(1)\right) \\
-    &\quad+\alpha\sqrt{
+      +(2-\alpha)f(1)\left(\frac{f(1)}{1-m^2}-f'(1)\right) \quad \\
+    \quad+\alpha\sqrt{
       \tfrac{4V_0^2}\alpha f(1)f'(1)\left[\tfrac{f(1)}{1-m^2}-f'(1)\right]
       +\left[\tfrac{f(1)^2}{1-m^2}-\big(V_0^2+f(1)\big)f'(1)\right]^2
     }
   \Bigg]
   \end{aligned}
 \end{equation}
+with the resulting effective action as a function of $m$ alone given by
 \begin{equation}
-  \mathcal S_\Omega(m)
+  \mathcal S_\chi(m)
   =-\frac\alpha2\bigg[
     \log\left(
-      1-\frac{f(1)}{f'(1)}\frac{1+m/R^*}{1-m^2}
+      1-\frac{f(1)}{f'(1)}\frac{1+\frac m{R^*}}{1-m^2}
     \right)
     +\frac{V_0^2}{f(1)}\left(
-      1-\frac{f'(1)}{f(1)}\frac{1-m^2}{1+m/R^*}
+      1-\frac{f'(1)}{f(1)}\frac{1-m^2}{1+\frac m{R^*}}
     \right)^{-1}
   \bigg]
   +\frac12\log\left(-\frac m{R^*}\right)
 \end{equation}
+This function is plotted as a function of $m$ in Fig.~\ref{fig:action} for a variety of $V_0$ and $f$.
 To finish evaluating the integral, this expression should be maximized with
 respect to $m$. The order parameter $m$ is both physical and interpretable, as
 it gives the overlap of the configuration $\mathbf x$ with the height axis
@@ -342,75 +344,132 @@ $\mathbf x_0$. Therefore, the value $m^*$ that maximizes this action can be
 understood as the latitude on the sphere where most of the contribution to the
 Euler characteristic is made.
 
-The action $\mathcal S_\Omega$ is extremized with respect to $m$ at $m=0$ or $m=\pm m^*=\mp R^*(m^*)$.
+\begin{figure}
+  \includegraphics{figs/action_1.pdf}
+  \hspace{-3.5em}
+  \includegraphics{figs/action_3.pdf}
+
+  \caption{
+    \textbf{Effective action for the Euler characteristic.}
+    The effective action governing the average Euler characteristic as a function of the overlap
+    $m=\frac1N\mathbf x\cdot\mathbf x_0$ with the height direction for two
+    different homogeneous polynomial functions and a variety of target values $V_0$. In both
+    plots $\alpha=\frac12$. \textbf{Left:} With linear functions there are two
+    regimes. For small $V_0$, there are maxima at $m=\pm m^*$ where the action
+    is zero, while after the satisfiability transition at $V_0=V_\text{\textsc{sat}}=1$, $m^*$
+    goes to zero and the action becomes negative. \textbf{Right:} With nonlinear
+    functions there are four regimes. For small $V_0$ the behavior is the same
+    as in the linear case, with zero action. After an onset transition at
+    $V_0=V_\text{on}\simeq1.099$ the maxima are at the edge of validity of the
+    action and the action is positive. At a shattering transition at
+    $V_0=V_\text{sh}\simeq1.394$, the maximum goes to zero and the action is positive.
+    Finally, at the satisfiability transition
+    $V_0=V_\text{\textsc{sat}}\simeq1.440$ the action becomes negative.
+  } \label{fig:action}
+\end{figure}
+
+The action $\mathcal S_\chi$ is extremized with respect to $m$ at $m=0$ or $m=\pm m^*=\mp R^*(m^*)$.
 In the latter case, $m^*$ takes the value
 \begin{equation}
   m^*=\sqrt{1-\frac{\alpha}{f'(1)}\big(V_0^2+f(1)\big)}
 \end{equation}
-and $\mathcal S_\Omega(m^*)=0$. If this solution was always well-defined, it would vanish when the argument of the square root vanishes for
+and $\mathcal S_\chi(m^*)=0$. If this solution were always well-defined, it would vanish when the argument of the square root vanishes for
 \begin{equation}
   V_0^2>V_{\text{\textsc{sat}}\ast}^2\equiv\frac{f'(1)}\alpha-f(1)
 \end{equation}
-However, when
+This corresponds precisely to the satisfiability transition found in previous work by a replica symmetric analysis of the cost function \eqref{eq:cost} \cite{Fyodorov_2019_A, Fyodorov_2020_Counting, Fyodorov_2022_Optimization, Tublin_2022_A, Vivo_2024_Random}.
+However, when the magnitude of $V_0$ is sufficiently large, with
 \begin{equation}
   V_0^2>V_\text{on}^2\equiv\frac{1-\alpha+\sqrt{1-\alpha}}\alpha f(1)
 \end{equation}
-$R^*(m^*)$ becomes complex and the solution is no longer valid. Since
+$R^*(m^*)$ becomes complex and this solution is no longer valid. Since
 \begin{equation}
   V_\text{on}^2-V_{\text{\textsc{sat}}\ast}^2
   =\frac{f'(1)-f(1)}\alpha-\frac{\sqrt{1-\alpha}}\alpha f(1)
 \end{equation}
-If $f(q)$ is linear, then $f'(1)=f(1)$ and $V_\text{on}^2>V_{\text{\textsc{sat}}\ast}^2$, so the 
+If $f(q)$ is purely linear, then $f'(1)=f(1)$ and
+$V_\text{on}^2>V_{\text{\textsc{sat}}\ast}^2$, so the naive satisfiability
+transition happens first. On the other hand, when $f(q)$ contains powers of $q$
+strictly greater than 1, then $f'(1)\geq 2f(1)$ and $V_\text{on}^2\leq
+V_{\text{\textsc{sat}}\ast}^2$, so the onset happens first. In situations with
+mixed constant, linear, and nonlinear terms in $f$, the order of the
+transitions depends on the precise form of $f$.
 
-Likewise, when
+Likewise, the solution at $m=0$ is not always valid. For $V_0$ of magnitude sufficiently small, with
 \begin{equation}
   V_0^2<V_\text{sh}^2\equiv\frac{2(1+\sqrt{1-\alpha})-\alpha}{\alpha}f(1)\left(1-\frac{f(1)}{f'(1)}\right)
 \end{equation}
-the maximum at $m=0$ becomes complex and that solution is invalid. Examples of
-$\mathcal S_\Omega$ in difference regimes are shown in Fig.~\ref{fig:action}.
+the maximum at $m=0$ becomes complex and that solution is invalid. For purely
+linear $f(q)$, $V_\text{sh}=0$ and the solution at $m=0$ is always real, though
+for $V_0^2<V_{\text{\textsc{sat}}\ast}^2$ it is a minimum rather than a
+maximum.
 These transition values of the target $V_0$ correspond with transition values in $\alpha$ of
 \begin{align}
   \alpha_\text{on}=1-\left(\frac{V_0^2}{V_0^2+f(1)}\right)^2
   &&
   \alpha_\text{sh}=4V_0^2f(1)f'(1)\frac{f'(1)-f(1)}{\big((V_0^2+f(1))f'(1)-f(1)^2\big)^2}
 \end{align}
+Finally, there is another satisfiability transition at
+$V=V_\text{\textsc{sat}}$ corresponding to the vanishing of the effective
+action at the $m=0$ solution. For generic covariance function $f$ it is not
+possible to write an explicit formula for $V_\text{\textsc{sat}}$, and we
+usually calculate it through a numeric root-finding algorithm. However, for
+linear $f(q)$ one can see that
+$V_\text{\textsc{sat}}=V_{\text{\textsc{sat}}\ast}$, and indeed this is the
+case whenever $V_{\text{\textsc{sat}}\ast}^2<V_\text{on}^2$.
 
 \begin{figure}
-  \includegraphics{figs/action_1.pdf}
-  \hspace{-3.5em}
-  \includegraphics{figs/action_3.pdf}
+  \includegraphics{figs/phases_1.pdf}
+  \hspace{-3em}
+  \includegraphics{figs/phases_2.pdf}
+  \hspace{-3em}
+  \includegraphics{figs/phases_3.pdf}
 
   \caption{
-    \textbf{Effective action for the Euler characteristic.}
-    The effective action governing the average Euler characteristic as a function of the overlap
-    $m=\frac1N\mathbf x\cdot\mathbf x_0$ with the height direction for two
-    different homogeneous polynomial functions and a variety of target values $V_0$. In both
-    plots $\alpha=\frac12$. \textbf{Left:} With linear functions there are two
-    regimes. For small $V_0$, there are maxima at $m=\pm m^*$ where the action
-    is zero, while after the satisfiability transition at $V_0=V_\text{\textsc{sat}}=1$, $m^*$
-    goes to zero and the action becomes negative. \textbf{Right:} With nonlinear
-    functions there are four regimes. For small $V_0$ the behavior is the same
-    as in the linear case, with zero action. After an onset transition at
-    $V_0=V_\text{on}\simeq1.099$ the maxima are at the edge of validity of the
-    action and the action is positive. At a shattering transition at
-    $V_0=V_\text{sh}\simeq1.394$, $m^*$ goes to zero and the action is positive.
-    Finally, at the satisfiability transition
-    $V_0=V_\text{\textsc{sat}}\simeq1.440$ the action becomes negative.
-  } \label{fig:action}
+    \textbf{Topological phase diagram.}
+    Topological phases of the model for three different homogeneous covariance
+    functions. The onset transition $V_\text{on}$, shattering transition
+    $V_\text{sh}$, and satisfiability transition $V_\text{\textsc{sat}}$
+    are indicated when they exist. In the limit of $\alpha\to0$, the behavior
+    of level sets of the spherical spin glasses are recovered: the final plot
+    shows how in the pure cubic model the ground state energy $E_\text{gs}$ and threshold energy
+    $E_\text{th}$ correspond with the limits of the satisfiability and
+    shattering transitions, respectively. Note that for mixed models with
+    inhomogeneous covariance functions, $E_\text{th}$ is not the lower limit of
+    $V_\text{sh}$.
+  } \label{fig:phases}
 \end{figure}
 
+The phase diagram implied by these transitions is shown in
+Fig.~\ref{fig:phases} for three different homogeneous $f(q)$. One interesting
+feature occurs in the limit of $\alpha$ to zero. If $V_0$ is likewise rescaled
+in the correct way, the limit of these phase boundaries approaches known
+landmark energy values in the pure spherical spin glasses. In particular, the
+limit to zero $\alpha$ of the scaled satisfiability transition
+$V_\text{\textsc{sat}}\sqrt\alpha$ approaches the ground state energy
+$E_\text{gs}$, while the limit to zero $\alpha$ of the scaled shattering
+transition $V_\text{sh}\sqrt\alpha$ approaches the threshold energy
+$E_\text{th}$. The correspondence between ground state and satisfiability is
+expected: when the energy of a level set is greater in magnitude than the
+ground state, the level set will usually be empty. The correspondence between
+the threshold and shattering energies is also sensible, since the threshold
+energy is typically understood as the point where the landscape fractures into
+pieces. However, this second correspondence is only true for the pure spherical
+models with homogeneous $f(q)$. For any other model with an inhomogeneous
+$f(q)$, $E_\text{sh}^2<E_\text{th}^2$. This may have implications for dynamics
+in such mixed models, see the discussion in Section~\ref{sec:ssg}.
 
-The many positive values the Euler characteristic can take is one problem, but
-a larger one is the many negative values it can take. A manifold with many
+Interpreting the specific topology implied by the Euler characteristic is difficult when the prediction is positive, but
+a larger problem lies with the many negative values it can take. A manifold with many
 handles will have a very negative Euler characteristic, and our calculation
 relying on the saddle point of an exponential integral would be invalid. In the
-regime of $\mathcal S_\Omega(m)$ where the action is complex-valued, is this
+regime of $\mathcal S_\chi(m)$ where the action is complex-valued, is this
 breakdown the result of a negative average characteristic, or is it the result
 of another reason? It is difficult to answer this with the previous calculation
 alone. However, in the next section we will make a complementary calculation
 that rules out the negative Euler characteristic picture. Instead, we will see
 that in the complex region, the large-deviation function in $m$ that $\mathcal
-S_\Omega(m)$ represents breaks down due to the vanishingly small probability of
+S_\chi(m)$ represents breaks down due to the vanishingly small probability of
 finding any stationary points.
 
 
@@ -433,7 +492,7 @@ around the determinant of the Hessian.
 Understanding the number of stationary points as a function of latitude $m$
 will clarify the meaning of our effective action for the average Euler
 characteristic in the range of overlaps $m$ where it takes a complex value. This is because the average number of stationary points is a
-nonnegative number. If the region of complex $\mathcal S_\Omega$ has a
+nonnegative number. If the region of complex $\mathcal S_\chi$ has a
 well-defined number of stationary points, it indicates that we are looking at a
 situation with a negative average Euler characteristic. On the other hand, if
 the average number of stationary points yields a complex value at some latitude
@@ -526,19 +585,28 @@ have characteristics of either 0 or 4.
   \includegraphics{figs/regime_4.pdf}
 
   \caption{
-    \textbf{Behavior of the action in four regimes.} The effective action $\mathcal S_\Omega$ as a function of overlap $m$ with
+    \textbf{Behavior of the action in four regimes.} The effective action $\mathcal S_\chi$ as a function of overlap $m$ with
     the height axis for our model with $f(q)=\frac12q^3$ and $\alpha=\frac12$
     at three different target values $V_0$. \textbf{Left: the connected
-    regime.} The action is maximized with $\mathcal S_\Omega(m^*)=0$, and no
+    regime.} The action is maximized with $\mathcal S_\chi(m^*)=0$, and no
     stationary points are found with overlap less than $m_\text{min}$ with a
     random point on the sphere. \textbf{Center: the onset regime.} The action
-    is maximized with $\mathcal S_\Omega(m_\text{min})>0$, and is positive up
+    is maximized with $\mathcal S_\chi(m_\text{min})>0$, and is positive up
     to $m_\text{max}$. No stationary points are found with overlap less than $m_\text{min}$. \textbf{Right: the shattered regime.} The action is
-    maximized with $\mathcal S_\Omega(0)>0$, and is positive up to
+    maximized with $\mathcal S_\chi(0)>0$, and is positive up to
     $m_\text{max}$.
   }
 \end{figure}
 
+With these caveats in mind, we can combine the information from the previous
+calculations to reason about what the topology and geometry of $\Omega$ should
+be. We know what the average Euler characteristic is, and we know that it
+corresponds to the average number of stationary points. We also know these two
+averages correspond with each other when restricted to arbitrary latitudes $m$
+associated with an arbitrary height axis $\mathbf x_0$, and we know their value
+at each latitude. From this information, we draw the following inferences about
+the form of the solution manifold in our four possible regimes.
+
 \paragraph{The connected regime: \boldmath{$V_0^2<V_\text{on}^2$}.}
 
 In our calculation above, $\overline{\chi(\Omega)}=2$ could mean a fine-tuned
@@ -557,10 +625,10 @@ manifold is exclusively made up of extensive components, because the existence
 of small components would lead to stationary points near the equator with
 respect to a randomly chosen axis $\mathbf x_0$. The solution manifold is
 homeomorphic to a topological space with large Euler characteristic like the
-product of union of many spheres. In the former case we would have one
+product or disjoint union of many spheres. In the former case we would have one
 topologically nontrivial connected component, while in the latter we would have
 many simple disconnected components; the reality could be a combination of the
-two. It the framework of this calculation, it is not possible to distinguish between
+two. In the framework of this calculation, it is not possible to distinguish between
 these scenarios. In any case, the minima and maxima of the height on the
 solution manifold are typically found at latitude $m_\text{min}$ but are found
 in exponential number up to the latitude $m_\text{max}$.
@@ -588,53 +656,54 @@ realizations of the functions $V_k$ the set $\Omega$ is empty.
   \includegraphics{figs/bar.pdf}
 
   \caption{
-    \textbf{Cartoon of the solution manifold.} The arrow shows the vector $\mathbf x_0$ defining the height
+    \textbf{Cartoon of the solution manifold.}
+    A low-dimensional sketch of the solution manifold in black, with stationary
+    points of the height function as red points.
+    The arrow shows the vector $\mathbf x_0$ defining the height
     function. For $V_0<V_\text{on}$, the manifold has a single connected
     component. Above the onset with $V_\text{on}<V_0<V_\text{sh}$, the manifold
-    has a large connected component around the equator, and many disconnected
-    pieces in a certain range of latitudes. Above the shattering transition, or
-    $V_\text{sh}<V_0<V_\text{\textsc{sat}}$, the large connected
-    component vanishes and small disconnected pieces occupy the entire equatorial
-    region. Finally, above the satisfiability transition
+    has large connected components with nontrivial topology. Above the shattering transition, or
+    $V_\text{sh}<V_0<V_\text{\textsc{sat}}$, small disconnected pieces appear. Finally, above the satisfiability transition
     $V_\text{\textsc{sat}}$ the manifold vanishes.
   } \label{fig:cartoons}
 \end{figure}
 
-\begin{figure}
-  \includegraphics{figs/phases_1.pdf}
-  \hspace{-3em}
-  \includegraphics{figs/phases_2.pdf}
-  \hspace{-3em}
-  \includegraphics{figs/phases_3.pdf}
-
-  \caption{
-    \textbf{Topological phase diagram.}
-    Topological phases of the model for three different homogeneous covariance
-    functions. The onset transition $V_\text{onset}$, shattering transition
-    $V_\text{sh}$, and satisfiability transition $V_\text{\textsc{sat}}$
-    are indicated when they exist. In the limit of $\alpha\to0$, the behavior
-    of level sets of the spherical spin glasses are recovered: the final plot
-    shows how in the pure cubic model the ground state energy $E_\text{gs}$ and threshold energy
-    $E_\text{th}$ correspond with the limits of the satisfiability and
-    shattering transitions, respectively. Note that for mixed models with
-    inhomogeneous covariance functions, $E_\text{th}$ is not the lower limit of
-    $V_\text{sh}$.
-  }
-\end{figure}
+\paragraph{}
+Fig.~\ref{fig:cartoons} shows a low-dimensional facsimile of what these
+difference regimes look like. Surprisingly, this approach sees no signal of a
+replica symmetry breaking (\textsc{rsb}) transition previously found in
+\cite{Urbani_2023_A}. The
+instability is predicted to occur when
+\begin{equation} \label{eq:vrsb}
+  V_0^2>V_\text{\textsc{rsb}}^2
+  \equiv\frac{[f(1)-f(0)]^2}{\alpha f''(0)}
+  -f(0)-\frac{f'(0)}{f''(0)}
+\end{equation}
+Some preliminary attempts to find an instability towards finite-\textsc{rsb} in
+this region in the calculation of the Euler characteristic did not turn up anything. We conjecture that the \textsc{rsb}
+instability found in \cite{Urbani_2023_A} is a trait of the cost function
+\eqref{eq:cost}, and is not inherent to the structure of the solution manifold.
+Perhaps the best evidence for this is to consider the limit of $M=1$, or
+$\alpha\to0$ with $E=V_0\sqrt\alpha$ held fixed, where this problem reduces to
+the level sets of the spherical spin glasses. The instability \eqref{eq:vrsb}
+implies for the pure spherical 2-spin model with $f(q)=\frac12q^2$ that
+$E_\textsc{rsb}=\frac12$, though nothing of note is known to occur in the level
+sets of 2-spin model at such an energy.
 
-\section{Implications for dynamic thresholds in the spherical spin glasses}
+\section{Implications for the dynamics of spherical spin glasses}
+\label{sec:ssg}
 
 
 As indicated earlier, for $M=1$ the solution manifold corresponds to the energy
 level set of a spherical spin glass with energy density $E=\sqrt NV_0$. All the
 results from the previous sections follow, and can be translated to the spin
-glasses by taking the limit $\alpha\to0$ while scaling $V_0=\sqrt\alpha E$. With a little algebra this procedure yields
-\begin{equation}
+glasses by taking the limit $\alpha\to0$ while keeping $E=V_0\alpha^{-1/2}$ fixed. With a little algebra this procedure yields
+\begin{align}
   E_\text{on}=\pm\sqrt{2f(1)}
-\end{equation}
-\begin{equation}
+  &&
   E_\text{sh}=\pm\sqrt{4f(1)\left(1-\frac{f(1)}{f'(1)}\right)}
-\end{equation}
+  \label{eq:ssg.energies}
+\end{align}
 for the energies at which level sets of the spherical spin glasses have
 disconnected pieces appear, and that at which a large connected component
 vanishes. For the pure $p$-spin spherical spin glasses with $f(q)=\frac12q^p$,
@@ -660,7 +729,8 @@ asymptotically reach an energy above the threshold energy
 belief that the threshold energy qualitatively coincides with a kind of
 shattering is the origin of the expectation that the dynamic limit should be
 the threshold energy. Given that our work shows the actual shattering energy is
-different, here we compare it with data on the asymptotic limits of dynamics to see if they coincide.
+different, here we compare it with data on the asymptotic limits of dynamics to
+see if they coincide.
 
 \begin{figure}
   \includegraphics{figs/dynamics_2.pdf}
@@ -680,11 +750,73 @@ different, here we compare it with data on the asymptotic limits of dynamics to
   } \label{fig:ssg}
 \end{figure}
 
+Measurements of the asymptotic limits of dynamics were recently taken in
+\cite{Folena_2023_On} for two different classes of models with inhomogeneous
+$f(q)$, with
+\begin{equation}
+  f(q)=\frac12\big[\lambda q^p+(1-\lambda)q^s\big]
+\end{equation}
+They studied models with this covariance for $p=2$ and $p=3$ while varying $s$.
+In both cases, the relative weight $\lambda$ varies with $s$ and was chosen to
+maximize a heuristic to increase the chances of seeing nontrivial behavior. The
+authors numerically integrated the dynamic mean field theory (\textsc{dmft})
+equations for gradient descent on these models from a random initial condition to large but finite time, then attempted to
+extrapolate the infinite-time behavior by two different methods. The black
+symbols in Fig.~\ref{fig:ssg} show the measurements taken from
+\cite{Folena_2023_On}. The difference between the two extrapolations is not
+critical here, see the original paper for details. We simply note that the
+authors of \cite{Folena_2023_On} did not associate an uncertainty with them,
+nor were they confident that they are unbiased estimates of the asymptotic value.
+
+Fig.~\ref{fig:ssg} also shows the shattering and {\oldstylenums1}\textsc{rsb}
+threshold energies as a function of $s$. The solid lines come from using
+\textsl{Mathematica}'s \texttt{Interpolation} function to create a smooth
+function $\lambda(s)$ through the values used in \cite{Folena_2023_On}. For the
+$2+s$ models for sufficiently large $s$, the ground state is described by a
+{\oldstylenums1}\textsc{frsb} order and both the threshold energy and
+shattering energy calculated using an annealed average are likely inaccurate
+\cite{Auffinger_2022_The}. In Appendix~\ref{sec:1frsb} we calculate the
+quenched ground state and shattering energies for these models consistent with
+the {\oldstylenums1}\textsc{frsb} equilibrium order. In the left panel of Fig.~\ref{fig:ssg}, the solid line shows the quenched calculation, while the dashed line shows the annealed formula \eqref{eq:ssg.energies}.
+
+Is the shattering energy consistent with the dynamic threshold for gradient
+descent from a random initial condition? The evidence in Fig.~\ref{fig:ssg} is
+compelling but inconclusive. The difference between the shattering energy and
+the extrapolated \textsc{dmft} data is about the same as the difference between
+the values predicted by the two extrapolation methods. If both extrapolation
+methods suffer from similar systematic biases, it is plausible they true value
+corresponds with the shattering energy. However, better estimates of the
+asymptotic values are needed to support or refute this conjecture. This
+motivates working to integrate the \textsc{dmft} equations to longer times, or
+else look for analytic asymptotic solutions that approach $E_\text{sh}$.
+
+\section{Conclusion}
 
-\section{Conclusions}
+We have shown how to calculate the average Euler characteristic of the solution
+manifold in a simple model of random constraint satisfaction. The results
+constrain the topology and geometry of this manifold, revealing when it is
+connected and trivial, when it is extensive but topologically nontrivial, and
+when it is shattered into disconnected pieces. These inferences are supported
+by a auxiliary calculation of the complexity of stationary points for a test
+function on the same solution manifold.
 
+This calculation has novel implications for the geometry of the energy
+landscape in the spherical spin glasses, where it reveals a previously unknown
+landmark energy $E_\text{sh}$. This shattering energy is where the topological
+calculation implies that the level set of the energy breaks into disconnected
+pieces, and differs from the threshold energy $E_\text{th}$ in mixed models.
+It's possible that $E_\text{sh}$ is the asymptotic limit of gradient descent
+from a random initial condition in such models, but the quality of the
+currently available data makes this conjecture inconclusive.
 
-\cite{Franz_2016_The, Franz_2017_Universality, Franz_2019_Critical, Annesi_2023_Star-shaped, Baldassi_2023_Typical}
+This paper has focused on equality constraints, while most existing studies of
+constraint satisfaction study inequality constraints \cite{Franz_2016_The,
+Franz_2017_Universality, Franz_2019_Critical, Annesi_2023_Star-shaped,
+Baldassi_2023_Typical}. To generalize the technique developed in this paper to
+such cases is not a trivial extension. The set of solutions to such problems
+are manifolds with boundary, and these boundaries are often not smooth. To
+study such cases with these techniques will require using extensions of the
+Morse theory for manifolds with boundary, and will be the subject of future work.
 
 \paragraph{Acknowledgements}
 The authors thank Pierfrancesco Urbani for helpful conversations on these
@@ -977,7 +1109,7 @@ and accumulate $M$ factors of $\operatorname{sign}(-Gf'(C))=\operatorname{sign}(
 integral over Lagrange multipliers and $N$ factors of $\operatorname{sign}(-G)$
 from the Hubbard--Stratonovich transformation. Since at all saddle points $G=-R$, we have
 \begin{equation}
-  \operatorname{sign}(R)^{N+M}e^{N\mathcal S_\Omega(\tilde{\mathbb Q},\mathbb Q,\mathbb M)}
+  \operatorname{sign}(R)^{N+M}e^{N\mathcal S_\chi(\tilde{\mathbb Q},\mathbb Q,\mathbb M)}
 \end{equation}
 
 \subsection{Contribution from integrating the Grassmann order parameters}
@@ -1049,16 +1181,16 @@ function $g(R,D,m,\hat m)$ of \eqref{eq:pre-saddle.characteristic} is given by
   \end{aligned}
 \end{equation}
 In the connected regime, there are two saddle points of the integrand that
-contribute to the asymptotic value of the integral, at $m=\pm m^*$ with $R=-m^*$, $D=0$, and $\hat m=0$. At this saddle point $\mathcal S_\Omega=0$. We can therefore write
+contribute to the asymptotic value of the integral, at $m=\pm m^*$ with $R=-m^*$, $D=0$, and $\hat m=0$. At this saddle point $\mathcal S_\chi=0$. We can therefore write
 \begin{equation}
   \overline{\chi(\Omega)}
   =\sum_{m=\pm m^*}
-    g(-m,0,m,0)\big[\det\partial\partial\mathcal S_\Omega(-m,0,m,0)\big]^{-\frac12}
+    g(-m,0,m,0)\big[\det\partial\partial\mathcal S_\chi(-m,0,m,0)\big]^{-\frac12}
 \end{equation}
 where here $\partial=[\frac{\partial}{\partial R},\frac{\partial}{\partial D},\frac\partial{\partial m},\frac\partial{\partial \hat m}]$ is the
 vector of derivatives with respect to the remaining order parameters. For both of the two saddle points, the determinant of the Hessian of the effective action evaluates to
 \begin{equation}
-  \det\partial\partial\mathcal S_\Omega
+  \det\partial\partial\mathcal S_\chi
   =\left[\frac1{(m^*)^4}\left(1-\frac{\alpha[V_0^2+f(1)]}{f'(1)}\right)\right]^2
 \end{equation}
 whereas
@@ -1422,7 +1554,7 @@ $\chi(1)=0$, necessary for $P$ to be a well-defined probability distribution.
 
 Now the specific form of replica symmetry breaking we expect to see is
 important. We want to study the mixed $2+s$ models in the regime where they may
-have 1-full \textsc{rsb} in equilibrium. For the Euler characteristic like the
+have 1-full \textsc{rsb} in equilibrium \cite{Auffinger_2022_The}. For the Euler characteristic like the
 complexity, this will correspond to full \textsc{rsb}, in an analogous way to
 {\oldstylenums1}\textsc{rsb} equilibria give a \textsc{rs} complexity. Such order is characterized by a piecewise smooth $\chi$ of the form
 \begin{equation}
author	Jaron Kent-Dobias <jaron@kent-dobias.com>	2024-09-07 19:48:36 +0200
committer	Jaron Kent-Dobias <jaron@kent-dobias.com>	2024-09-07 19:48:36 +0200
commit	4d6c19367c5eff3eaf29376d663369730d36cd74 (patch)
tree	5de52e430c5c351c6b850694fd3c4aecfcdf6f76
parent	fb98534843c67fa994657a9628f3f315856f3ece (diff)
download	SciPostPhys_18_158-4d6c19367c5eff3eaf29376d663369730d36cd74.tar.gz SciPostPhys_18_158-4d6c19367c5eff3eaf29376d663369730d36cd74.tar.bz2 SciPostPhys_18_158-4d6c19367c5eff3eaf29376d663369730d36cd74.zip