summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaron Kent-Dobias <jaron@kent-dobias.com>2024-09-18 17:10:27 +0200
committerJaron Kent-Dobias <jaron@kent-dobias.com>2024-09-18 17:10:27 +0200
commit3cc263a1555917e602d97416a493ba4300885ec4 (patch)
tree9f92bea74e3cbd3d1d6b2cb255bca07c7f6c5171
parentcd9302e207922e62bd223f5dfa6ac1f1bca1db62 (diff)
downloadSciPostPhys_18_158-3cc263a1555917e602d97416a493ba4300885ec4.tar.gz
SciPostPhys_18_158-3cc263a1555917e602d97416a493ba4300885ec4.tar.bz2
SciPostPhys_18_158-3cc263a1555917e602d97416a493ba4300885ec4.zip
Lots of small changes.
-rw-r--r--topology.bib122
-rw-r--r--topology.tex494
2 files changed, 374 insertions, 242 deletions
diff --git a/topology.bib b/topology.bib
index f28564a..716c8b2 100644
--- a/topology.bib
+++ b/topology.bib
@@ -44,7 +44,7 @@
@article{Annibale_2003_The,
author = {Annibale, Alessia and Cavagna, Andrea and Giardina, Irene and Parisi, Giorgio and Trevigne, Elisa},
- title = {The role of the {Becchi}--{Rouet}--{Stora}--{Tyutin} supersymmetry in the calculation of the complexity for the {Sherrington}--{Kirkpatrick} model},
+ title = {The role of the {Becchi--Rouet--Stora--Tyutin} supersymmetry in the calculation of the complexity for the {Sherrington--Kirkpatrick} model},
journal = {Journal of Physics A: Mathematical and General},
publisher = {IOP Publishing},
year = {2003},
@@ -72,7 +72,7 @@
@book{Audin_2014_Morse,
author = {Audin, Michèle and Damian, Mihai},
- title = {Morse theory and {Floer} homology},
+ title = {{Morse} theory and {Floer} homology},
publisher = {Springer},
year = {2014},
address = {London},
@@ -86,11 +86,10 @@
title = {The Spherical $p+s$ Spin Glass At Zero Temperature},
year = {2022},
month = {9},
- url = {http://arxiv.org/abs/2209.03866v1},
- doi = {10.48550/arXiv.2209.03866},
+ url = {http://arxiv.org/abs/2209.03866},
note = {arXiv preprint},
date = {2022-09-08T15:08:26Z},
- eprint = {2209.03866v1},
+ eprint = {2209.03866},
eprintclass = {math.PR},
eprinttype = {arxiv}
}
@@ -139,18 +138,47 @@
issn = {2470-0053}
}
+@article{BenArous_2019_Geometry,
+ author = {Ben Arous, Gérard and Subag, Eliran and Zeitouni, Ofer},
+ title = {Geometry and Temperature Chaos in Mixed Spherical Spin Glasses at Low Temperature: The Perturbative Regime},
+ journal = {Communications on Pure and Applied Mathematics},
+ publisher = {Wiley},
+ year = {2019},
+ month = {11},
+ number = {8},
+ volume = {73},
+ pages = {1732--1828},
+ url = {https://doi.org/10.1002%2Fcpa.21875},
+ doi = {10.1002/cpa.21875}
+}
+
@unpublished{Beneventano_2023_On,
author = {Beneventano, Pierfrancesco},
title = {On the Trajectories of {SGD} Without Replacement},
year = {2023},
month = {dec},
- url = {http://arxiv.org/abs/2312.16143v2},
+ url = {http://arxiv.org/abs/2312.16143},
+ note = {arXiv preprint},
archiveprefix = {arXiv},
- eprint = {2312.16143v2},
+ eprint = {2312.16143},
eprintclass = {cs.LG},
eprinttype = {arxiv}
}
+@article{Bray_1980_Metastable,
+ author = {Bray, A J and Moore, M A},
+ title = {Metastable states in spin glasses},
+ journal = {Journal of Physics C: Solid State Physics},
+ publisher = {IOP Publishing},
+ year = {1980},
+ month = {7},
+ number = {19},
+ volume = {13},
+ pages = {L469--L476},
+ url = {https://doi.org/10.1088%2F0022-3719%2F13%2F19%2F002},
+ doi = {10.1088/0022-3719/13/19/002}
+}
+
@article{Castellani_2005_Spin-glass,
author = {Castellani, Tommaso and Cavagna, Andrea},
title = {Spin-glass theory for pedestrians},
@@ -179,6 +207,20 @@
doi = {10.1007/bf01309287}
}
+@article{Crisanti_1995_Thouless-Anderson-Palmer,
+ author = {Crisanti, A. and Sommers, H.-J.},
+ title = {{Thouless-Anderson-Palmer} Approach to the Spherical $p$-Spin Spin Glass Model},
+ journal = {Journal de Physique I},
+ publisher = {EDP Sciences},
+ year = {1995},
+ month = {7},
+ number = {7},
+ volume = {5},
+ pages = {805--813},
+ url = {https://doi.org/10.1051/jp1:1995164},
+ doi = {10.1051/jp1:1995164}
+}
+
@article{Crisanti_2004_Spherical,
author = {Crisanti, A. and Leuzzi, L.},
title = {Spherical $2+p$ Spin-Glass Model: An Exactly Solvable Model for Glass to Spin-Glass Transition},
@@ -221,7 +263,7 @@
@article{Erba_2024_Quenches,
author = {Erba, Vittorio and Behrens, Freya and Krzakala, Florent and Zdeborová, Lenka},
- title = {Quenches in the {Sherrington}–{Kirkpatrick} model},
+ title = {Quenches in the {Sherrington–Kirkpatrick} model},
journal = {Journal of Statistical Mechanics: Theory and Experiment},
publisher = {IOP Publishing},
year = {2024},
@@ -384,8 +426,8 @@
title = {Algebraic topology},
publisher = {Cambridge University Press},
year = {2002},
- address = {Cambridge ; New York},
- isbn = {9780521791601 9780521795401},
+ address = {Cambridge},
+ isbn = {9780521791601},
keyword = {Algebraic topology}
}
@@ -422,10 +464,10 @@
title = {Stochastic Gradient Descent outperforms Gradient Descent in recovering a high-dimensional signal in a glassy energy landscape},
year = {2023},
month = {sep},
- url = {http://arxiv.org/abs/2309.04788v2},
- note = {},
+ url = {http://arxiv.org/abs/2309.04788},
+ note = {arXiv preprint},
archiveprefix = {arXiv},
- eprint = {2309.04788v2},
+ eprint = {2309.04788},
eprintclass = {cs.LG},
eprinttype = {arxiv}
}
@@ -463,16 +505,33 @@
title = {Algorithm-independent bounds on complex optimization through the statistics of marginal optima},
year = {2024},
url = {https://arxiv.org/abs/2407.02092},
+ note = {arXiv preprint},
archiveprefix = {arXiv},
eprint = {2407.02092},
primaryclass = {cond-mat.dis-nn}
}
+@article{Kent-Dobias_2024_Arrangement,
+ author = {Kent-Dobias, Jaron},
+ title = {Arrangement of nearby minima and saddles in the mixed spherical energy landscapes},
+ journal = {SciPost Physics},
+ publisher = {Stichting SciPost},
+ year = {2024},
+ month = {1},
+ number = {1},
+ volume = {16},
+ pages = {001},
+ url = {http://dx.doi.org/10.21468/SciPostPhys.16.1.001},
+ doi = {10.21468/scipostphys.16.1.001},
+ issn = {2542-4653}
+}
+
@unpublished{Kent-Dobias_2024_Conditioning,
author = {Kent-Dobias, Jaron},
title = {Conditioning the complexity of random landscapes on marginal optima},
year = {2024},
url = {https://arxiv.org/abs/2407.02082},
+ note = {arXiv preprint},
archiveprefix = {arXiv},
eprint = {2407.02082},
primaryclass = {cond-mat.dis-nn}
@@ -498,10 +557,10 @@
title = {Solving overparametrized systems of random equations: I. Model and algorithms for approximate solutions},
year = {2023},
month = {jun},
- url = {http://arxiv.org/abs/2306.13326v1},
- note = {},
+ url = {http://arxiv.org/abs/2306.13326},
+ note = {arXiv preprint},
archiveprefix = {arXiv},
- eprint = {2306.13326v1},
+ eprint = {2306.13326},
eprintclass = {math.PR},
eprinttype = {arxiv}
}
@@ -511,14 +570,28 @@
title = {On {Smale}'s 17th problem over the reals},
year = {2024},
month = {may},
- url = {http://arxiv.org/abs/2405.01735v1},
- note = {},
+ url = {http://arxiv.org/abs/2405.01735},
+ note = {arXiv preprint},
archiveprefix = {arXiv},
- eprint = {2405.01735v1},
+ eprint = {2405.01735},
eprintclass = {cs.DS},
eprinttype = {arxiv}
}
+@article{Muller_2006_Marginal,
+ author = {Müller, Markus and Leuzzi, Luca and Crisanti, Andrea},
+ title = {Marginal states in mean-field glasses},
+ journal = {Physical Review B},
+ publisher = {American Physical Society (APS)},
+ year = {2006},
+ month = {10},
+ number = {13},
+ volume = {74},
+ pages = {134431},
+ url = {https://doi.org/10.1103%2Fphysrevb.74.134431},
+ doi = {10.1103/physrevb.74.134431}
+}
+
@article{Rice_1939_The,
author = {Rice, S. O.},
title = {The Distribution of the Maxima of a Random Curve},
@@ -577,10 +650,10 @@
title = {Statistical physics of complex systems: glasses, spin glasses, continuous constraint satisfaction problems, high-dimensional inference and neural networks},
year = {2024},
month = {may},
- url = {http://arxiv.org/abs/2405.06384v1},
- note = {},
+ url = {http://arxiv.org/abs/2405.06384},
+ note = {arXiv preprint},
archiveprefix = {arXiv},
- eprint = {2405.06384v1},
+ eprint = {2405.06384},
eprintclass = {cond-mat.dis-nn},
eprinttype = {arxiv}
}
@@ -590,9 +663,10 @@
title = {Random Linear Systems with Quadratic Constraints: from Random Matrix Theory to replicas and back},
year = {2024},
month = {jan},
- url = {http://arxiv.org/abs/2401.03209v2},
+ url = {http://arxiv.org/abs/2401.03209},
+ note = {arXiv preprint},
archiveprefix = {arXiv},
- eprint = {2401.03209v2},
+ eprint = {2401.03209},
eprintclass = {cond-mat.stat-mech},
eprinttype = {arxiv}
}
diff --git a/topology.tex b/topology.tex
index 3b6afc1..2ed25aa 100644
--- a/topology.tex
+++ b/topology.tex
@@ -67,8 +67,7 @@ the solution manifold. When $M=1$ there is a correspondence between this
problem and level sets of the energy in the spherical spin glasses. We
conjecture that the transition energy dividing two of the topological phases
corresponds to the asymptotic limit of gradient descent from a random initial
-condition, possibly resolving a recent open problem in out-of-equilibrium
-dynamics.
+condition, possibly resolving an open problem in out-of-equilibrium dynamics.
}
}
@@ -124,11 +123,11 @@ forces between physical objects. Equality constraints naturally appear in the
zero-gradient solutions to overparameterized smooth neural networks and in vertex models of tissues.
In such problems, there is great interest in characterizing structure in the
-set of solutions, which can be influential in how algorithms behave when trying
-to solve them \cite{Baldassi_2016_Unreasonable, Baldassi_2019_Properties,
+set of solutions, which can influence the behavior of algorithms trying
+to find them \cite{Baldassi_2016_Unreasonable, Baldassi_2019_Properties,
Beneventano_2023_On}. Here, we show how topological information about
-the set of solutions can be calculated in a simple model of satisfying random
-nonlinear equalities. This allows us to reason about the connectivity of this
+the set of solutions can be calculated in a simple problem of satisfying random
+nonlinear equalities. This allows us to reason about the connectivity of the
solution set. The topological properties revealed by this calculation yield
surprising results for the well-studied spherical spin glasses, where a
topological transition thought to occur at a threshold energy $E_\text{th}$
@@ -147,34 +146,34 @@ Gaussian random functions with covariance
\end{equation}
for some choice of function $f$. When the covariance function $f$ is polynomial, the
$V_k$ are also polynomial, with a term of degree $p$ in $f$ corresponding to
-all possible terms of degree $p$ in $V_k$. In particular, one can explicitly construct functions that satisfy \eqref{eq:covariance} by taking
+all possible terms of degree $p$ in $V_k$. One can explicitly construct functions that satisfy \eqref{eq:covariance} by taking
\begin{equation}
V_k(\mathbf x)
=\sum_{p=0}^\infty\frac1{p!}\sqrt{\frac{f^{(p)}(0)}{N^p}}
\sum_{i_1\cdots i_p}^NJ^{(k,p)}_{i_1\cdots i_p}x_{i_1}\cdots x_{i_p}
\end{equation}
with the elements of the tensors $J^{(k,p)}$ as independently distributed
-unit normal random variables. The size of the
-series coefficients of $f$ therefore control the variances in the coefficients
-of the random polynomial constraints. When $M=1$, this problem corresponds to the
-level set of a spherical spin glass with energy density $E=V_0/\sqrt{N}$.
-
+unit normal random variables. The
+series coefficients of $f$ therefore control the variances of the random coefficients
+in the random polynomials $V_k$. When $M=1$, this problem corresponds to
+finding the level set of a spherical spin glass at energy density
+$E=V_0/\sqrt{N}$.
This problem or small variations thereof have attracted attention recently for
-their resemblance to encryption, least-squares optimization, and vertex models of confluent
-tissues \cite{Fyodorov_2019_A, Fyodorov_2020_Counting,
- Fyodorov_2022_Optimization, Tublin_2022_A, Vivo_2024_Random, Urbani_2023_A, Kamali_2023_Dynamical,
- Kamali_2023_Stochastic, Urbani_2024_Statistical, Montanari_2023_Solving,
-Montanari_2024_On, Kent-Dobias_2024_Conditioning, Kent-Dobias_2024_Algorithm-independent}. In each of these cases, the authors studied properties of
-the cost function
+their resemblance to encryption, least-squares optimization, and vertex models
+of confluent tissues \cite{Fyodorov_2019_A, Fyodorov_2020_Counting,
+ Fyodorov_2022_Optimization, Tublin_2022_A, Vivo_2024_Random, Urbani_2023_A,
+ Kamali_2023_Dynamical, Kamali_2023_Stochastic, Urbani_2024_Statistical,
+Montanari_2023_Solving, Montanari_2024_On, Kent-Dobias_2024_Conditioning,
+Kent-Dobias_2024_Algorithm-independent}. In each of these cases, the authors
+studied properties of the cost function
\begin{equation} \label{eq:cost}
\mathscr C(\mathbf x)=\frac12\sum_{k=1}^M\big[V_k(\mathbf x)-V_0\big]^2
\end{equation}
which achieves zero only for configurations that satisfy all the constraints.
+From the perspective of the cost function, the set of solutions looks like a network of flat canyons at the bottom of the landscape.
Here we dispense with the cost function and study the set of solutions
-directly.
-This set
-can be written as
+directly. This set can be written as
\begin{equation}
\Omega=\big\{\mathbf x\in\mathbb R^N\mid \|\mathbf x\|^2=N,V_k(\mathbf x)=V_0
\;\forall\;k=1,\ldots,M\big\}
@@ -188,30 +187,50 @@ Because the constraints are all smooth functions, $\Omega$ is almost always a ma
intersect, nor are there self-intersections, without extraordinary fine-tuning.}
We study the topology of the manifold $\Omega$ by computing its
average Euler characteristic, a topological invariant whose value puts
-constraints on the structure of the manifold. The topological phases determined
-by this means are distinguished by the size and sign of the Euler
-characteristic, and the distribution in space of its constituent parts.
+constraints on the manifold's structure. The topological phases determined
+by this measurement are distinguished by the size and sign of the Euler
+characteristic, and the distribution in space of its constituents.
+
+In Section~\ref{sec:euler.1} we describe how to calculate the average Euler
+characteristic, how to interpret the results of that calculation, and what
+topological phases are implied. In Section~\ref{sec:ssg} we examine some
+implications of these results for dynamic thresholds in the spherical spin glasses.
+Finally, in Section~\ref{sec:conclusion} we make some concluding remarks. Many
+of the details of the calculations in the middle sections are found in
+Appendices~A--D.
\section{The average Euler characteristic}
+\label{sec:euler.1}
\subsection{Definition and derivation}
The Euler characteristic $\chi$ of a manifold is a topological invariant \cite{Hatcher_2002_Algebraic}. It is
perhaps most familiar in the context of connected compact orientable surfaces, where it
characterizes the number of handles in the surface: $\chi=2(1-\#)$ for $\#$
-handles. For general $d$, the Euler characteristic of the $d$-sphere is $2$ if $d$ is even and 0 if $d$ is odd. The canonical method for computing the Euler characteristic is done by
-defining a complex on the manifold in question, essentially a
-higher-dimensional generalization of a polygonal tiling. Then $\chi$ is given
-by an alternating sum over the number of cells of increasing dimension, which
-for 2-manifolds corresponds to the number of vertices, minus the number of
-edges, plus the number of faces.
-
-Morse theory offers another way to compute the Euler characteristic of a manifold $\Omega$ using the
-statistics of stationary points in a function $H:\Omega\to\mathbb R$ \cite{Audin_2014_Morse}. For
-functions $H$ without any symmetries with respect to the manifold, the surfaces
-of gradient flow between adjacent stationary points form a complex. The
-alternating sum over cells to compute $\chi$ becomes an alternating sum over
-the count of stationary points of $H$ with increasing index, or
+handles. In higher dimensions it is more difficult to interpret, but there are
+a few basic intuitions. The Euler characteristic of the hypersphere is $2$ in even dimensions and 0 in odd dimensions. In fact, the Euler
+characteristic of an odd-dimensional manifold is always zero. The Euler
+characteristic of the union of two disjoint manifolds is the sum of the Euler
+characteristics of the individual manifolds, and that of the product of two
+manifolds is the product of the Euler characteristics. This means that a
+manifold made of many disconnected sphere-like components will have a large
+positive Euler characteristic. A manifold with many hyper-handles will have a
+large negative Euler characteristic. And no matter the Euler characteristic of
+a manifold, the Euler characteristic of its product with the circle $S^1$ is
+zero.
+
+The canonical method for computing the Euler characteristic is to construct a
+complex on the manifold in question, essentially a higher-dimensional
+generalization of a polygonal tiling. Then $\chi$ is given by an alternating
+sum over the number of cells of increasing dimension, which for 2-manifolds
+corresponds to the number of vertices, minus the number of edges, plus the
+number of faces. Morse theory offers another way to compute the Euler
+characteristic of a manifold $\Omega$ using the statistics of stationary points
+in a function $H:\Omega\to\mathbb R$ \cite{Audin_2014_Morse}. For functions $H$
+without any symmetries with respect to the manifold, the surfaces of gradient
+flow between adjacent stationary points form a complex. The alternating sum
+over cells becomes an alternating sum over the count of stationary points of
+$H$ with increasing index, or
\begin{equation}
\chi(\Omega)=\sum_{i=0}^N(-1)^i\mathcal N_H(\text{index}=i)
\end{equation}
@@ -223,27 +242,27 @@ the sign of the determinant, we arrive at the Euler characteristic, or
\begin{equation} \label{eq:kac-rice}
\chi(\Omega)=\int_\Omega d\mathbf x\,\delta\big(\nabla H(\mathbf x)\big)\det\operatorname{Hess}H(\mathbf x)
\end{equation}
-When the Kac--Rice formula is used to \emph{count} stationary points, one must take pains to preserve the sign of the determinant
-\cite{Fyodorov_2004_Complexity}. Here we are correct to exclude it.
+When the Kac--Rice formula is used to calculate the total number stationary
+points one must take pains to eliminate the sign of the determinant
+\cite{Fyodorov_2004_Complexity}. Here we are correct to include it.
We need to choose a function $H$ for our calculation. Because $\chi$ is
a topological invariant, any choice will work so long as it does not share some
symmetry with the underlying manifold, i.e., that $H$ satisfies the Smale condition. Because our manifold of random
constraints has no symmetries, we can take a simple height function $H(\mathbf
x)=\mathbf x_0\cdot\mathbf x$ for some $\mathbf x_0\in\mathbb R^N$ with
-$\|\mathbf x_0\|^2=N$. $H$ is a height function because when $\mathbf x_0$ is
-used as the polar axis, $H$ gives the height on the sphere relative to the equator.
+$\|\mathbf x_0\|^2=N$. We call $H$ a height function because when $\mathbf x_0$ is
+interpreted as the polar axis of a spherical coordinate system, $H$ gives the height on the sphere relative to the equator.
We treat the integral over the implicitly defined manifold $\Omega$ using the
method of Lagrange multipliers. We introduce one multiplier $\omega_0$ to
-enforce the spherical constraint and $M$ multipliers $\omega_k$ to enforce the vanishing of
-each of the $V_k$, resulting in the Lagrangian
+enforce the spherical constraint and $M$ multipliers $\omega_k$ for $k=1,\ldots,M$ to enforce the $M$ constraints, resulting in the Lagrangian
\begin{equation} \label{eq:lagrangian}
L(\mathbf x,\pmb\omega)
=H(\mathbf x)+\frac12\omega_0\big(\|\mathbf x\|^2-N\big)
+\sum_{k=1}^M\omega_k\big(V_k(\mathbf x)-V_0\big)
\end{equation}
-The integral over $\Omega$ in \eqref{eq:kac-rice} then becomes
+The integral over the solution manifold $\Omega$ in \eqref{eq:kac-rice} becomes
\begin{equation} \label{eq:kac-rice.lagrange}
\chi(\Omega)=\int_{\mathbb R^N} d\mathbf x\int_{\mathbb R^{M+1}}d\pmb\omega
\,\delta\big(\partial L(\mathbf x,\pmb\omega)\big)
@@ -251,15 +270,15 @@ The integral over $\Omega$ in \eqref{eq:kac-rice} then becomes
\end{equation}
where $\partial=[\frac\partial{\partial\mathbf x},\frac\partial{\partial\pmb\omega}]$
is the vector of partial derivatives with respect to all $N+M+1$ variables.
-This integral is now in a form where standard techniques from mean-field theory
-can be applied to calculate it.
+This integral is now in a form where standard techniques from the mean-field
+theory of disordered systems can be applied to calculate it.
-To evaluate the average of $\chi$ over the constraints, we first translate the $\delta$-functions and determinant to integral form, with
+To evaluate the average of $\chi$ over the random constraints, we first translate the $\delta$-functions and determinant to integral form, with
\begin{align}
\label{eq:delta.exp}
\delta\big(\partial L(\mathbf x,\pmb\omega)\big)
&=\int\frac{d\hat{\mathbf x}}{(2\pi)^N}\frac{d\hat{\pmb\omega}}{(2\pi)^{M+1}}
- e^{i[\hat{\mathbf x},\hat{\pmb\omega}]\cdot\partial L(\mathbf x,\pmb\omega)}
+ \,e^{i[\hat{\mathbf x},\hat{\pmb\omega}]\cdot\partial L(\mathbf x,\pmb\omega)}
\\
\label{eq:det.exp}
\det\partial\partial L(\mathbf x,\pmb\omega)
@@ -282,7 +301,7 @@ form
where $g$ is a prefactor of $o(N^0)$, and $\mathcal S_\chi$ is an effective action defined by
\begin{equation} \label{eq:euler.action}
\begin{aligned}
- \mathcal S_\chi(R,D,m,\hat m\mid\alpha,V_0)
+ \mathcal S_\chi(R,D,m,\hat m)
&=-\hat m-\frac\alpha2\left[
\log\left(1+\frac{f(1)D}{f'(1)R^2}\right)
+\frac{V_0^2}{f(1)}\left(1+\frac{f'(1)R^2}{f(1)D}\right)^{-1}
@@ -292,6 +311,7 @@ where $g$ is a prefactor of $o(N^0)$, and $\mathcal S_\chi$ is an effective acti
\right)
\end{aligned}
\end{equation}
+and where we have introduced the ratio $\alpha=M/N$.
The remaining order parameters are defined by the scalar products
\begin{align}
R=-i\frac1N\mathbf x\cdot\hat{\mathbf x}
@@ -305,16 +325,18 @@ The remaining order parameters are defined by the scalar products
\subsection{Features of the effective action}
-This integral can be evaluated to leading order by a saddle point method. For reasons we will
-see, it is best to extremize with respect to $R$, $D$, and $\hat m$, leaving a
-new effective action of $m$ alone. This can be solved to give
+This integral can be evaluated to leading order in $N$ by a saddle point approximation. First
+we extremize with respect to $R$, $D$, and $\hat m$, which take the saddle-point
+values
\begin{equation}
- D=-\frac{m+R_*}{1-m^2}R_* \qquad \hat m=0
+ D=-\frac{m+R_*}{1-m^2}R_*
+ \hspace{8em}
+ \hat m=0
\end{equation}
\begin{equation} \label{eq:rs}
\begin{aligned}
- R_*
- =\frac{-m(1-m^2)}{2[f(1)-(1-m^2)f'(1)]^2}
+ R=R_*
+ \equiv\frac{-m(1-m^2)}{2[f(1)-(1-m^2)f'(1)]^2}
\Bigg[
\alpha V_0^2f'(1)
+(2-\alpha)f(1)\left(\frac{f(1)}{1-m^2}-f'(1)\right) \quad \\
@@ -325,7 +347,7 @@ new effective action of $m$ alone. This can be solved to give
\Bigg]
\end{aligned}
\end{equation}
-with the resulting effective action as a function of $m$ alone given by
+This results in an effective action as a function of $m$ alone given by
\begin{equation} \label{eq:S.m}
\mathcal S_\chi(m)
=-\frac\alpha2\bigg[
@@ -338,59 +360,73 @@ with the resulting effective action as a function of $m$ alone given by
\bigg]
+\frac12\log\left(-\frac m{R_*}\right)
\end{equation}
-This function is plotted as a function of $m$ in Fig.~\ref{fig:action} for a variety of $V_0$ and $f$.
-To finish evaluating the integral, this expression should be maximized with
-respect to $m$. If $m_*$ is such a maximum, then $\overline{\chi(\Omega)}\propto e^{N\mathcal S_\chi(m_*)}$. The order parameter $m$ is the overlap of the configuration $\mathbf x$ with the height axis
-$\mathbf x_0$. Therefore, the value $m$ that maximizes this action can be
-understood as the latitude on the sphere where most of the contribution to the
-Euler characteristic is made.
-
-\begin{figure}
+This function is plotted as a function of $m$ in Fig.~\ref{fig:action} for a
+selection of sample parameters. To finish evaluating the integral, this
+expression should be maximized with respect to $m$. If $m_*$ is such a maximum,
+then the resulting Euler characteristic is $\overline{\chi(\Omega)}\propto
+e^{N\mathcal S_\chi(m_*)}$. The order parameter $m$ is the overlap of the
+configuration $\mathbf x$ with the height axis $\mathbf x_0$. Therefore, the
+value $m$ that maximizes this action can be understood as the latitude on the
+sphere where most of the contribution to the Euler characteristic is made.
+
+\begin{figure}[tbh]
\includegraphics{figs/action_1.pdf}
\hspace{-3.5em}
\includegraphics{figs/action_3.pdf}
\caption{
\textbf{Effective action for the Euler characteristic.}
- The effective action \eqref{eq:S.m} governing the average Euler characteristic as a function of the overlap
- $m=\frac1N\mathbf x\cdot\mathbf x_0$ with the height axis for two
- different homogeneous polynomial constraints and a variety of target values $V_0$. Dashed lines depict $\operatorname{Re}\mathcal S_\chi$ when its imaginary part is nonzero. In both
- plots $\alpha=\frac12$. \textbf{Left:} With linear functions there are two
- regimes. For small $V_0$, there are maxima at $m=\pm m^*$ where the action
- is zero, while after the satisfiability transition at $V_0=V_{\text{\textsc{sat}}\ast}=1$, $m_*$
- goes to zero and the action becomes negative everywhere. \textbf{Right:} With nonlinear
- functions there are other possible regimes. For small $V_0$, there are maxima at $m=\pm m^*$ where the action is zero but the real part of the action is maximized at $m=0$ where the action is complex. For larger $V_0\geq V_\text{on}\simeq1.099$ the maxima at $m=\pm m^*$ disappear. For $V_0\geq V_\text{sh}\simeq1.394$ larger still, the action becomes real everywhere.
- Finally, at a satisfiability transition
- $V_0=V_\text{\textsc{sat}}\simeq1.440$ the action becomes negative everywhere.
+ The action \eqref{eq:S.m} as a function of $m=\frac1N\mathbf x\cdot\mathbf
+ x_0$ for pure polynomial constraints and a selection of target values
+ $V_0$. Dashed lines depict $\operatorname{Re}\mathcal S_\chi$ when its
+ imaginary part is nonzero. In both plots $\alpha=\frac12$. \textbf{Left:}
+ With linear functions there are two regimes. For small $V_0$, there are
+ maxima at $m=\pm m_*$ where the action is zero, while for $V_0>
+ V_{\text{\textsc{sat}}\ast}=1$ the action is negative everywhere.
+ \textbf{Right:} With nonlinear functions there are other possible regimes.
+ For small $V_0$, there are maxima at $m=\pm m_*$ but the real part of the
+ action is maximized at $m=0$ where the action is complex. For larger
+ $V_0\geq V_\text{on}\simeq1.099$ the maxima at $m=\pm m_*$ disappear. For
+ $V_0\geq V_\text{sh}\simeq1.394$ larger still, the action becomes real
+ everywhere. Finally, for $V_0>V_\text{\textsc{sat}}\simeq1.440$ the action
+ is negative everywhere.
} \label{fig:action}
\end{figure}
-The action $\mathcal S_\chi$ is extremized with respect to $m$ at $m=0$ or $m=\pm m_*=\mp R_*(m_*)$ for
+The action $\mathcal S_\chi$ is extremized with respect to $m$ at $m=0$ or at $m=\pm m_*$ for
\begin{equation}
- m^*=\sqrt{1-\frac{\alpha}{f'(1)}\big(V_0^2+f(1)\big)}
+ m_*=\sqrt{1-\frac{\alpha}{f'(1)}\big(V_0^2+f(1)\big)}
\end{equation}
-and $\mathcal S_\chi(m^*)=0$. Zero action implies that
+At these latter extrema, $\mathcal S_\chi(\pm m_*)=0$. Zero action implies that
$\overline{\chi(\Omega)}$ does not vary exponentially with $N$, and in fact we
-show in Appendix~\ref{sec:prefactor} that the contribution from each of these
-maxima is $(-1)^{N-M-1}+o(N^0)$, so that their sum is $2$ in even dimensions and $0$ in odd dimensions. This result is consistent with the topology of an $N-M-1$ sphere.
+show in Appendix~\ref{sec:prefactor} that the contribution from these
+extrema is $1+o(N^0)$ at $-m_*$ and $(-1)^{N-M-1}+o(N^0)$ at $+m_*$, so that
+their sum is $2$ in even dimensions and $0$ in odd dimensions. When these
+extrema exist and maximize the action, this result is consistent with the
+topology of an $N-M-1$ sphere.
If this solution were always well-defined, it would
-vanish when the argument of the square root vanishes at
+vanish when the argument of the square root vanishes for
\begin{equation}
V_0^2>V_{\text{\textsc{sat}}\ast}^2\equiv\frac{f'(1)}\alpha-f(1)
\end{equation}
-This corresponds precisely to the satisfiability transition found in previous work by a replica symmetric analysis of the cost function \eqref{eq:cost} \cite{Fyodorov_2019_A, Fyodorov_2020_Counting, Fyodorov_2022_Optimization, Tublin_2022_A, Vivo_2024_Random}.
-However, the action becomes complex in the region $m^2<m_\text{min}^2$ for
+This corresponds precisely to the satisfiability transition found in previous
+work by a replica symmetric analysis of the cost function \eqref{eq:cost}
+\cite{Fyodorov_2019_A, Fyodorov_2020_Counting, Fyodorov_2022_Optimization,
+Tublin_2022_A, Vivo_2024_Random}. However, the action is not clearly defined in
+the entire range $m^2<1$: it becomes complex in the region
+$m^2<m_\text{min}^2$ where
\begin{equation}
m_\text{min}^2
- =1-\frac{f(1)^2}{f'(1)}\frac{V_0^2(1+\sqrt{1-\alpha})^2-\alpha f(1)}{
+ \equiv1-\frac{f(1)^2}{f'(1)}\times
+ \frac{V_0^2(1+\sqrt{1-\alpha})^2-\alpha f(1)}{
4V_0^2f(1)-\alpha[V_0^2+f(1)]^2
}
\end{equation}
-When $m_*^2<m_\text{min}^2$, the solution at $m=\pm m_*$ no longer maximizes
-the action. This happens when the target value is larger than an onset value $V_\text{on}$ defined by
+When $m_*^2<m_\text{min}^2$, the solutions at $m=\pm m_*$ are no longer maxima of
+the action. This happens when the target value $V_0$ is larger than an onset value $V_\text{on}$ defined by
\begin{equation}
- V_0^2>V_\text{on}^2\equiv\frac{f(1)}\alpha\left(1-\alpha+\sqrt{1-\alpha}\right)
+ V_\text{on}^2\equiv\frac{f(1)}\alpha\left(1-\alpha+\sqrt{1-\alpha}\right)
\end{equation}
Comparing this with the satisfiability transition associated with $m_*$ going to zero, one sees
\begin{equation}
@@ -405,27 +441,26 @@ V_{\text{\textsc{sat}}\ast}^2$, so the onset happens first. In situations with
mixed constant, linear, and nonlinear terms in $f$, the order of the
transitions depends on the precise form of $f$.
-Likewise, the solution at $m=0$ is sometimes complex-valued and sometimes real-valued. For $V_0$ less than a shattering value $V_\text{sh}$ defined by
+Now we return to the extremum at $m=0$. As for those at $\pm m_*$, the action
+evaluated at this solution is sometimes complex-valued and sometimes
+real-valued. For $V_0$ less than a shattering value $V_\text{sh}$ defined by
\begin{equation}
- V_0^2<V_\text{sh}^2\equiv\frac{f(1)}\alpha\left(1-\frac{f(1)}{f'(1)}\right)\left(1+\sqrt{1-\alpha}\right)^2
+ V_\text{sh}^2\equiv\frac{f(1)}\alpha\left(1-\frac{f(1)}{f'(1)}\right)\left(1+\sqrt{1-\alpha}\right)^2
\end{equation}
the maximum at $m=0$ is complex while for $V_0$ greater than this value the action is real. For purely
linear $f(q)$, $V_\text{sh}=0$ and the action at $m=0$ is always real, though
for $V_0^2<V_{\text{\textsc{sat}}\ast}^2$ it is a minimum rather than a
maximum. Finally, there is another satisfiability transition at
-$V=V_\text{\textsc{sat}}$ corresponding to the vanishing of the effective
-action at the $m=0$ solution, with $\mathcal S(0)=0$. For generic covariance
+$V_0=V_\text{\textsc{sat}}$ corresponding to the vanishing of the effective
+action at the $m=0$ solution, with $\mathcal S(0)=0$. For a generic covariance
function $f$ it is not possible to write an explicit formula for
-$V_\text{\textsc{sat}}$, and we usually calculate it through a numeric
+$V_\text{\textsc{sat}}$, and we calculate it through a numeric
root-finding algorithm.
-When $m_\text{min}^2>0$, the solution at $m=0$ is difficult to interpret, since the action takes a complex value. In fact, it is not
-clear what the contribution to the average value of the Euler characteristic should be at all when
-there is some range $-m_\text{min}<m<m_\text{min}$ where the effective action
-is complex. Such a result could arise from the breakdown of the large-deviation
-principle behind the calculation of the effective action, or it could be the
-result of a negative Euler characteristic.
-
+When $m_\text{min}^2>0$, the solution at $m=0$ is difficult to interpret, since
+the action takes a complex value. Such a result could arise from the breakdown
+of the large-deviation principle behind the calculation of the effective
+action, or it could be the result of a negative Euler characteristic.
To address this ambiguity, we compute also the average of the square of the Euler
characteristic, $\overline{\chi(\Omega)^2}$, with details in
Appendix~\ref{sec:rms}. This has the benefit of always being positive, so that
@@ -436,33 +471,38 @@ complex values even when $\overline{\chi(\Omega)}$ is negative. Under the restri
present the replica symmetric (\textsc{rs}) description of this problem can
have $q_0>0$, and $\overline{\chi(\Omega)^2}\neq[\overline{\chi(\Omega)}]^2$
always.
-}we find three
+} we find three
saddle points that could contribute to the value of
-$\overline{\chi(\Omega)^2}$: two at $\pm m^*$ where
+$\overline{\chi(\Omega)^2}$: two at $\pm m_*$ where
$\frac1N\log\overline{\chi(\Omega)^2}=\frac1N\log\overline{\chi(\Omega)}\simeq0$,
and one at $m=0$ where
\begin{equation}
\frac1N\log\overline{\chi(\Omega)^2}=2\operatorname{Re}\mathcal S_\chi(0)
\end{equation}
which is consistent with
-$\overline{\chi(\Omega)^2}\simeq[\overline{\chi(\Omega)}]^2$. It is therefore possible to interpret the average Euler characteristic in the
-regime where its effective action is complex-valued as being negative, with a
-magnitude implied by the real part of the action.
-
-Such a
-correspondence, which indicates that the `annealed' calculation here is also
-representative of typical realizations of the constraints, is not always true.
-With average squared Euler characteristic we find instabilities of the solution
-at $m=0$ to replica symmetry breaking (\textsc{rsb}). We do not explore
-these \textsc{rsb} solutions here, except in the context of $M=1$ in
-Section~\ref{sec:ssg}. However, in the following Figures \ref{fig:phases} and
-\ref{fig:crossover} we shade the unstable region.
+$\overline{\chi(\Omega)^2}\simeq[\overline{\chi(\Omega)}]^2$. We therefore
+conclude that when the effective action is complex-valued, the average Euler
+characteristic is negative and its magnitude is given by the real part of the
+action.
+
+Such a correspondence, which indicates that the `annealed' calculation
+presented here is also representative of typical realizations of the
+constraints, is not always true. Sometimes the average squared Euler
+characteristic has alternative saddle points for which
+$\overline{\chi(\Omega)^2}\neq[\overline{\chi(\Omega)}]^2$, which implies that
+average properties will not be typical of most realizations. With our
+calculation of the average squared Euler characteristic, we can identify
+instabilities of the solution described above toward such replica symmetry
+breaking (\textsc{rsb}) solutions. The analysis of these instabilities can be found in Appendix~\ref{sec:rsb.instability}. We do not explore these \textsc{rsb}
+solutions here, except in the context of $M=1$ and the spherical spin glasses
+in Section~\ref{sec:ssg}. However, in the phase diagrams of Figures \ref{fig:phases}
+and \ref{fig:crossover} we shade the region where our calculation indicates that an instability is present.
\subsection{Topological phases and their interpretation}
The results of the previous section allow us to unambiguously define distinct
topological phases, which differ depending on the presence or absence of the
-local maxima at $m=\pm m^*$, on the presence or absence of the local maximum
+local maxima at $m=\pm m_*$, on the presence or absence of the local maximum
at $m=0$, on the real or complex nature of this maximum, and finally on
whether the action is positive or negative. Below we enumerate these regimes,
which are schematically represented in Fig.~\ref{fig:cartoons}.\footnote{
@@ -473,29 +513,60 @@ which are schematically represented in Fig.~\ref{fig:cartoons}.\footnote{
depending on the evenness or oddness of the manifold dimension.
}
+\begin{figure}
+ \includegraphics[width=0.196\textwidth]{figs/connected.pdf}
+ \includegraphics[width=0.196\textwidth]{figs/middle.pdf}
+ \includegraphics[width=0.196\textwidth]{figs/complex.pdf}
+ \includegraphics[width=0.196\textwidth]{figs/shattered.pdf}
+ \includegraphics[width=0.196\textwidth]{figs/gone.pdf}
+
+ \hspace{1.5em}
+ \textbf{Regime I}
+ \hfill
+ \textbf{Regime II}
+ \hfill
+ \textbf{Regime III}
+ \hfill
+ \textbf{Regime IV}
+ \hfill
+ \textbf{Regime V}
+ \hspace{1.5em}
+
+ \caption{
+ \textbf{Cartoons of the solution manifold in five topological regimes.}
+ The solution manifold is shown as a shaded region, and the height axis
+ $\mathbf x_0$ is a black arrow. In Regime I, the average Euler
+ characteristic is consistent with a manifold with a single simply-connected
+ component. In Regime II, holes occupy the equator but the most polar
+ regions are topologically simple. In Regime III, holes dominate and the
+ edge of the manifold is not necessarily simple. In Regime IV, disconnected
+ components dominate. In Regime V, the manifold is empty.
+ } \label{fig:cartoons}
+\end{figure}
+
\paragraph{Regime I: \boldmath{$\overline{\chi(\Omega)}=2$}.}
This regime is found when the magnitude of the target value $V_0$ is less than
the onset $V_\text{on}$ and $\operatorname{Re}\mathcal S(0)<0$, so that the
-maxima at $m=\pm m^*$ exist and are the dominant contributions to the average
+maxima at $m=\pm m_*$ exist and are the dominant contributions to the average
Euler characteristic. Here, $\overline{\chi(\Omega)}=2+o(1)$ for even $N-M-1$,
strongly indicating a topology homeomorphic to the $S^{N-M-1}$ sphere. This regime is the only nontrivial one found with linear covariance $f(q)$, where the solution manifold must be a sphere if it is not empty.
-\paragraph{Regime II: \boldmath{$\overline{\chi(\Omega)}$} large and negative, isolated contributions at \boldmath{$m=\pm m^*$}.}
+\paragraph{Regime II: \boldmath{$\overline{\chi(\Omega)}$} large and negative, isolated contributions at \boldmath{$m=\pm m_*$}.}
This regime is found when the magnitude of the target value $V_0$ is less than the onset $V_\text{on}$, $\operatorname{Re}\mathcal S(0)>0$, and the value of the action at $m=0$ is complex. The dominant contribution to the average Euler characteristic comes from the equator at $m=0$, but the complexity of the action implies that the Euler characteristic is negative.
While the topology of the manifold is not necessarily connected in this
regime, holes are more numerous than components. Since $V_0^2<V_\text{on}^2$,
-there are isolated contributions to $\overline{\chi(\Omega)}$ at $m=\pm m^*$.
+there are isolated contributions to $\overline{\chi(\Omega)}$ at $m=\pm m_*$.
This implies a temperate band of relative simplicity: given a random point on
the sphere, the nearest parts of the solution manifold are unlikely to have holes or
disconnected components.
-\paragraph{Regime III: \boldmath{$\overline{\chi(\Omega)}$} large and negative, no contribution at \boldmath{$m=\pm m^*$}.}
+\paragraph{Regime III: \boldmath{$\overline{\chi(\Omega)}$} large and negative, no contribution at \boldmath{$m=\pm m_*$}.}
The same as Regime II, but with $V_0^2>V_\text{on}^2$. The solutions at
$m=\pm m_*$ no longer exist, and nontrivial contributions to the Euler
-characteristic are made all the way to the solution manifold's boundary.
+characteristic are made all the way to the edges of the solution manifold.
\paragraph{Regime IV: \boldmath{$\overline{\chi(\Omega)}$} large and positive.}
@@ -515,38 +586,6 @@ there may be circumstances where part of this regime is characterized by
nonempty solution manifolds that are overwhelmingly likely to have Euler
characteristic zero.
-\begin{figure}
- \includegraphics[width=0.196\textwidth]{figs/connected.pdf}
- \includegraphics[width=0.196\textwidth]{figs/middle.pdf}
- \includegraphics[width=0.196\textwidth]{figs/complex.pdf}
- \includegraphics[width=0.196\textwidth]{figs/shattered.pdf}
- \includegraphics[width=0.196\textwidth]{figs/gone.pdf}
-
- \hspace{1.5em}
- \textbf{Regime I}
- \hfill
- \textbf{Regime II}
- \hfill
- \textbf{Regime III}
- \hfill
- \textbf{Regime IV}
- \hfill
- \textbf{Regime V}
- \hspace{1.5em}
-
- \caption{
- \textbf{Cartoons of the solution manifold in five topological regimes.}
- The solution manifold is shown as a shaded region with a black boundary,
- and the height axis $\mathbf x_0$ is a black arrow. In Regime I, the
- statistics of the Euler characteristic is consistent with a manifold with a
- single simply-connected component. In Regime II, holes occupy the equator
- but its most polar regions are
- topologically simple. In Regime III, holes dominate and the edge of the
- manifold is not necessarily simple. In Regime IV, disconnected components
- dominate. In Regime V, the manifold is empty.
- } \label{fig:cartoons}
-\end{figure}
-
\begin{figure}
\includegraphics{figs/phases_1.pdf}
@@ -630,7 +669,7 @@ linear ($\lambda=0$) and pure quadratic ($\lambda=1$). A new phase boundary
appears separating regimes I and II, defined as the point where the real part
of the action at $m=0$ changes from negative to positive. In purely quadratic
case, and in mixed linear and nonlinear cases, there is a substantial region of
-the phase diagram shown in Appendix~\ref{sec:rms} to be susceptible to
+the phase diagram shown in Appendix~\ref{sec:rsb.instability} to be susceptible to
\textsc{rsb}, especially for small $V_0$ and large $\alpha$. Future research
into the structure of solutions in this regime is merited.
@@ -647,31 +686,33 @@ glasses by taking the limit $\alpha\to0$ while keeping $E=V_0\alpha^{-1/2}$ fixe
E_\text{sh}=\pm\sqrt{4f(1)\left(1-\frac{f(1)}{f'(1)}\right)}
\label{eq:ssg.energies}
\end{align}
-for the onset and shattering energies. For the pure $p$-spin spherical spin glasses, which have homogeneous covariances $f(q)=\frac12q^p$,
-$E_\text{sh}=\sqrt{2(p-1)/p}$, precisely the threshold energy in these
-models \cite{Castellani_2005_Spin-glass}. This is intuitive, since threshold energy, defined as the place where
+for the onset and shattering energies. The same limit taken for
+$V_\text{\textsc{sat}}\alpha^{-1/2}$ coincides with the ground state energy
+$E_\text{gs}$.
+
+For the pure $p$-spin spherical spin glasses, which have homogeneous covariance functions $f(q)=\frac12q^p$,
+the shattering energy is $E_\text{sh}=\sqrt{2(p-1)/p}$, precisely the same as the threshold energy $E_\text{th}$ \cite{Castellani_2005_Spin-glass}. This is intuitive, since threshold energy, defined as the place where
marginal minima are dominant in the landscape, is widely understood as the
place where level sets are broken into pieces.
-
-However, for general mixed models the threshold energy is
+However, for general mixed models with inhomogeneous covariance functions the threshold energy is
\begin{equation}
E_\mathrm{th}=\pm\frac{f(1)[f''(1)-f'(1)]+f'(1)^2}{f'(1)\sqrt{f''(1)}}
\end{equation}
which satisfies $|E_\text{sh}|\leq|E_\text{th}|$. Therefore, as one
-descends in energy in generic models one will meet the shattering energy before
+descends in energy in one will generically meet the shattering energy before
the threshold energy. This is perhaps unexpected, since one might imagine that
where level sets of the energy break into many pieces would coincide with the
largest concentration of shallow minima in the landscape. We see here that this isn't the case.
-This fact mirrors another another that was made clear
-recently: when gradient decent dynamics are run on these models, they will
-asymptotically reach an energy above the threshold energy
-\cite{Folena_2020_Rethinking, Folena_2021_Gradient, Folena_2023_On}. The old
-belief that the threshold energy qualitatively coincides with a kind of
-shattering is the origin of the expectation that the dynamic limit should be
-the threshold energy. Given that our work shows the actual shattering energy is
-different, here we compare it with data on the asymptotic limits of dynamics to
-see if they coincide.
+This fact mirrors another another that was made clear recently: when gradient
+decent dynamics are run on these models, they will asymptotically reach an
+energy above the threshold energy \cite{Folena_2020_Rethinking,
+Folena_2021_Gradient, Folena_2023_On}. The old belief that the threshold energy
+qualitatively coincides with a kind of shattering is one source of the
+expectation that the it should coincide with the dynamic limit. Motivated by
+our discovery that the actual shattering energy is different from the threshold
+energy, we make a comparison with existing data on the asymptotic limits of
+dynamics.
\begin{figure}
\includegraphics{figs/dynamics_2.pdf}
@@ -697,7 +738,7 @@ $f(q)$, with
\begin{equation}
f(q)=\frac12\big[\lambda q^p+(1-\lambda)q^s\big]
\end{equation}
-They studied models with this covariance for $p=2$ and $p=3$ while varying $s$.
+The authors of \cite{Folena_2023_On} studied models with this covariance for $p=2$ and $p=3$ while varying $s$.
In both cases, the relative weight $\lambda$ varies with $s$ and was chosen to
maximize a heuristic to increase the chances of seeing nontrivial behavior. The
authors numerically integrated the dynamic mean field theory (\textsc{dmft})
@@ -709,7 +750,7 @@ critical here, see the original paper for details. We simply note that the
authors of \cite{Folena_2023_On} did not associate an uncertainty with them,
nor were they confident that they are unbiased estimates of the asymptotic value.
-Fig.~\ref{fig:ssg} also shows the shattering and {\oldstylenums1}\textsc{rsb}
+Fig.~\ref{fig:ssg} also shows the shattering and annealed
threshold energies as a function of $s$. The solid lines come from using
\textsl{Mathematica}'s \texttt{Interpolation} function to create a smooth
function $\lambda(s)$ through the values used in \cite{Folena_2023_On}. For the
@@ -731,10 +772,11 @@ motivates working to integrate the \textsc{dmft} equations to longer times, or
else look for analytic asymptotic solutions that approach $E_\text{sh}$.
\section{Conclusion}
+\label{sec:conclusion}
We have shown how to calculate the average Euler characteristic of the solution
manifold in a simple model of random constraint satisfaction. The results
-constrain the topology and geometry of this manifold, revealing when it is
+constrain the topology of this manifold, revealing when it is
connected and trivial, when it is extensive but topologically nontrivial, and
when it is shattered into disconnected pieces.
@@ -747,6 +789,19 @@ It's possible that $E_\text{sh}$ is the asymptotic limit of gradient descent
from a random initial condition in such models, but the quality of the
currently available data makes this conjecture inconclusive.
+Our work also highlights a severe limitation of using the statistics of
+stationary points of an energy or cost function to infer topological properties
+of the level sets. In the mixed spherical spin glasses, neither one nor two
+stationary point statistics reveal the presence of the topologically significant
+energy density $E_\text{sh}$ \cite{BenArous_2019_Geometry,
+Folena_2020_Rethinking, Kent-Dobias_2024_Arrangement}. If the shattering energy
+is found conclusively to be the dynamic threshold for gradient descent this
+failure will be all the more serious. It may be enlightening to return to old
+problems of mean-field landscape analysis with this approach in hand, including
+in the analysis of the TAP free energy in many spin-glass settings
+\cite{Bray_1980_Metastable, Crisanti_1995_Thouless-Anderson-Palmer,
+Muller_2006_Marginal}.
+
This paper has focused on equality constraints, while most existing studies of
constraint satisfaction study inequality constraints \cite{Franz_2016_The,
Franz_2017_Universality, Franz_2019_Critical, Annesi_2023_Star-shaped,
@@ -772,7 +827,7 @@ JK-D is supported by a \textsc{DynSysMath} Specific Initiative of the INFN.
Our starting point is the expression \eqref{eq:kac-rice.lagrange} with the
substitutions of the $\delta$-function and determinant \eqref{eq:delta.exp} and
\eqref{eq:det.exp} made. To make the calculation compact, we introduce
-superspace coordinates \cite{DeWitt_1992_Supermanifolds}. Introducing the Grassmann indices $\bar\theta_1$
+superspace coordinates \cite{DeWitt_1992_Supermanifolds, Kent-Dobias_2024_Conditioning}. Introducing the Grassmann indices $\bar\theta_1$
and $\theta_1$, we define the supervectors
\begin{align}
\pmb\phi(1)=\mathbf x+\bar\theta_1\pmb\eta+\bar{\pmb\eta}\theta_1+\bar\theta_1\theta_1i\hat{\mathbf x}
@@ -797,7 +852,7 @@ The Euler characteristic can be expressed using these supervectors as
\right]
\right\} \notag
\end{align}
-where $d1=d\bar\theta_1\,d\theta_1$ is the integral over both Grassmann
+where $d1=d\bar\theta_1\,d\theta_1$ is the integral measure over both Grassmann
indices. Since this is an exponential integrand linear in the Gaussian
functions $V_k$, we can take their average to find
\begin{equation} \label{eq:χ.post-average}
@@ -815,7 +870,7 @@ functions $V_k$, we can take their average to find
\end{equation}
This is a super-Gaussian integral in the super-Lagrange multipliers $\sigma_k$ with $1\leq k\leq M$.
Performing that integral yields
-\begin{equation}
+\begin{equation} \label{eq:pre.hubbard-strat}
\begin{aligned}
\overline{\chi(\Omega)}
&=\int d\pmb\phi\,d\sigma_0\,\exp\Bigg\{
@@ -942,7 +997,7 @@ as removing all dependence on $\bar H$ and $H$. With these solutions inserted, t
\end{align}
The Grassmann terms in these expressions do not contribute to the effective
action, but will be important in our derivation of the prefactor for the
-calculation in the connected regime. The substitution of these expressions into \eqref{eq:post.hubbard-strat} without the Grassmann terms yields \eqref{eq:euler.action} from the main text.
+exponential around the stationary points at $\pm m_*$. The substitution of these expressions into \eqref{eq:post.hubbard-strat} without the Grassmann terms yields \eqref{eq:euler.action} from the main text.
\section{Calculation of the prefactor of the average Euler characteristic}
\label{sec:prefactor}
@@ -958,7 +1013,7 @@ large-$N$ integral. In addition, there are important contributions of a sign of
\subsection{Contribution from the Hubbard--Stratonovich transformations}
\label{sec:prefactor.hs}
-First, we examine the factors arising from the definition of order parameters. This begins by introducing to the integral the factor of one
+First, we examine the factors arising from the definition of order parameters. This begins by introducing to the integral \eqref{eq:pre.hubbard-strat} the factor of one
\begin{equation}
1=(2\pi)^3\int d\mathbb Q\,d\mathbb M\,
\delta\big(N\mathbb Q(1,2)-\pmb\phi(1)\cdot\pmb\phi(2)\big)\,
@@ -1030,7 +1085,7 @@ are important to getting correctly the sign of the prefactor. For instance, cons
\begin{equation}
\operatorname{sdet}\mathbb Q=\frac{CD+R^2}{G^2}
\end{equation}
-The numerator and denominator arise from the determinant in the sector of number and Grassmann number basis elements for the superoperator, respectively. In our calculation, such superdeterminants appear after Gaussian integrals, which produce
+The numerator and denominator arise from the determinant in the sector of ordinary number and Grassmann number basis elements for the superoperator, respectively. In our calculation, such superdeterminants appear after Gaussian integrals, which produce
\begin{equation}
\int d\pmb\phi\,\exp\left\{-\frac12\int d1\,d2\,\pmb\phi(1)\mathbb Q(1,2)\pmb\phi(2)\right\}
=(\operatorname{sdet}\mathbb Q)^{-\frac12}
@@ -1120,27 +1175,27 @@ function $g(R,D,m,\hat m)$ of \eqref{eq:pre-saddle.characteristic} is given by
\right)
\end{aligned}
\end{equation}
-In the connected regime, there are two saddle points of the integrand that
-contribute to the asymptotic value of the integral, at $m=\pm m^*$ with $R=-m^*$, $D=0$, and $\hat m=0$. At this saddle point $\mathcal S_\chi=0$. We can therefore write
+In regime I, there are two saddle points of the integrand that
+contribute to the asymptotic value of the integral, at $m=\pm m_*$ with $R=-m_*$, $D=0$, and $\hat m=0$. At this saddle point $\mathcal S_\chi=0$. We can therefore write
\begin{equation}
\overline{\chi(\Omega)}
- =\sum_{m=\pm m^*}
+ =\sum_{m=\pm m_*}
g(-m,0,m,0)\big[\det\partial\partial\mathcal S_\chi(-m,0,m,0)\big]^{-\frac12}
\end{equation}
where here $\partial=[\frac{\partial}{\partial R},\frac{\partial}{\partial D},\frac\partial{\partial m},\frac\partial{\partial \hat m}]$ is the
vector of derivatives with respect to the remaining order parameters. For both of the two saddle points, the determinant of the Hessian of the effective action evaluates to
\begin{equation}
\det\partial\partial\mathcal S_\chi
- =\left[\frac1{(m^*)^4}\left(1-\frac{\alpha[V_0^2+f(1)]}{f'(1)}\right)\right]^2
+ =\left[\frac1{(m_*)^4}\left(1-\frac{\alpha[V_0^2+f(1)]}{f'(1)}\right)\right]^2
\end{equation}
whereas
\begin{equation}
- g(\mp m^*,0,\pm m^*,0)
- =\frac{(\mp1)^{N+M+1}}{|m^*|^4}\left(1-\frac{\alpha[V_0^2+f(1)]}{f'(1)}\right)
+ g(\mp m_*,0,\pm m_*,0)
+ =\frac{(\mp1)^{N+M+1}}{|m_*|^4}\left(1-\frac{\alpha[V_0^2+f(1)]}{f'(1)}\right)
\end{equation}
-The saddle point at $m=-m^*$, characterized by minima of the height function, always contributes with a positive term. On the other hand, the saddle point with $m=+m^*$, characterized by maxima of the height function, contributes with a sign depending on if $N+M+1$ is even or odd. This follows from the fact that minima, with an index of 0, have a positive contribution to the sum over stationary points, while maxima, with an index of $N-M-1$, have a contribution that depends on the dimension of the manifold.
+The saddle point at $m=-m_*$, characterized by minima of the height function, always contributes with a positive term. On the other hand, the saddle point with $m=+m_*$, characterized by maxima of the height function, contributes with a sign depending on if $N+M+1$ is even or odd. This follows from the fact that minima, with an index of 0, have a positive contribution to the sum over stationary points, while maxima, with an index of $N-M-1$, have a contribution that depends on the dimension of the manifold.
-We have finally that, in the connected regime,
+We have finally that, in regime I,
\begin{equation}
\overline{\chi(\Omega)}=1+(-1)^{N+M+1}+O(N^{-1})
\end{equation}
@@ -1161,7 +1216,7 @@ characteristic. This is accomplished by taking two copies of the integral \eqref
=\int d\pmb\phi_1\,d\pmb\sigma_1\,d\pmb\phi_2\,d\pmb\sigma_2\,
e^{\int d1\,[L(\pmb\phi_1(1),\pmb\sigma_1(1))+L(\pmb\phi_2(1),\pmb\sigma_2(1))]}
\end{equation}
-The same steps as in the derivation of the Euler characteristic follow. The result is the same as \eqref{eq:post.hubbard-strat}, but with the substitutions
+The same steps as in the derivation of the Euler characteristic follow. The result is the same as \eqref{eq:post.hubbard-strat}, but with the substitutions of the order parameters with matrices of order parameters,
\begin{align}
\mathbb Q\mapsto\begin{bmatrix}
\mathbb Q_{11} & \mathbb Q_{12} \\
@@ -1180,7 +1235,9 @@ where we have defined
\mathbb M_i(1)=\frac1N\pmb\phi_i(1)\cdot\mathbf x_0
\end{align}
Expanding the superindices and applying the Dirac $\delta$-functions implied by
-the Lagrange multipliers associated with the spherical constraint, we arrive at an expression
+the Lagrange multipliers associated with the spherical constraint (which set
+$C_{11}=C_{22}=1$ and $G_{11}=-R_{11}$, $G_{22}=-R_{22}$), we arrive at an
+expression
\begin{equation}
\overline{\chi(\Omega)^2}
\simeq\int dC_{12}\,dR_{11}\,dR_{12}\,dR_{21}\,dR_{22}\,dD_{11}\,dD_{12}\,dD_{22}\,dG_{12}\,dG_{21}\,dm_1\,dm_2\,d\hat m_1\,d\hat m_2\,e^{N\mathcal S_{\chi^2}}
@@ -1231,7 +1288,7 @@ with the matrices $A_1$, $A_2$, $A_3$, and $A_4$ defined by
G_{21} & 0 & -R_{22} & 0
\end{bmatrix}
\end{equation}
-and where $\Delta_{12}=G_{12}G_{21}-R_{12}R_{21}$. The expression must be
+and where $\Delta_{12}=G_{12}G_{21}-R_{12}R_{21}$. The effective action must be
extremized over all the order parameters. We look for solutions in two regimes
that are commensurate with the solutions found for the Euler characteristic.
These correspond to $m_1=m_2=0$ and $C_{12}=0$, and $m_1=m_2=\pm m_*$ and
@@ -1256,32 +1313,33 @@ $\overline{\chi(\Omega)^2}\simeq[\overline{\chi(\Omega)}]^2$, justifying the
`annealed' approach we have taken here.
\subsection{Instability to replica symmetry breaking}
+\label{sec:rsb.instability}
However, these solutions are not always the correct saddle point for evaluating
the average squared Euler characteristic. When another solution is dominant,
the dissonance between the average square and squared average indicates the
necessity of a quenched calculation to determine the behavior of typical
-samples, and likely instability to \textsc{rsb}. We can find these points of
+samples, and also a likely instability to \textsc{rsb}. We can find these points of
instability by examining the Hessian of the action of the average square of the
Euler characteristic at $m=0$. The stability of this matrix is not sufficient
to determine if our solution is stable, since the many $\delta$-functions
-employed in our derivation ensure that the resulting saddle point never at a
+employed in our derivation ensure that the resulting saddle point is never at a
true maximum with respect to some combinations of variables. We rather look for
-places where the stability of this matrix changes, which represent places where
+places where the stability of this matrix changes, where
another solution branches from the existing one. However, we must neglect the
branching of trivial solutions, which occur when $R_*$ goes from real- to
complex-valued.
By examination of the results, it appears that nontrivial \textsc{rsb}
instabilities occur along eigenvectors of the Hessian of $\mathcal S_{\chi^2}$
-in the subspace spanned by $C_{12}$, $R_{12}$, $R_{21}$, and $D_{12}$. This may
+constrained to the subspace spanned by $C_{12}$, $R_{12}$, $R_{21}$, and $D_{12}$. This may
not be surprising, since these are the parameters that represent nontrivial
correlations between the two copies of the system. We can therefore find the \textsc{rsb} instability by looking for nontrivial zeros of
\begin{equation}
\det\partial\partial\mathcal S_{\chi^2}\equiv\det\frac{\partial^2\mathcal S_{\chi^2}}{\partial[C_{12},R_{12},R_{21},D_{12}]^2}
\end{equation}
evaluated at the $m=0$ solution described above. The resulting expression is
-usually quite heinous, but there is a regime where a dramatic simplification is
+usually quite heinous and we will not reproduce it in its general form in the text, but there is a regime where a dramatic simplification is
possible. The instability always occurs along the direction
$R_{21}=R_{12}^\dagger$, but when $R_*$ is real, $R_{11}=R_{22}$ and the instability occurs along
the direction $R_{21}=R_{12}$. This allows us to examine a simpler action, and
@@ -1289,8 +1347,8 @@ we find the determinant is proportional to two nontrivial factors, with
\begin{equation} \label{eq:stab.det}
\det\partial\partial\mathcal S_{\chi^2}=-\frac{2B_1B_2}{[r_*f'(1)]^3[(1+r_*)f(1)-r_*f'(1)]^7}
\end{equation}
-If we let
-$r_*=\lim_{m\to0}R_*/m$, then the factors $B_1$ and $B_2$ are
+If we define
+$r_*\equiv\lim_{m\to0}R_*/m$, then the factors $B_1$ and $B_2$ are
\begin{align}
B_1=[(1+r_*)f(1)]^3-3r_*[(1+r_*)f(1)]^2f'(1)
+\alpha V_0^2\big[2(1+r_*)f'(0)^2+r_*f'(1)f''(0)\big]
@@ -1317,8 +1375,9 @@ $r_*=\lim_{m\to0}R_*/m$, then the factors $B_1$ and $B_2$ are
\bigg]
\end{align}
As $\alpha$ is increased from zero, the first of these factors to go through
-zero represents the instability point. These formulas are responsible for the
-shaded regions in Fig.~\ref{fig:phases} and Fig.~\ref{fig:crossover}.
+zero represents the instability point. These formulas are responsible for
+defining the boundaries of the shaded regions in Fig.~\ref{fig:phases} and
+Fig.~\ref{fig:crossover}.
Surprisingly, this approach sees no signal of a
replica symmetry breaking (\textsc{rsb}) transition previously found in
@@ -1347,7 +1406,7 @@ Here we share how the quenched shattering energy is calculated under a
the spherical spin glasses, we start with \eqref{eq:χ.post-average}. The
formula in a quenched calculation is almost the same as that for the annealed,
but the order parameters $C$, $R$, $D$, and $G$ must be understood as $n\times
-n$ matrices rather than scalars. In principle $m$, $\hat m$, $\omega_0$, $\hat\omega_0$, $\omega_1$, and $\hat\omega_1$ should be considered $n$-dimensional vectors, but since in our ansatz replica vectors are constant we can take them to be constant from the start. We have
+n$ matrices rather than scalars. In principle $m$, $\hat m$, $\omega_0$, $\hat\omega_0$, $\omega_1$, and $\hat\omega_1$ should be considered $n$-dimensional vectors, but since in our ansatz replica vectors are constant we can take them to be constant from the start. Expanding the superspace notation, we have
\begin{align}
&\overline{\log\chi(\Omega)}
=\lim_{n\to0}\frac\partial{\partial n}\int dC\,dR\,dD\,dG\,dm\,d\hat m\,d\omega_0\,d\hat\omega_0\,d\omega_1\,d\hat\omega_1\,
@@ -1380,10 +1439,10 @@ symmetry possessed by the original action \cite{Annibale_2003_The, Annibale_2003
\hat m=0
\end{align}
Moreover, this problem with $m=0$ has a close resemblance to the complexity of
-the spherical spin glasses. In both, at the supersymmetric saddle point the
+the spherical spin glasses. In both, at the BRST-symmetric saddle point the
matrix $R$ is diagonal with $R=r_dI$ \cite{Kent-Dobias_2023_How}.
To investigate the shattering energy, we can restrict to solutions with $m=0$
-and look for the place where such solutions vanish. Inserting these simplifications, we have up to highest order in $N$
+and look for the place where such solutions become complex. Inserting these simplifications, we have up to highest order in $N$
\begin{equation}
\begin{aligned}
\overline{\log\chi(\Omega)}
@@ -1431,7 +1490,7 @@ for arbitrary \textsc{rsb} structure in the matrix $C$ as
where $\chi(q)=\int_1^qdq'\int_0^{q'}dq''P(q'')$ and $P(q)$ is the distribution of
off-diagonal elements of the matrix $C$ \cite{Crisanti_1992_The, Crisanti_2004_Spherical, Crisanti_2006_Spherical}. This action must be extremized over
the function $\chi$ and the variables $\hat\beta$ and $\tilde r_d$, under the
-constraint that $\chi(q)$ is continuous, that it has $\chi'(1)=-1$, and
+constraint that $\chi(q)$ is continuous, and that it has $\chi'(1)=-1$ and
$\chi(1)=0$, necessary for $P$ to be a well-defined probability distribution.
Now the specific form of replica symmetry breaking we expect to see is
@@ -1445,16 +1504,30 @@ complexity, this will correspond to full \textsc{rsb}, in an analogous way to
1-q & q \geq q_0
\end{cases}
\end{equation}
-where $\chi_0$ is
+where
\begin{equation}
\chi_0(q)=\frac1{\hat\beta}[f''(q)^{-1/2}-\tilde r_d]
\end{equation}
-the function implied by extremizing \eqref{eq:cont.action} over $\chi$. The
+is the function implied by extremizing \eqref{eq:cont.action} over $\chi$. The
variable $q_0$ must be chosen so that $\chi$ is continuous. The key difference
between \textsc{frsb} and {\oldstylenums1}\textsc{frsb} in this setting is that
in the former case the ground state has $q_0=1$, while in the latter the ground
state has $q_0<1$.
+\begin{figure}
+ \centering
+ \includegraphics{figs/rsb_comp.pdf}
+
+ \caption{
+ \textbf{Self-consistency between \textsc{rsb} instabilities.}
+ Comparison between the predicted value $q_0$ for the \textsc{frsb} solution
+ at the shattering energy in $2+s$ models and the value of the determinant
+ \eqref{eq:stab.det} used in the previous appendix to predict the point of
+ \textsc{rsb} instability. The value of $s$ at which $q_0$ becomes nonzero is
+ precisely the point where the determinant has a nontrivial zero.
+ } \label{fig:rsb}
+\end{figure}
+
We use this action to find the shattering energy in the following way. First,
we know that the ground state energy is the place where the manifold and therefore the average Euler characteristic vanishes. Therefore, setting $\overline{\log\chi(\Omega)}=0$ and solving for $E$ yields a formula
for the ground state energy
@@ -1471,29 +1544,14 @@ correct parameters at the ground state for a particular model. Then, the
shattering energy is found by slowly lowering $q_0$ and solving the combined
extremal and continuity problem for $\hat\beta$, $\tilde r_d$, and $E$ until
$E$ reaches a maximum value and starts to decrease. This maximum is the
-shattering energy, since it is the point where the $m=0$ solution vanishes.
+shattering energy, since it is the point where the $m=0$ solution becomes complex.
Starting from this point, we take small steps in $s$ and $\lambda_s$, again
simultaneously extremizing, ensuring continuity, and maximizing $E$. This draws
out the shattering energy across the entire range of $s$ plotted in
Fig.~\ref{fig:ssg}. The transition to the \textsc{rs} solution occurs when the
value $q_0$ that maximizes $E$ hits zero. We find that the transition between
\textsc{rs} and \textsc{frsb} is precisely predicted by the \textsc{rsb} instability
-calculated in Appendix~\ref{sec:rms} by analyzing the solution to the average
-square of the Euler characteristic, as shown in Fig.~\ref{fig:rsb}.
-
-\begin{figure}
- \centering
- \includegraphics{figs/rsb_comp.pdf}
-
- \caption{
- \textbf{Self-consistency between \textsc{rsb} instabilities.}
- Comparison between the predicted value $q_0$ for the \textsc{frsb} solution
- at the shattering energy in $2+s$ models and the value of the determinant
- \eqref{eq:stab.det} used in the previous appendix to predict the point of
- \textsc{rsb} instability. The value of $s$ at which $q_0$ becomes nonzero is
- precisely the point where the determinant has a nontrivial zero.
- } \label{fig:rsb}
-\end{figure}
+calculated in Appendix~\ref{sec:rms}, as shown in Fig.~\ref{fig:rsb}.
\bibliography{topology}