summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--figs/action.pdfbin0 -> 22726 bytes
-rw-r--r--figs/bar.pdfbin0 -> 4712 bytes
-rw-r--r--figs/characteristic.pdfbin0 -> 10115 bytes
-rw-r--r--figs/connected.pdfbin135349 -> 168514 bytes
-rw-r--r--figs/gone.pdfbin0 -> 155551 bytes
-rw-r--r--figs/shattered.pdfbin164526 -> 172535 bytes
-rw-r--r--topology.bib20
-rw-r--r--topology.tex193
8 files changed, 194 insertions, 19 deletions
diff --git a/figs/action.pdf b/figs/action.pdf
new file mode 100644
index 0000000..8764cf7
--- /dev/null
+++ b/figs/action.pdf
Binary files differ
diff --git a/figs/bar.pdf b/figs/bar.pdf
new file mode 100644
index 0000000..ba12588
--- /dev/null
+++ b/figs/bar.pdf
Binary files differ
diff --git a/figs/characteristic.pdf b/figs/characteristic.pdf
new file mode 100644
index 0000000..8f03c20
--- /dev/null
+++ b/figs/characteristic.pdf
Binary files differ
diff --git a/figs/connected.pdf b/figs/connected.pdf
index 8fccdaf..8bc0dc6 100644
--- a/figs/connected.pdf
+++ b/figs/connected.pdf
Binary files differ
diff --git a/figs/gone.pdf b/figs/gone.pdf
new file mode 100644
index 0000000..fdcf8cc
--- /dev/null
+++ b/figs/gone.pdf
Binary files differ
diff --git a/figs/shattered.pdf b/figs/shattered.pdf
index d3d2957..401cbbf 100644
--- a/figs/shattered.pdf
+++ b/figs/shattered.pdf
Binary files differ
diff --git a/topology.bib b/topology.bib
index 8165e79..31d8bad 100644
--- a/topology.bib
+++ b/topology.bib
@@ -104,6 +104,26 @@
eprinttype = {arxiv}
}
+@unpublished{Kent-Dobias_2024_Algorithm-independent,
+ author = {Kent-Dobias, Jaron},
+ title = {Algorithm-independent bounds on complex optimization through the statistics of marginal optima},
+ year = {2024},
+ url = {https://arxiv.org/abs/2407.02092},
+ archiveprefix = {arXiv},
+ eprint = {2407.02092},
+ primaryclass = {cond-mat.dis-nn}
+}
+
+@unpublished{Kent-Dobias_2024_Conditioning,
+ author = {Kent-Dobias, Jaron},
+ title = {Conditioning the complexity of random landscapes on marginal optima},
+ year = {2024},
+ url = {https://arxiv.org/abs/2407.02082},
+ archiveprefix = {arXiv},
+ eprint = {2407.02082},
+ primaryclass = {cond-mat.dis-nn}
+}
+
@unpublished{Montanari_2023_Solving,
author = {Montanari, Andrea and Subag, Eliran},
title = {Solving overparametrized systems of random equations: I. Model and algorithms for approximate solutions},
diff --git a/topology.tex b/topology.tex
index 032fca9..6e694a1 100644
--- a/topology.tex
+++ b/topology.tex
@@ -66,13 +66,14 @@ their resemblance to encryption, optimization, and vertex models of confluent
tissues \cite{Fyodorov_2019_A, Fyodorov_2020_Counting,
Fyodorov_2022_Optimization, Urbani_2023_A, Kamali_2023_Dynamical,
Kamali_2023_Stochastic, Urbani_2024_Statistical, Montanari_2023_Solving,
-Montanari_2024_On}. In each of these cases, the authors studied properties of
+Montanari_2024_On, Kent-Dobias_2024_Conditioning, Kent-Dobias_2024_Algorithm-independent}. In each of these cases, the authors studied properties of
the cost function
\begin{equation}
\mathcal C(\mathbf x)=\frac12\sum_{k=1}^MV_k(\mathbf x)^2
\end{equation}
which achieves zero cost only for configurations that satisfy all the
-constraints. Here we dispense with defining a cost function, and instead study
+constraints.
+Here we dispense with defining a cost function, and instead study
the set of solutions directly.
The set of solutions to our nonlinear random constraint satisfaction problem
@@ -89,6 +90,10 @@ unlikely, requiring $NM$ independent equations to be simultaneously satisfied.
This means that different connected components of the set of solutions do not
intersect, nor are there self-intersections, without extraordinary fine-tuning.
+One result of this previous work is the value of $\alpha$ for the
+satisfiability transition, which is $\alpha_\text{\textsc{sat}}=f'(1)/f(1)$.
+For $\alpha$ larger than $\alpha_\text{\textsc{sat}}$ solutions do not exist.
+
The Euler characteristic $\chi$ of a manifold is a topological invariant \cite{Hatcher_2002_Algebraic}. It is
perhaps most familiar in the context of connected compact orientable surfaces, where it
characterizes the number of handles in the surface: $\chi=2(1-\#)$ for $\#$
@@ -130,19 +135,47 @@ each of the $V_k$, resulting in the Lagrangian
+\sum_{k=1}^M\omega_kV_k(\mathbf x)
\end{equation}
The integral over $\Omega$ in \eqref{eq:kac-rice} then becomes
-\begin{equation}
+\begin{equation} \label{eq:kac-rice.lagrange}
\chi=\int_{\mathbb R^N} d\mathbf x\int_{\mathbb R^{M+1}}d\pmb\omega
\,\delta\big(\partial L(\mathbf x,\pmb\omega)\big)
\det\partial\partial L(\mathbf x,\pmb\omega)
\end{equation}
-where $\partial=[\frac\partial{\partial\mathbf x},\frac\partial{\partial\pmb\omega}]$ is the vector of partial derivatives
-with respect to all $N+M+1$ variables.
+where $\partial=[\frac\partial{\partial\mathbf x},\frac\partial{\partial\pmb\omega}]$
+is the vector of partial derivatives with respect to all $N+M+1$ variables.
+
+To evaluate the average of $\chi$ over the constraints, we first translate the $\delta$ functions and determinant to integral form, with
+\begin{align}
+ \delta\big(\partial L(\mathbf x,\pmb\omega)\big)
+ =\int\frac{d\hat{\mathbf x}}{(2\pi)^N}\frac{d\hat{\pmb\omega}}{(2\pi)^{M+1}}
+ e^{i[\hat{\mathbf x},\hat{\pmb\omega}]\cdot\partial L(\mathbf x,\pmb\omega)}
+ \\
+ \det\partial\partial L(\mathbf x,\pmb\omega)
+ =\int d\bar{\pmb\eta}\,d\pmb\eta\,d\bar{\pmb\gamma}\,d\pmb\gamma\,
+ e^{-[\bar{\pmb\eta},\bar{\pmb\gamma}]^T\partial\partial H[\pmb\eta,\pmb\gamma]}
+\end{align}
+for real variables $\hat{\mathbf x}$ and $\hat{\pmb\omega}$, and Grassmann
+variables $\bar{\pmb\eta}$, $\pmb\eta$, $\bar{\pmb\gamma}$, and $\pmb\gamma$.
+With these transformations in place, there is a compact way to express $\chi$
+using superspace notation. For a review of the superspace formalism for
+evaluating integrals of the form \eqref{eq:kac-rice.lagrange}, see Appendices A
+\& B of \cite{Kent-Dobias_2024_Conditioning}. Introducing the Grassmann indices
+$\bar\theta_1$ and $\theta_1$, we define superfields
+\begin{align}
+ \pmb\phi(1)
+ &=\mathbf x+\bar\theta_1\pmb\eta+\bar{\pmb\eta}\theta_1+\hat{\mathbf x}\bar\theta_1\theta_1
+ \label{eq:superfield.phi} \\
+ \pmb\sigma(1)
+ &=\pmb\omega+\bar\theta_1\pmb\gamma+\bar{\pmb\gamma}\theta_1+\hat{\pmb\omega}\bar\theta_1\theta_1
+ \label{eq:superfield.sigma}
+\end{align}
+with which we can represent $\chi$ by
\begin{equation}
\chi=\int d\pmb\phi\,d\pmb\sigma\,\exp\left\{
\int d1\,L\big(\pmb\phi(1),\pmb\sigma(1)\big)
\right\}
\end{equation}
-Using standard manipulations, we find
+We are now in a position to average over the distribution of constraints. Using
+standard manipulations, we find the average Euler characteristic is
\begin{equation}
\begin{aligned}
\overline{\chi}&=\int d\pmb\phi\,d\sigma_0\,\exp\Bigg\{
@@ -165,18 +198,32 @@ With this choice made, we can integrate over the superfields $\pmb\phi$.
Defining two order parameters $\mathbb Q(1,2)=\frac1N\phi(1)\cdot\phi(2)$ and
$\mathbb M(1)=\frac1N\phi(1)\cdot\mathbf x_0$, the result is
\begin{align}
- \overline{\chi}&=\int d\mathbb Q\,d\mathbb M\,d\sigma_0\,\exp\Bigg\{
+ \overline{\chi}
+ &=\int d\mathbb Q\,d\mathbb M\,d\sigma_0 \notag\\
+ &\quad\times\exp\Bigg\{
\frac N2\log\operatorname{sdet}(\mathbb Q-\mathbb M\mathbb M^T)
-\frac M2\log\operatorname{sdet}f(\mathbb Q) \notag \\
- &\quad+N\int d1\,\left[
+ &\qquad+N\int d1\,\left[
\mathbb M(1)+\frac12\sigma_0(1)\big(\mathbb Q(1,1)-1\big)
\right]
\Bigg\}
\end{align}
-
+This expression is an integral of an exponential with a leading factor of $N$
+over several order parameters, and is therefore in a convenient position for
+evaluating at large $N$ with a saddle point. The order parameter $\mathbb Q$ is
+made up of scalar products of the original integration variables in our
+problem in \eqref{eq:superfield.phi}, while $\mathbb M$ contains their scalar
+project with $\mathbf x_0$, and $\pmb\sigma_0$ contains $\omega_0$ and
+$\hat\omega_0$. We can solve the saddle point equations in all of these
+parameters save for $m=\frac1N\mathbf x_0\cdot\mathbf x$, the overlap with the
+height axis. The result reduces the average Euler characteristic to
\begin{equation}
+ \bar\chi\propto\int dm\,e^{N\mathcal S_\mathrm a(m)}
+\end{equation}
+where the annealed action $\mathcal S_a$ is given by
+\begin{equation} \label{eq:ann.action}
\begin{aligned}
- &\frac1N\log\overline\chi
+ &\mathcal S_\mathrm a(m)
=\frac12\Bigg[
\log\left(
\frac{\frac{f'(1)}{f(1)}(1-m^2)-1}{\alpha-1}
@@ -189,25 +236,100 @@ $\mathbb M(1)=\frac1N\phi(1)\cdot\mathbf x_0$, the result is
\Bigg]
\end{aligned}
\end{equation}
+and must be evaluated at a maximum with respect to $m$. This function is
+plotted for a specific covariance function $f$ in Fig.~\ref{fig:action}, where
+several distinct regimes can be seen.
+
+\begin{figure}
+ \includegraphics{figs/action.pdf}
+
+ \caption{
+ The annealed action $\mathcal S_\mathrm a$ of \eqref{eq:ann.action} plotted
+ as a function of $m$ at several values of $\alpha$. Here, the covariance
+ function is $f(q)=\frac12q^2$ and $\alpha_\text{\textsc{sat}}=2$. When
+ $\alpha<1$, the action is maximized for $m^2>0$ and its value is zero. When
+ $1\leq\alpha<\alpha_\text{\textsc{sat}}$, the action is maximized at
+ $m=0$ and is positive. When $\alpha>\alpha_\text{\textsc{sat}}$ there is no
+ maximum.
+ } \label{fig:action}
+\end{figure}
+
+First, when $\alpha<1$ the action $\mathcal S_\mathrm a$ is strictly negative
+and has maxima at some $m^2>0$. At these maxima, $\mathcal S_\mathrm a(m)=0$.
+When $\alpha>1$, the action flips over and becomes strictly positive. In the
+regime $1<\alpha<\alpha_\text{\textsc{sat}}$, there is a single maximum at
+$m=0$ where the action is positive. When $\alpha\geq\alpha_\text{\textsc{sat}}$
+the maximum in the action vanishes.
+
+This results in distinctive regimes for $\overline\chi$, with an example plotted in Fig.~\ref{fig:characteristic}. If $m^*$ is the maximum of $\mathcal S_\mathrm a$, then
+\begin{equation}
+ \frac1N\log\overline\chi=\mathcal S_\mathrm a(m^*)
+\end{equation}
+When $\alpha<1$, the action evaluates to zero, and therefore $\overline\chi$ is
+positive and subexponential in $N$. When $1<\alpha<\alpha_\text{\textsc{sat}}$, the action
+is positive, and $\overline\chi$ is exponentially large in $N$. Finally, when
+$\alpha\geq\alpha_\text{\textsc{sat}}$ the action and $\overline\chi$ are ill-defined.
+
+\begin{figure}
+ \includegraphics{figs/characteristic.pdf}
+
+ \caption{
+ The logarithm of the average Euler characteristic $\overline\chi$ as a
+ function of $\alpha$. The covariance function is $f(q)=\frac12q^2$ and
+ $\alpha_\text{\textsc{sat}}=2$.
+ } \label{fig:characteristic}
+\end{figure}
+
+We can interpret this by reasoning about topology of $\Omega$ consistent with
+these results. Cartoons that depict this reasoning are shown in
+Fig.~\ref{fig:cartoons}. In the regime $\alpha<1$, $\overline\chi$ is positive but not
+very large. This is consistent with a solution manifold made up of few large
+components, each with the topology of a hypersphere. The saddle point value
+$m^*$ for the overlap with the height axis $\mathbf x_0$ corresponds to the
+latitude at which most stationary points that contribute to the Euler
+characteristic are found. This means we can interpret $1-m^*$ as the typical
+squared distance between a randomly selected point on the sphere and the
+solution manifold.
\begin{figure}
- \includegraphics[width=0.49\columnwidth]{figs/connected.pdf}
+ \includegraphics[width=0.32\columnwidth]{figs/connected.pdf}
+ \hfill
+ \includegraphics[width=0.32\columnwidth]{figs/shattered.pdf}
\hfill
- \includegraphics[width=0.49\columnwidth]{figs/shattered.pdf}
+ \includegraphics[width=0.32\columnwidth]{figs/gone.pdf}
+
+ \includegraphics{figs/bar.pdf}
\caption{
Cartoon of the topology of the CCSP solution manifold implied by our
calculation. The arrow shows the vector $\mathbf x_0$ defining the height
function. The region of solutions is shaded orange, and the critical points
- of the height function restricted to this region are marked with a red
- point. For $\alpha<1$, there are few simply connected regions with most of
- the minima and maxima contributing to the Euler characteristic concentrated
- at the height $m_\mathrm a^*$. For $\alpha\geq1$, there are many simply
+ of the height function restricted to this region are marked with a point.
+ For $\alpha<1$, there are few simply connected regions with most of the
+ minima and maxima contributing to the Euler characteristic concentrated at
+ the height $m^*$. For $\alpha\geq1$, there are many simply
connected regions and most of their minima and maxima are concentrated at
the equator.
- }
+ } \label{fig:cartoons}
\end{figure}
+When $1<\alpha<\alpha_\text{\textsc{sat}}$, $\overline\chi$ is positive and
+very large. This is consistent with a solution manifold made up of
+exponentially many disconnected components, each with the topology of a
+hypersphere. If this interpretation is correct, our calculation effectively
+counts these components. This is a realization of a shattering transition in
+the solution manifold. Here $m^*$ is zero because for any choice of height
+axis, the vast majority of stationary points that contribute to the Euler
+characteristic are found near the equator. Finally, for
+$\alpha\geq\alpha_\text{\textsc{sat}}$, there are no longer solutions that
+satisfy the constraints. The Euler characteristic is not defined for an empty
+set, and in this regime the calculation yields no solution.
+
+In the regime where $\log\overline\chi$ is positive, it is possible that our
+calculation yields a value which is not characteristic of typical sets of
+constraints. This motivates computing $\overline{\log\chi}$, the average of
+the logarithm, which should produce something characteristic of typical
+samples, the so-called quenched calculation.
\begin{equation}
D=\beta R
@@ -230,7 +352,40 @@ $\mathbb M(1)=\frac1N\phi(1)\cdot\mathbf x_0$, the result is
\section{Euler characteristic of the spherical spin glasses}
We can compare this calculation with what we expect to find for the manifold
-defined by $V(\mathbf x)=E$ for a single function $V$. This corresponds to the
-energy level set of a spherical spin glass.
+defined by $V(\mathbf x)=E$ for a single function $V$, with a rescaled covariance $\overline{V(\mathbf x)V(\mathbf x')}=Nf(\mathbf x\cdot\mathbf x'/N)$. This corresponds to the
+energy level set of a spherical spin glass. Now the Lagrangian is
+\begin{equation}
+ L(\mathbf x,\omega_0,\omega_1)=
+ H(\mathbf x)+\frac12\omega_0\big(\|\mathbf x\|^2-N\big)
+ +\omega_1\big(V(\mathbf x)-NE\big)
+\end{equation}
+The derivation follows almost identically as before, but we do not integrate
+out $\sigma_1$. We have
+\begin{align}
+ \overline{\chi}&=\int d\mathbb Q\,d\mathbb M\,d\sigma_0\,d\sigma_1\,\exp\Bigg\{
+ \frac N2\log\operatorname{sdet}(\mathbb Q-\mathbb M\mathbb M^T)
+ \notag \\
+ &\quad+N\int d1\,\bigg[
+ \mathbb M(1)+\frac12\sigma_0(1)\big(\mathbb Q(1,1)-1\big) \notag \\
+ &-E\sigma_1(1)
+ +\frac 12\int d2\,\sigma_1(1)\sigma_1(2)f\big(\mathbb Q(1,2)\big)
+ \bigg]
+ \Bigg\}
+\end{align}
+The saddle point condition for $\sigma_1$ gives
+\begin{equation}
+ \sigma_1(1)=E\int d2\,f(\mathbb Q)^{-1}(1,2)
+\end{equation}
+which then yields
+\begin{align}
+ \overline{\chi}&=\int d\mathbb Q\,d\mathbb M\,d\sigma_0\,\exp\Bigg\{
+ \frac N2\log\operatorname{sdet}(\mathbb Q-\mathbb M\mathbb M^T)
+ \notag \\
+ &\quad+N\int d1\,\bigg[
+ \mathbb M(1)+\frac12\sigma_0(1)\big(\mathbb Q(1,1)-1\big) \notag \\
+ &-\frac12E^2\int d2\,f(\mathbb Q)^{-1}(1,2)
+ \bigg]
+ \Bigg\}
+\end{align}
\end{document}