diff options
-rw-r--r-- | figs/action.pdf | bin | 0 -> 22726 bytes | |||
-rw-r--r-- | figs/bar.pdf | bin | 0 -> 4712 bytes | |||
-rw-r--r-- | figs/characteristic.pdf | bin | 0 -> 10115 bytes | |||
-rw-r--r-- | figs/connected.pdf | bin | 135349 -> 168514 bytes | |||
-rw-r--r-- | figs/gone.pdf | bin | 0 -> 155551 bytes | |||
-rw-r--r-- | figs/shattered.pdf | bin | 164526 -> 172535 bytes | |||
-rw-r--r-- | topology.bib | 20 | ||||
-rw-r--r-- | topology.tex | 193 |
8 files changed, 194 insertions, 19 deletions
diff --git a/figs/action.pdf b/figs/action.pdf Binary files differnew file mode 100644 index 0000000..8764cf7 --- /dev/null +++ b/figs/action.pdf diff --git a/figs/bar.pdf b/figs/bar.pdf Binary files differnew file mode 100644 index 0000000..ba12588 --- /dev/null +++ b/figs/bar.pdf diff --git a/figs/characteristic.pdf b/figs/characteristic.pdf Binary files differnew file mode 100644 index 0000000..8f03c20 --- /dev/null +++ b/figs/characteristic.pdf diff --git a/figs/connected.pdf b/figs/connected.pdf Binary files differindex 8fccdaf..8bc0dc6 100644 --- a/figs/connected.pdf +++ b/figs/connected.pdf diff --git a/figs/gone.pdf b/figs/gone.pdf Binary files differnew file mode 100644 index 0000000..fdcf8cc --- /dev/null +++ b/figs/gone.pdf diff --git a/figs/shattered.pdf b/figs/shattered.pdf Binary files differindex d3d2957..401cbbf 100644 --- a/figs/shattered.pdf +++ b/figs/shattered.pdf diff --git a/topology.bib b/topology.bib index 8165e79..31d8bad 100644 --- a/topology.bib +++ b/topology.bib @@ -104,6 +104,26 @@ eprinttype = {arxiv} } +@unpublished{Kent-Dobias_2024_Algorithm-independent, + author = {Kent-Dobias, Jaron}, + title = {Algorithm-independent bounds on complex optimization through the statistics of marginal optima}, + year = {2024}, + url = {https://arxiv.org/abs/2407.02092}, + archiveprefix = {arXiv}, + eprint = {2407.02092}, + primaryclass = {cond-mat.dis-nn} +} + +@unpublished{Kent-Dobias_2024_Conditioning, + author = {Kent-Dobias, Jaron}, + title = {Conditioning the complexity of random landscapes on marginal optima}, + year = {2024}, + url = {https://arxiv.org/abs/2407.02082}, + archiveprefix = {arXiv}, + eprint = {2407.02082}, + primaryclass = {cond-mat.dis-nn} +} + @unpublished{Montanari_2023_Solving, author = {Montanari, Andrea and Subag, Eliran}, title = {Solving overparametrized systems of random equations: I. Model and algorithms for approximate solutions}, diff --git a/topology.tex b/topology.tex index 032fca9..6e694a1 100644 --- a/topology.tex +++ b/topology.tex @@ -66,13 +66,14 @@ their resemblance to encryption, optimization, and vertex models of confluent tissues \cite{Fyodorov_2019_A, Fyodorov_2020_Counting, Fyodorov_2022_Optimization, Urbani_2023_A, Kamali_2023_Dynamical, Kamali_2023_Stochastic, Urbani_2024_Statistical, Montanari_2023_Solving, -Montanari_2024_On}. In each of these cases, the authors studied properties of +Montanari_2024_On, Kent-Dobias_2024_Conditioning, Kent-Dobias_2024_Algorithm-independent}. In each of these cases, the authors studied properties of the cost function \begin{equation} \mathcal C(\mathbf x)=\frac12\sum_{k=1}^MV_k(\mathbf x)^2 \end{equation} which achieves zero cost only for configurations that satisfy all the -constraints. Here we dispense with defining a cost function, and instead study +constraints. +Here we dispense with defining a cost function, and instead study the set of solutions directly. The set of solutions to our nonlinear random constraint satisfaction problem @@ -89,6 +90,10 @@ unlikely, requiring $NM$ independent equations to be simultaneously satisfied. This means that different connected components of the set of solutions do not intersect, nor are there self-intersections, without extraordinary fine-tuning. +One result of this previous work is the value of $\alpha$ for the +satisfiability transition, which is $\alpha_\text{\textsc{sat}}=f'(1)/f(1)$. +For $\alpha$ larger than $\alpha_\text{\textsc{sat}}$ solutions do not exist. + The Euler characteristic $\chi$ of a manifold is a topological invariant \cite{Hatcher_2002_Algebraic}. It is perhaps most familiar in the context of connected compact orientable surfaces, where it characterizes the number of handles in the surface: $\chi=2(1-\#)$ for $\#$ @@ -130,19 +135,47 @@ each of the $V_k$, resulting in the Lagrangian +\sum_{k=1}^M\omega_kV_k(\mathbf x) \end{equation} The integral over $\Omega$ in \eqref{eq:kac-rice} then becomes -\begin{equation} +\begin{equation} \label{eq:kac-rice.lagrange} \chi=\int_{\mathbb R^N} d\mathbf x\int_{\mathbb R^{M+1}}d\pmb\omega \,\delta\big(\partial L(\mathbf x,\pmb\omega)\big) \det\partial\partial L(\mathbf x,\pmb\omega) \end{equation} -where $\partial=[\frac\partial{\partial\mathbf x},\frac\partial{\partial\pmb\omega}]$ is the vector of partial derivatives -with respect to all $N+M+1$ variables. +where $\partial=[\frac\partial{\partial\mathbf x},\frac\partial{\partial\pmb\omega}]$ +is the vector of partial derivatives with respect to all $N+M+1$ variables. + +To evaluate the average of $\chi$ over the constraints, we first translate the $\delta$ functions and determinant to integral form, with +\begin{align} + \delta\big(\partial L(\mathbf x,\pmb\omega)\big) + =\int\frac{d\hat{\mathbf x}}{(2\pi)^N}\frac{d\hat{\pmb\omega}}{(2\pi)^{M+1}} + e^{i[\hat{\mathbf x},\hat{\pmb\omega}]\cdot\partial L(\mathbf x,\pmb\omega)} + \\ + \det\partial\partial L(\mathbf x,\pmb\omega) + =\int d\bar{\pmb\eta}\,d\pmb\eta\,d\bar{\pmb\gamma}\,d\pmb\gamma\, + e^{-[\bar{\pmb\eta},\bar{\pmb\gamma}]^T\partial\partial H[\pmb\eta,\pmb\gamma]} +\end{align} +for real variables $\hat{\mathbf x}$ and $\hat{\pmb\omega}$, and Grassmann +variables $\bar{\pmb\eta}$, $\pmb\eta$, $\bar{\pmb\gamma}$, and $\pmb\gamma$. +With these transformations in place, there is a compact way to express $\chi$ +using superspace notation. For a review of the superspace formalism for +evaluating integrals of the form \eqref{eq:kac-rice.lagrange}, see Appendices A +\& B of \cite{Kent-Dobias_2024_Conditioning}. Introducing the Grassmann indices +$\bar\theta_1$ and $\theta_1$, we define superfields +\begin{align} + \pmb\phi(1) + &=\mathbf x+\bar\theta_1\pmb\eta+\bar{\pmb\eta}\theta_1+\hat{\mathbf x}\bar\theta_1\theta_1 + \label{eq:superfield.phi} \\ + \pmb\sigma(1) + &=\pmb\omega+\bar\theta_1\pmb\gamma+\bar{\pmb\gamma}\theta_1+\hat{\pmb\omega}\bar\theta_1\theta_1 + \label{eq:superfield.sigma} +\end{align} +with which we can represent $\chi$ by \begin{equation} \chi=\int d\pmb\phi\,d\pmb\sigma\,\exp\left\{ \int d1\,L\big(\pmb\phi(1),\pmb\sigma(1)\big) \right\} \end{equation} -Using standard manipulations, we find +We are now in a position to average over the distribution of constraints. Using +standard manipulations, we find the average Euler characteristic is \begin{equation} \begin{aligned} \overline{\chi}&=\int d\pmb\phi\,d\sigma_0\,\exp\Bigg\{ @@ -165,18 +198,32 @@ With this choice made, we can integrate over the superfields $\pmb\phi$. Defining two order parameters $\mathbb Q(1,2)=\frac1N\phi(1)\cdot\phi(2)$ and $\mathbb M(1)=\frac1N\phi(1)\cdot\mathbf x_0$, the result is \begin{align} - \overline{\chi}&=\int d\mathbb Q\,d\mathbb M\,d\sigma_0\,\exp\Bigg\{ + \overline{\chi} + &=\int d\mathbb Q\,d\mathbb M\,d\sigma_0 \notag\\ + &\quad\times\exp\Bigg\{ \frac N2\log\operatorname{sdet}(\mathbb Q-\mathbb M\mathbb M^T) -\frac M2\log\operatorname{sdet}f(\mathbb Q) \notag \\ - &\quad+N\int d1\,\left[ + &\qquad+N\int d1\,\left[ \mathbb M(1)+\frac12\sigma_0(1)\big(\mathbb Q(1,1)-1\big) \right] \Bigg\} \end{align} - +This expression is an integral of an exponential with a leading factor of $N$ +over several order parameters, and is therefore in a convenient position for +evaluating at large $N$ with a saddle point. The order parameter $\mathbb Q$ is +made up of scalar products of the original integration variables in our +problem in \eqref{eq:superfield.phi}, while $\mathbb M$ contains their scalar +project with $\mathbf x_0$, and $\pmb\sigma_0$ contains $\omega_0$ and +$\hat\omega_0$. We can solve the saddle point equations in all of these +parameters save for $m=\frac1N\mathbf x_0\cdot\mathbf x$, the overlap with the +height axis. The result reduces the average Euler characteristic to \begin{equation} + \bar\chi\propto\int dm\,e^{N\mathcal S_\mathrm a(m)} +\end{equation} +where the annealed action $\mathcal S_a$ is given by +\begin{equation} \label{eq:ann.action} \begin{aligned} - &\frac1N\log\overline\chi + &\mathcal S_\mathrm a(m) =\frac12\Bigg[ \log\left( \frac{\frac{f'(1)}{f(1)}(1-m^2)-1}{\alpha-1} @@ -189,25 +236,100 @@ $\mathbb M(1)=\frac1N\phi(1)\cdot\mathbf x_0$, the result is \Bigg] \end{aligned} \end{equation} +and must be evaluated at a maximum with respect to $m$. This function is +plotted for a specific covariance function $f$ in Fig.~\ref{fig:action}, where +several distinct regimes can be seen. + +\begin{figure} + \includegraphics{figs/action.pdf} + + \caption{ + The annealed action $\mathcal S_\mathrm a$ of \eqref{eq:ann.action} plotted + as a function of $m$ at several values of $\alpha$. Here, the covariance + function is $f(q)=\frac12q^2$ and $\alpha_\text{\textsc{sat}}=2$. When + $\alpha<1$, the action is maximized for $m^2>0$ and its value is zero. When + $1\leq\alpha<\alpha_\text{\textsc{sat}}$, the action is maximized at + $m=0$ and is positive. When $\alpha>\alpha_\text{\textsc{sat}}$ there is no + maximum. + } \label{fig:action} +\end{figure} + +First, when $\alpha<1$ the action $\mathcal S_\mathrm a$ is strictly negative +and has maxima at some $m^2>0$. At these maxima, $\mathcal S_\mathrm a(m)=0$. +When $\alpha>1$, the action flips over and becomes strictly positive. In the +regime $1<\alpha<\alpha_\text{\textsc{sat}}$, there is a single maximum at +$m=0$ where the action is positive. When $\alpha\geq\alpha_\text{\textsc{sat}}$ +the maximum in the action vanishes. + +This results in distinctive regimes for $\overline\chi$, with an example plotted in Fig.~\ref{fig:characteristic}. If $m^*$ is the maximum of $\mathcal S_\mathrm a$, then +\begin{equation} + \frac1N\log\overline\chi=\mathcal S_\mathrm a(m^*) +\end{equation} +When $\alpha<1$, the action evaluates to zero, and therefore $\overline\chi$ is +positive and subexponential in $N$. When $1<\alpha<\alpha_\text{\textsc{sat}}$, the action +is positive, and $\overline\chi$ is exponentially large in $N$. Finally, when +$\alpha\geq\alpha_\text{\textsc{sat}}$ the action and $\overline\chi$ are ill-defined. + +\begin{figure} + \includegraphics{figs/characteristic.pdf} + + \caption{ + The logarithm of the average Euler characteristic $\overline\chi$ as a + function of $\alpha$. The covariance function is $f(q)=\frac12q^2$ and + $\alpha_\text{\textsc{sat}}=2$. + } \label{fig:characteristic} +\end{figure} + +We can interpret this by reasoning about topology of $\Omega$ consistent with +these results. Cartoons that depict this reasoning are shown in +Fig.~\ref{fig:cartoons}. In the regime $\alpha<1$, $\overline\chi$ is positive but not +very large. This is consistent with a solution manifold made up of few large +components, each with the topology of a hypersphere. The saddle point value +$m^*$ for the overlap with the height axis $\mathbf x_0$ corresponds to the +latitude at which most stationary points that contribute to the Euler +characteristic are found. This means we can interpret $1-m^*$ as the typical +squared distance between a randomly selected point on the sphere and the +solution manifold. \begin{figure} - \includegraphics[width=0.49\columnwidth]{figs/connected.pdf} + \includegraphics[width=0.32\columnwidth]{figs/connected.pdf} + \hfill + \includegraphics[width=0.32\columnwidth]{figs/shattered.pdf} \hfill - \includegraphics[width=0.49\columnwidth]{figs/shattered.pdf} + \includegraphics[width=0.32\columnwidth]{figs/gone.pdf} + + \includegraphics{figs/bar.pdf} \caption{ Cartoon of the topology of the CCSP solution manifold implied by our calculation. The arrow shows the vector $\mathbf x_0$ defining the height function. The region of solutions is shaded orange, and the critical points - of the height function restricted to this region are marked with a red - point. For $\alpha<1$, there are few simply connected regions with most of - the minima and maxima contributing to the Euler characteristic concentrated - at the height $m_\mathrm a^*$. For $\alpha\geq1$, there are many simply + of the height function restricted to this region are marked with a point. + For $\alpha<1$, there are few simply connected regions with most of the + minima and maxima contributing to the Euler characteristic concentrated at + the height $m^*$. For $\alpha\geq1$, there are many simply connected regions and most of their minima and maxima are concentrated at the equator. - } + } \label{fig:cartoons} \end{figure} +When $1<\alpha<\alpha_\text{\textsc{sat}}$, $\overline\chi$ is positive and +very large. This is consistent with a solution manifold made up of +exponentially many disconnected components, each with the topology of a +hypersphere. If this interpretation is correct, our calculation effectively +counts these components. This is a realization of a shattering transition in +the solution manifold. Here $m^*$ is zero because for any choice of height +axis, the vast majority of stationary points that contribute to the Euler +characteristic are found near the equator. Finally, for +$\alpha\geq\alpha_\text{\textsc{sat}}$, there are no longer solutions that +satisfy the constraints. The Euler characteristic is not defined for an empty +set, and in this regime the calculation yields no solution. + +In the regime where $\log\overline\chi$ is positive, it is possible that our +calculation yields a value which is not characteristic of typical sets of +constraints. This motivates computing $\overline{\log\chi}$, the average of +the logarithm, which should produce something characteristic of typical +samples, the so-called quenched calculation. \begin{equation} D=\beta R @@ -230,7 +352,40 @@ $\mathbb M(1)=\frac1N\phi(1)\cdot\mathbf x_0$, the result is \section{Euler characteristic of the spherical spin glasses} We can compare this calculation with what we expect to find for the manifold -defined by $V(\mathbf x)=E$ for a single function $V$. This corresponds to the -energy level set of a spherical spin glass. +defined by $V(\mathbf x)=E$ for a single function $V$, with a rescaled covariance $\overline{V(\mathbf x)V(\mathbf x')}=Nf(\mathbf x\cdot\mathbf x'/N)$. This corresponds to the +energy level set of a spherical spin glass. Now the Lagrangian is +\begin{equation} + L(\mathbf x,\omega_0,\omega_1)= + H(\mathbf x)+\frac12\omega_0\big(\|\mathbf x\|^2-N\big) + +\omega_1\big(V(\mathbf x)-NE\big) +\end{equation} +The derivation follows almost identically as before, but we do not integrate +out $\sigma_1$. We have +\begin{align} + \overline{\chi}&=\int d\mathbb Q\,d\mathbb M\,d\sigma_0\,d\sigma_1\,\exp\Bigg\{ + \frac N2\log\operatorname{sdet}(\mathbb Q-\mathbb M\mathbb M^T) + \notag \\ + &\quad+N\int d1\,\bigg[ + \mathbb M(1)+\frac12\sigma_0(1)\big(\mathbb Q(1,1)-1\big) \notag \\ + &-E\sigma_1(1) + +\frac 12\int d2\,\sigma_1(1)\sigma_1(2)f\big(\mathbb Q(1,2)\big) + \bigg] + \Bigg\} +\end{align} +The saddle point condition for $\sigma_1$ gives +\begin{equation} + \sigma_1(1)=E\int d2\,f(\mathbb Q)^{-1}(1,2) +\end{equation} +which then yields +\begin{align} + \overline{\chi}&=\int d\mathbb Q\,d\mathbb M\,d\sigma_0\,\exp\Bigg\{ + \frac N2\log\operatorname{sdet}(\mathbb Q-\mathbb M\mathbb M^T) + \notag \\ + &\quad+N\int d1\,\bigg[ + \mathbb M(1)+\frac12\sigma_0(1)\big(\mathbb Q(1,1)-1\big) \notag \\ + &-\frac12E^2\int d2\,f(\mathbb Q)^{-1}(1,2) + \bigg] + \Bigg\} +\end{align} \end{document} |