Some writing.

author: Jaron Kent-Dobias <jaron@kent-dobias.com> 2024-06-14 14:53:17 +0200
committer: Jaron Kent-Dobias <jaron@kent-dobias.com> 2024-06-14 14:53:17 +0200
commit: cfae19ddfb5ebf9c8e6bde6aebb9fb1922b1bf2d (patch)
tree: 648b2c0390c4f59d5314c5f4699b17bc6878e05e
parent: 7db11201112bcc8de0ea11aec010ff6cfdadb55f (diff)
download: marginal-cfae19ddfb5ebf9c8e6bde6aebb9fb1922b1bf2d.tar.gz
marginal-cfae19ddfb5ebf9c8e6bde6aebb9fb1922b1bf2d.tar.bz2
marginal-cfae19ddfb5ebf9c8e6bde6aebb9fb1922b1bf2d.zip
1 files changed, 60 insertions, 27 deletions
diff --git a/marginal.tex b/marginal.tex
index 0013a4f..6451147 100644
--- a/marginal.tex
+++ b/marginal.tex
@@ -32,13 +32,14 @@
   attract algorithms and physical dynamics. Often, the important family of
   marginal attractors are a vanishing minority compared with nonmarginal optima
   and other unstable stationary points. We introduce a generic technique for
-  conditioning the statistics of stationary points on their marginality, and
-  apply it in three isotropic settings with qualitatively different structure:
-  in the spherical spin-glasses, where the Hessian is GOE; in a multispherical
-  spin glasses, which are Gaussian but non-GOE; and in a model of random
-  nonlinear sum of squares, which is non-Gaussian. In these problems we are
-  able to fully characterize the distribution of marginal optima in the
-  landscape, including when they are in the minority.
+  conditioning the statistics of stationary points in random landscapes on
+  their marginality, and apply it in three isotropic settings with
+  qualitatively different structure: in the spherical spin glasses, where the
+  energy is Gaussian and its Hessian is GOE; in multispherical spin glasses,
+  which are Gaussian but non-GOE; and in spherical random nonlinear sum of
+  squares, which is non-Gaussian. In these problems we are able to fully
+  characterize the distribution of marginal optima in the landscape, including
+  when they are in the minority.
 \end{abstract}
 
 \maketitle
@@ -57,8 +58,7 @@ stuck exploring only a subset of configurations.
 In some simple models of such landscapes, it was recently found that marginal
 minima are significant as the attractors of gradient descent dynamics
 \cite{Folena_2020_Rethinking, Folena_2023_On}. This extends to more novel
-algorithms, like message passing \cite{Add_me} \textbf{Find out if this is true}.
-\textbf{Think of other examples.}
+algorithms, like message passing \cite{}. 
 While it is still not known how to predict which marginal minima will be
 attractors, this ubiquity of behavior suggests that cartography of marginal
 minima is a useful step in bounding out-of-equilibrium dynamical behavior.
@@ -203,17 +203,25 @@ have
   \right]\right\}
   \end{aligned}
 \end{equation}
-We make the Hubbard--Stratonovich transformation to the matrix field $Q_{ab}=\frac1N\mathbf s_a^T\mathbf s_b$. This gives
+\end{widetext}
+We make the Hubbard--Stratonovich transformation to the matrix field
+$Q_{ab}=\frac1N\mathbf s_a^T\mathbf s_b$. This gives
 \begin{equation}
   e^{NG_{\lambda^*}(\mu)}
-  =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\,dQ\,
-  \exp N\left\{
-    \hat\lambda(\lambda^*-\mu)-n\beta\mu+\sigma^2\left[\beta^2\sum_{ab}^nQ_{ab}^2
-      +2\beta\hat\lambda\sum_a^nQ_{1a}^2
-    +\hat\lambda^2
-  \right]+\frac12\log\det Q\right\}
+  =\lim_{\beta\to\infty}\lim_{m\to0}\int d\hat\lambda\,dQ\,
+  e^{N\mathcal U_\mathrm{GOE}(\hat\lambda,Q\mid\mu,\lambda^*,\beta)}
+\end{equation}
+where the effective action is given by
+\begin{equation}
+  \begin{aligned}
+    &\mathcal U_\mathrm{GOE}(\hat\lambda,Q\mid\mu,\lambda^*,\beta)
+    =\hat\lambda(\lambda^*-\mu)-m\beta\mu \\
+    &+\sigma^2\left[\beta^2\sum_{ab}^mQ_{ab}^2
+        +2\beta\hat\lambda\sum_a^mQ_{1a}^2
+      +\hat\lambda^2
+    \right]+\frac12\log\det Q
+  \end{aligned}
 \end{equation}
-\end{widetext}
 where $Q_{aa}=1$ because of the spherical constraint. We can evaluate this
 integral using the saddle point method. We make a replica symmetric ansatz for
 $Q$, because this is a 2-spin model, but with the first row singled out because
@@ -227,21 +235,21 @@ of its unique coupling with $\hat\lambda$. This gives
     \tilde q_0&q_0&q_0&\cdots&1
   \end{bmatrix}
 \end{equation}
-with $\sum_{ab}Q_{ab}^2=n+2(n-1)\tilde q_0^2+(n-1)(n-2)q_0^2$, $\sum_aQ_{1a}^2=1+(n-1)\tilde q_0^2$,
+with $\sum_{ab}Q_{ab}^2=m+2(m-1)\tilde q_0^2+(m-1)(m-2)q_0^2$, $\sum_aQ_{1a}^2=1+(m-1)\tilde q_0^2$,
 and
 \begin{equation}
-  \log\det Q=(n-2)\log(1-q_0)+\log(1+(n-2)q_0-(n-1)\tilde q_0^2)
+  \log\det Q=(m-2)\log(1-q_0)+\log(1+(m-2)q_0-(m-1)\tilde q_0^2)
 \end{equation}
-Inserting these expressions and taking the limit of $n$ to zero, we find
+Inserting these expressions and taking the limit of $m$ to zero, we find
 \begin{equation}
   e^{NG_{\lambda^*}(\mu)}
   =\lim_{\beta\to\infty}\int d\hat\lambda\,dq_0\,d\tilde q_0\,
-  e^{N\mathcal U_\textrm{GOE}(q_0,\tilde q_0,\hat\lambda\mid\beta)}
+  e^{N\mathcal U_\textrm{GOE}(\hat\lambda,q_0,\tilde q_0\mid\mu,\lambda^*,\beta)}
 \end{equation}
 with the effective action
 \begin{equation}
   \begin{aligned}
-    &\mathcal U_\mathrm{GOE}(q_0,\tilde q_0,\hat\lambda\mid\beta) \\
+    &\mathcal U_\mathrm{GOE}(\hat\lambda,q_0,\tilde q_0\mid\mu,\lambda^*,\beta) \\
     &\quad=\hat\lambda(\lambda^*-\mu)+\sigma^2\left[
       2\beta^2(q_0^2-\tilde q_0^2)+2\beta\hat\lambda(1-\tilde q_0^2)+\hat\lambda^2
     \right] \\
@@ -261,7 +269,7 @@ However, taking the limit with $y\neq\tilde y$ results in an expression for the
 action that diverges with $\beta$. To cure this, we must take $\tilde y=y$. The result is
 \begin{equation}
   \begin{aligned}
-    \mathcal U_\textrm{GOE}(y,\Delta z,\hat\lambda\mid\infty)
+    \mathcal U_\textrm{GOE}(\hat\lambda,y,\Delta z\mid\mu,\lambda^*,\infty)
     &=\hat\lambda(\lambda^*-\mu)
     +\sigma^2\big[
       \hat\lambda^2-4(y+\Delta z)
@@ -282,7 +290,7 @@ Inserting this solution into $\mathcal S_\infty$ we find
   \begin{aligned}
     &G_{\lambda^*}(\mu)
     =\mathop{\textrm{extremum}}_{y,\Delta z,\hat\lambda}
-    \mathcal U_\mathrm{GOE}(y,\Delta z,\hat\lambda\mid\infty) \\
+    \mathcal U_\mathrm{GOE}(\hat\lambda,y,\Delta z\mid\mu,\lambda^*,\infty) \\
     &=-\tfrac{\mu+\lambda^*}{2\sigma}\sqrt{\Big(\tfrac{\mu+\lambda^*}{2\sigma}\Big)^2-1}
     +\log\left(
       \tfrac{\mu+\lambda^*}{2\sigma}+\sqrt{\Big(\tfrac{\mu+\lambda^*}{2\sigma}\Big)^2-1}
@@ -798,11 +806,13 @@ independent, and $Q$ to have the planted replica symmetric form of
 \eqref{eq:Q.structure}, we find that
 \begin{equation}
   \mathcal U_\mathrm{SSG}(\hat\lambda,Q,0,0\mid\beta,\lambda^*,\mu,C)
-  =\mathcal U_\mathrm{GOE}(\hat\lambda,q_0,\tilde q_0\mid\beta)
+  =\mathcal U_\mathrm{GOE}(\hat\lambda,Q\mid\mu,\lambda^*,\beta)
 \end{equation}
-with $\sigma=f''(1)$. That is, the effective action for the terms related to
+with $\sigma^2=f''(1)$. That is, the effective action for the terms related to
 fixing the eigenvalue in the spherical Kac--Rice problem is exactly the same as
-that for the \textrm{GOE} problem.
+that for the \textrm{GOE} problem. This is perhaps not so surprising, since we
+established from the beginning that the Hessian of the spherical spin glasses
+belongs to the GOE class.
 
 \begin{equation}
   \Sigma_{\lambda^*}(E,\mu)
@@ -902,6 +912,19 @@ the spectrum to zero. In some ways the current method is more convenient than
 this, since it is a purely variational method and therefore can be reduced to a
 single root-finding exercise.
 
+Unlike the constraints on the configurations $\mathbf x$, the constraint on the
+tangent vectors $\mathbf s=[\mathbf s^{(1)},\mathbf s^{(2)}]\in\mathbb R^{2N}$
+remains the same spherical constraint as before, which implies $N=\|\mathbf
+s\|^2=\|\mathbf s^{(1)}\|^2+\|\mathbf s^{(2)}\|^2$. Defining intra- and inter-sphere overlap matrices
+\begin{equation}
+  Q^{ij,\alpha\gamma}_{ab}=\frac1N\mathbf s^{(i),\alpha}_a\cdot\mathbf s^{(j),\gamma}_b
+\end{equation}
+this problem no longer has the property that the diagonal of the $Q$s is one,
+but instead that $1=Q^{11,\alpha\alpha}_{aa}+Q^{22,\alpha\alpha}_{aa}$. This is
+the manifestation of the fact that a normalized vector in the tangent space of the
+multispherical model need not be equally spread on the two subspaces, but can
+be concentrated in one or the other.
+
 The calculation of the marginal complexity in this problem follows very closely
 that of the spherical spin glasses in the previous subsection, making
 immediately the simplifying assumptions that the soft directions of different
@@ -1302,6 +1325,16 @@ taking the zero-temperature limit, we find
 \end{equation}
 \end{widetext}
 
+\begin{figure}
+  \includegraphics{figs/most_squares_complexity.pdf}
+  \caption{
+    Dominant and marginal complexity in the nonlinear sum of squares problem
+    for $\alpha=\frac32$ and $f(q)=q^2+q^3$. The ground state energy
+    $E_\mathrm{gs}$ and the threshold energy $E_\mathrm{th}$ are marked on the
+    plot.
+  }
+\end{figure}
+
 \section{Conclusion}
 
 \begin{acknowledgements}
author	Jaron Kent-Dobias <jaron@kent-dobias.com>	2024-06-14 14:53:17 +0200
committer	Jaron Kent-Dobias <jaron@kent-dobias.com>	2024-06-14 14:53:17 +0200
commit	cfae19ddfb5ebf9c8e6bde6aebb9fb1922b1bf2d (patch)
tree	648b2c0390c4f59d5314c5f4699b17bc6878e05e
parent	7db11201112bcc8de0ea11aec010ff6cfdadb55f (diff)
download	marginal-cfae19ddfb5ebf9c8e6bde6aebb9fb1922b1bf2d.tar.gz marginal-cfae19ddfb5ebf9c8e6bde6aebb9fb1922b1bf2d.tar.bz2 marginal-cfae19ddfb5ebf9c8e6bde6aebb9fb1922b1bf2d.zip