Work on the train.

author: Jaron Kent-Dobias <jaron@kent-dobias.com> 2024-05-21 16:33:51 +0200
committer: Jaron Kent-Dobias <jaron@kent-dobias.com> 2024-05-21 16:33:51 +0200
commit: c10694ea25be6a6593765ad82a92d994ae02e18b (patch)
tree: 6a12fb32735987af6e7809d8059dae1e8bd3b983
parent: 63eacd9d14ded6cc306c11e759398071bef3ee76 (diff)
download: marginal-c10694ea25be6a6593765ad82a92d994ae02e18b.tar.gz
marginal-c10694ea25be6a6593765ad82a92d994ae02e18b.tar.bz2
marginal-c10694ea25be6a6593765ad82a92d994ae02e18b.zip
2 files changed, 167 insertions, 113 deletions
diff --git a/marginal.bib b/marginal.bib
index b51659e..9f89910 100644
--- a/marginal.bib
+++ b/marginal.bib
@@ -27,6 +27,35 @@
  doi = {10.21468/scipostphys.15.3.109}
 }
 
+@article{Fyodorov_2020_Counting,
+ author = {Fyodorov, Y. V. and Tublin, R.},
+ title = {Counting Stationary Points of the Loss Function in the Simplest Constrained Least-square Optimization},
+ journal = {Acta Physica Polonica B},
+ publisher = {Jagiellonian University},
+ year = {2020},
+ number = {7},
+ volume = {51},
+ pages = {1663},
+ url = {http://dx.doi.org/10.5506/APhysPolB.51.1663},
+ doi = {10.5506/aphyspolb.51.1663},
+ issn = {1509-5770}
+}
+
+@article{Fyodorov_2022_Optimization,
+ author = {Fyodorov, Yan V and Tublin, Rashel},
+ title = {Optimization landscape in the simplest constrained random least-square problem},
+ journal = {Journal of Physics A: Mathematical and Theoretical},
+ publisher = {IOP Publishing},
+ year = {2022},
+ month = {May},
+ number = {24},
+ volume = {55},
+ pages = {244008},
+ url = {http://dx.doi.org/10.1088/1751-8121/ac6d8e},
+ doi = {10.1088/1751-8121/ac6d8e},
+ issn = {1751-8121}
+}
+
 @article{Ikeda_2023_Bose-Einstein-like,
  author = {Ikeda, Harukuni},
  title = {{Bose}--{Einstein}-like condensation of deformed random matrix: a replica approach},
@@ -70,35 +99,6 @@
  doi = {10.1103/physrevlett.75.2847}
 }
 
-@article{Fyodorov_2020_Counting,
- author = {Fyodorov, Y.V. and Tublin, R.},
- title = {Counting Stationary Points of the Loss Function in the Simplest Constrained Least-square Optimization},
- journal = {Acta Physica Polonica B},
- publisher = {Jagiellonian University},
- year = {2020},
- number = {7},
- volume = {51},
- pages = {1663},
- url = {http://dx.doi.org/10.5506/APhysPolB.51.1663},
- doi = {10.5506/aphyspolb.51.1663},
- issn = {1509-5770}
-}
-
-@article{Fyodorov_2022_Optimization,
- author = {Fyodorov, Yan V and Tublin, Rashel},
- title = {Optimization landscape in the simplest constrained random least-square problem},
- journal = {Journal of Physics A: Mathematical and Theoretical},
- publisher = {IOP Publishing},
- year = {2022},
- month = {May},
- number = {24},
- volume = {55},
- pages = {244008},
- url = {http://dx.doi.org/10.1088/1751-8121/ac6d8e},
- doi = {10.1088/1751-8121/ac6d8e},
- issn = {1751-8121}
-}
-
 @phdthesis{Tublin_2022_A,
  author = {Tublin, Rashel},
  title = {A Few Results in Random Matrix Theory and Random Optimization},
diff --git a/marginal.tex b/marginal.tex
index ec28568..5b73f9e 100644
--- a/marginal.tex
+++ b/marginal.tex
@@ -4,15 +4,8 @@
 \usepackage[T1]{fontenc}
 \usepackage{amsmath,amssymb,latexsym,graphicx}
 \usepackage{newtxtext,newtxmath}
-\usepackage{bbold}
+\usepackage{bbold,anyfontsize}
 \usepackage[dvipsnames]{xcolor}
-\usepackage[
-  colorlinks=true,
-  urlcolor=MidnightBlue,
-  citecolor=MidnightBlue,
-  filecolor=MidnightBlue,
-  linkcolor=MidnightBlue
-]{hyperref}
 
 \begin{document}
 
@@ -24,6 +17,18 @@
 \affiliation{Istituto Nazionale di Fisica Nucleare, Sezione di Roma I, Rome, Italy 00184}
 
 \begin{abstract}
+  Marginal optima are minima or maxima of a function with many asymptotically
+  flat directions. In settings with many competing optima, marginal ones tend
+  to attract algorithms and physical dynamics. Often, the important family of
+  marginal attractors is a vanishing minority compared with nonmarginal optima
+  and other unstable stationary points. We introduce a generic technique for
+  conditioning the statistics of stationary points on their marginality, and
+  apply it in three isotropic settings with different typical forms for the
+  Hessian at optima: in the spherical spin glasses, where the Hessian is GOE;
+  in multispherical spin glasses, which are Gaussian but non-GOE; and in a
+  model of random nonlinear sum of squares, which is non-Gaussian. In these
+  problems we are able to fully characterize the distribution of marginal
+  optima in the landscape, including when they are in the minority.
 \end{abstract}
 
 \maketitle
@@ -79,7 +84,7 @@ stationary points be zero, we restrict to marginal minima, either those with a
 pseudogap in their bulk spectrum or those with outlying eigenvectors. We
 provide a heuristic to distinguish these two cases. We demonstrate the method
 on the spherical models, where it is unnecessary but instructive, and on
-extensions of the spherical models with non-\textsc{goe} Hessians where the technique is
+extensions of the spherical models with non-GOE Hessians where the technique is
 more useful.
 
 \section{Conditioning on the smallest eigenvalue}
@@ -88,7 +93,7 @@ more useful.
 
 An arbitrary function $g$ of the maximum eigenvalue of a matrix $A$ can be
 expressed as
-\begin{equation}
+\begin{equation} \label{eq:λmax}
   g(\lambda_\textrm{max}(A))
   =\lim_{\beta\to\infty}\int
     \frac{d\mathbf s\,\delta(N-\mathbf s^T\mathbf s)e^{\beta\mathbf s^TA\mathbf s}}
@@ -128,8 +133,8 @@ minimum eigenvalue is zero.
 
 We demonstrate the efficacy of the technique by rederiving a well-known result:
 the large-deviation function for pulling an eigenvalue from the bulk of the
-\textsc{goe} spectrum.
-Consider an ensemble of $N\times N$ matrices $A=B+\mu I$ for $B$ drawn from the \textsc{goe} ensemble with entries
+GOE spectrum.
+Consider an ensemble of $N\times N$ matrices $A=B+\mu I$ for $B$ drawn from the GOE ensemble with entries
 whose variance is $\sigma^2/N$. We know that the bulk spectrum of $A$ is a
 Wigner semicircle with radius $2\sigma$ shifted by a constant $\mu$.
 Therefore, for $\mu=2\sigma$, the minimum eigenvalue will typically be zero,
@@ -141,37 +146,51 @@ eigenvalues, would be necessary. This final case cannot be quantified by this
 method, but instead the nonexistence of a large deviation linear in $N$ appears
 as the emergence of an imaginary part in the function.
 
-\begin{widetext}
 As an example, we compute
 \begin{equation} \label{eq:large.dev}
-  e^{NG_\lambda(\mu)}=P_{\lambda_\mathrm{min}(B+\mu I)=\lambda}=\overline{\lim_{\beta\to\infty}\int\frac{d\mathbf x\,\delta(N-\mathbf x^T\mathbf x)e^{-\beta\mathbf x^T(B+\mu I)\mathbf x}}{\int d\mathbf x'\,\delta(N-\mathbf x'^T\mathbf x')e^{-\beta\mathbf x'^T(B+\mu I)\mathbf x'}}\,\delta\big(\mathbf x^T(B+\mu I)\mathbf x-N\lambda\big)}
+  e^{NG_{\lambda^*}(\mu)}
+  =P_{\lambda_\mathrm{max}(B-\mu I)=\lambda^*}
+  =\overline{\delta\big(N\lambda^*-N\lambda_\mathrm{max}(B-\mu I)\big)}
 \end{equation}
 where the overline is the average over $B$, and we have defined the large
-deviation function $G_\sigma(\mu)$. Using replicas to treat the denominator ($x^{-1}=\lim_{n\to0}x^{n-1}$)
+deviation function $G_{\lambda^*}(\mu)$.
+Using the representation of $\lambda_\mathrm{max}$ defined in \eqref{eq:λmax}, we have
+\begin{widetext}
+\begin{equation}
+  e^{NG_{\lambda^*}(\mu)}
+  =\overline{
+    \lim_{\beta\to\infty}\int\frac{d\mathbf x\,\delta(N-\mathbf x^T\mathbf x)e^{\beta\mathbf x^T(B-\mu I)\mathbf x}}
+    {\int d\mathbf x'\,\delta(N-\mathbf x'^T\mathbf x')e^{\beta\mathbf x'^T(B-\mu I)\mathbf x'}}\,\delta\big(N\lambda^*-\mathbf x^T(B-\mu I)\mathbf x\big)
+  }
+\end{equation}
+Using replicas to treat the denominator ($x^{-1}=\lim_{n\to0}x^{n-1}$)
 and transforming the $\delta$-function to its Fourier
 representation, we have
 \begin{equation}
-  e^{NG_\lambda(\mu)}=\overline{\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right]
-  \exp\left\{-\beta\sum_{a=1}^n\mathbf x_a^T(B+\mu I)\mathbf x_a+\hat\lambda\mathbf x_1^T(B+\mu I)\mathbf x_1-N\hat\lambda\lambda\right\}}
+  e^{NG_{\lambda^*}(\mu)}
+  =\overline{\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right]
+  \exp\left\{\beta\sum_{a=1}^n\mathbf x_a^T(B-\mu I)\mathbf x_a+\hat\lambda\left[N\lambda^*-\mathbf x_1^T(B-\mu I)\mathbf x_1\right]\right\}}
 \end{equation}
 having introduced the parameter $\hat\lambda$ in the Fourier representation of the $\delta$-function.
 The whole expression, so transformed, is a simple exponential integral linear in the matrix $B$.
 Taking the average over $B$, we have
 \begin{equation}
-  e^{NG_\lambda(\mu)}
-  =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right]
-  \exp\left\{-Nn\beta\mu+N\hat\lambda(\mu-\lambda)+\frac{\sigma^2}{N}\left[\beta^2\sum_{ab}^n(\mathbf x_a^T\mathbf x_b)^2
+  \begin{aligned}
+  &e^{NG_{\lambda^*}(\mu)}
+  =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right] \\
+  &\hspace{10em}\exp\left\{N\left[\hat\lambda(\mu+\lambda^*)-n\beta\mu\right]+\frac{\sigma^2}{N}\left[\beta^2\sum_{ab}^n(\mathbf x_a^T\mathbf x_b)^2
     -2\beta\hat\lambda\sum_a^n(\mathbf x_a^T\mathbf x_1)^2
     +\hat\lambda^2N^2
   \right]\right\}
+  \end{aligned}
 \end{equation}
 We make the Hubbard--Stratonovich transformation to the matrix field $Q_{ab}=\frac1N\mathbf x_a^T\mathbf x_b$. This gives
 \begin{equation}
-  e^{NG_\lambda(\mu)}
+  e^{NG_{\lambda^*}(\mu)}
   =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\,dQ\,
   \exp N\left\{
-    -n\beta\mu+\hat\lambda(\mu-\lambda)+\sigma^2\left[\beta^2\sum_{ab}^nQ_{ab}^2
-      +-\beta\hat\lambda\sum_a^nQ_{1a}^2
+    \hat\lambda(\mu+\lambda^*)-n\beta\mu+\sigma^2\left[\beta^2\sum_{ab}^nQ_{ab}^2
+      -2\beta\hat\lambda\sum_a^nQ_{1a}^2
     +\hat\lambda^2
   \right]+\frac12\log\det Q\right\}
 \end{equation}
@@ -196,49 +215,57 @@ and
 \end{equation}
 Inserting these expressions and taking the limit of $n$ to zero, we find
 \begin{equation}
-  e^{NG_\sigma(\mu)}=\lim_{\beta\to\infty}\int d\hat\lambda\,dq_0\,d\tilde q_0\,e^{N\mathcal S_\beta(q_0,\tilde q_0,\hat\lambda)}
+  e^{NG_{\lambda^*}(\mu)}=\lim_{\beta\to\infty}\int d\hat\lambda\,dq_0\,d\tilde q_0\,e^{N\mathcal S_\beta(q_0,\tilde q_0,\hat\lambda)}
 \end{equation}
 with the effective action
 \begin{equation}
-  \mathcal S_\beta(q_0,\tilde q_0,\hat\lambda)=\hat\lambda(\mu-\lambda)+\sigma^2\left[
-    2\beta^2(q_0^2-\tilde q_0^2)-2\beta\hat\lambda(1-\tilde q_0^2)+\hat\lambda^2
-  \right]-\log(1-q_0)+\frac12\log(1-2q_0+\tilde q_0^2)
+  \begin{aligned}
+    &\mathcal S_\beta(q_0,\tilde q_0,\hat\lambda) \\
+    &\quad=\hat\lambda(\mu+\lambda^*)+\sigma^2\left[
+      2\beta^2(q_0^2-\tilde q_0^2)-2\beta\hat\lambda(1-\tilde q_0^2)+\hat\lambda^2
+    \right] \\
+    &\qquad-\log(1-q_0)+\frac12\log(1-2q_0+\tilde q_0^2)
+  \end{aligned}
 \end{equation}
 We need to evaluate the integral above using the saddle point method, but in the limit of $\beta\to\infty$.
 We expect the overlaps to concentrate on one as $\beta$ goes to infinity. We therefore take
 \begin{align}
-  q_0=1-y\beta^{-1}-z\beta^{-2}+O(\beta^{-3})
-  &&
-  \tilde q_0=1-\tilde y\beta^{-1}-\tilde z\beta^{-2}+O(\beta^{-3})
+  q_0&=1-y\beta^{-1}-z\beta^{-2}+O(\beta^{-3})
+  \\
+  \tilde q_0&=1-\tilde y\beta^{-1}-(z-\Delta z)\beta^{-2}+O(\beta^{-3})
 \end{align}
 However, taking the limit with $y\neq\tilde y$ results in an expression for the
 action that diverges with $\beta$. To cure this, we must take $\tilde y=y$. The result is
 \begin{equation}
-  \mathcal S_\infty(y,z,\tilde z,\hat\lambda)
-  =\hat\lambda(\mu-\lambda)+\sigma^2\big[
-    \hat\lambda^2-4(y+z-\tilde z)
-  \big]+\frac12\log\left(1+2\frac{z-\tilde z}{y^2}\right)
+  \begin{aligned}
+    \mathcal S_\infty(y,\Delta z,\hat\lambda)
+    &=\hat\lambda(\mu+\lambda^*)
+    +\sigma^2\big[
+      \hat\lambda^2-4(y+\Delta z)
+    \big] \\
+    &\qquad+\frac12\log\left(1+\frac{2\Delta z}{y^2}\right)
+  \end{aligned}
 \end{equation}
 Extremizing this action over the parameters $y$, $\Delta z$, and $\hat\lambda$, we have
 \begin{align}
-  \hat\lambda=-\frac1\sigma\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}
+  \hat\lambda=-\frac1\sigma\sqrt{\frac{(\mu+\lambda^*)^2}{(2\sigma)^2}-1}
   \\
-  y=\frac1{2\sigma}\left(\frac{\mu-\lambda}{2\sigma}-\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}\right)
+  y=\frac1{2\sigma}\left(\frac{\mu+\lambda^*}{2\sigma}-\sqrt{\frac{(\mu+\lambda^*)^2}{(2\sigma)^2}-1}\right)
   &\\
-  \Delta z=\frac1{4\sigma^2}\left(1-\frac{\mu-\lambda}{2\sigma}\left(\frac{\mu-\lambda}{2\sigma}-\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}\right)\right)
+  \Delta z=\frac1{4\sigma^2}\left(1-\frac{\mu+\lambda^*}{2\sigma}\left(\frac{\mu+\lambda^*}{2\sigma}-\sqrt{\frac{(\mu+\lambda^*)^2}{(2\sigma)^2}-1}\right)\right)
 \end{align}
 Inserting this solution into $\mathcal S_\infty$ we find
 \begin{equation}
   \begin{aligned}
-    &G_\lambda(\mu)
+    &G_{\lambda^*}(\mu)
     =\mathop{\textrm{extremum}}_{y,\Delta z,\hat\lambda}\mathcal S_\infty(y,\Delta z,\hat\lambda) \\
-    &=-\frac{\mu-\lambda}{2\sigma}\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}
+    &=-\tfrac{\mu+\lambda^*}{2\sigma}\sqrt{\Big(\tfrac{\mu+\lambda^*}{2\sigma}\Big)^2-1}
     +\log\left(
-      \frac{\mu-\lambda}{2\sigma}+\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}
+      \tfrac{\mu+\lambda^*}{2\sigma}+\sqrt{\Big(\tfrac{\mu+\lambda^*}{2\sigma}\Big)^2-1}
     \right)
   \end{aligned}
 \end{equation}
-This function is plotted in Fig.~\ref{fig:large.dev}. For $\mu<2\sigma$ $G_\sigma(\mu)$ has an
+This function is plotted in Fig.~\ref{fig:large.dev}. For $\mu<2\sigma$, $G_{\lambda^*}(\mu)$ has an
 imaginary part, which makes any additional integral over $\mu$ highly
 oscillatory. This indicates that the existence of a marginal minimum for this
 parameter value corresponds with a large deviation that grows faster than $N$,
@@ -248,14 +275,15 @@ deviations in order for the smallest eigenvalue to be zero. For
 $\mu\geq2\sigma$ this function gives the large deviation function for the
 probability of seeing a zero eigenvalue given the shift $\mu$.
 $\mu=2\sigma$ is the maximum of the function with a real value, and
-corresponds to the intersection of the average spectrum with zero, i.e., a pseudogap.
+corresponds to the intersection of the average spectrum with zero, i.e., a
+pseudogap.
 
 \begin{figure}
   \includegraphics[width=\columnwidth]{figs/large_deviation.pdf}
   \caption{
     The large deviation function $G_0(\mu)$ defined in
     \eqref{eq:large.dev} as a function of the shift $\mu$ to the
-    \textsc{goe} diagonal. As expected, $G_\sigma(2\sigma)=0$, while for
+    GOE diagonal. As expected, $G_0(2\sigma)=0$, while for
     $\mu>2\sigma$ it is negative and for $\mu<2\sigma$ it gains an
     imaginary part.
   } \label{fig:large.dev}
@@ -264,7 +292,7 @@ corresponds to the intersection of the average spectrum with zero, i.e., a pseud
 Marginal spectra with a pseudogap and those with simple isolated eigenvalues
 are qualitatively different, and more attention may be focused on the former.
 Here, we see what appears to be a general heuristic for identifying the saddle
-parameters for which the spectrum is psedogapped: the equivalent of this
+parameters for which the spectrum is pseudogapped: the equivalent of this
 large-deviation function will lie on the singular boundary between a purely
 real and complex value.
 
@@ -279,17 +307,28 @@ pseudogapped spectra, where the continuous part of the spectral density has a
 lower bound at zero.
 
 Fortunately, our calculation can be modified to ensure that we consider only
-psedogapped spectra. First, we insert a shift $\mu$ by hand into the `natural'
+pseudogapped spectra. First, we insert a shift $\mu$ by hand into the `natural'
 spectrum of the problem at hand, conditioning the trace to have a specific
-value. Then, we choose this artificial shift so that the resulting conditioned
-spectra are pseudogapped. This we can do by looking for the point where the
-order parameter $\lambda$ associated with the marginal condition is zero.
+value $\mu=\frac1N\operatorname{Tr}A$. Then, we choose this artificial shift so that
+the resulting conditioned spectra are pseudogapped. As seen in the previous
+subsection, this can be done by starting from a sufficiently large $\mu$ and
+decreasing it until the calculation develops an imaginary part, signaling the
+breakdown of the large-deviation principle at order $N$.
+
+In isotropic or zero-signal landscapes, there is another way to condition on a
+pseudogap. In such landscapes, the typical spectrum does not have an isolated
+eigenvalue. Therefore, the condition associated with the bulk of the spectrum
+reaching zero, i.e., the pseudogap, will always correspond to the most common
+configuration. We can therefore choose $\mu=\mu_\mathrm m$ such that
+\begin{equation}
+  0=\frac\partial{\partial\lambda^*}G_{\lambda^*}(\mu_\mathrm m)\bigg|_{\lambda^*=0}
+\end{equation}
+In the previous problem, this corresponds precisely to $\mu_\mathrm m=2\sigma$,
+the correct marginal shift. Note that when we treat the Dirac $\delta$ function
+using its Fourier representation with auxiliary parameter $\hat\lambda$, as in
+the previous subsection, this condition corresponds with choosing $\mu$ such
+that $\hat\lambda=0$.
 
-What is the interpretation of this? In general the condition $\lambda=0$
-corresponds to a point where the conditioning does not change the volume
-measured by the integral. Therefore, the typical matrix with the value of $\mu$
-for which $\lambda=0$ has a zero eigenvalue. In isotropic problems where
-isolated eigenvalues in the spectrum are atypical, this implies a pseudogap.
 
 \section{Marginal complexity in random landscapes}
 
@@ -321,19 +360,25 @@ the Jacobian of the argument to the $\delta$-function. It is usually more
 interesting to condition the count on interesting properties of the stationary
 points, like the energy and the trace of the Hessian,
 \begin{equation}
-  d\mu_H(\mathbf s,\pmb\omega\mid E)=d\mu_H(\mathbf s,\pmb\omega)\,\delta\big(NE-H(\mathbf s)\big)
+  \begin{aligned}
+    &d\mu_H(\mathbf s,\pmb\omega\mid E,\mu) \\
+    &\quad=d\mu_H(\mathbf s,\pmb\omega)\,
+    \delta\big(NE-H(\mathbf s)\big)
+    \,\delta\big(N\mu-\operatorname{Tr}\operatorname{Hess}H(\mathbf s,\pmb\omega)\big)
+  \end{aligned}
 \end{equation}
 In this paper we in particular want to exploit our method to condition
 complexity on the marginality of stationary points. We therefore define the
 number of marginal points in a particular instantiation $H$ as
+\begin{widetext}
 \begin{equation}
   \begin{aligned}
-    &\mathcal N_{0}(E,\mu)
-    =\int d\mu_H(\mathbf s,\pmb\omega\mid E,\mu)\,\delta\big(N\lambda_\mathrm{min}(\operatorname{Hess}H(\mathbf s,\pmb\omega))\big) \\
+    &\mathcal N_H(E,\mu,\lambda^*)
+    =\int d\mu_H(\mathbf s,\pmb\omega\mid E,\mu)\,\delta\big(N\lambda^*-N\lambda_\mathrm{max}(\operatorname{Hess}H(\mathbf s,\pmb\omega))\big) \\
     &=\lim_{\beta\to\infty}\int d\mu_H(\mathbf s,\pmb\omega\mid E,\mu)
     \frac{d\mathbf x\,\delta(N-\mathbf x^T\mathbf x)\delta(\mathbf x^T\partial\mathbf g(\mathbf s))e^{\beta\mathbf x^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x}}
     {\int d\mathbf x'\,\delta(N-\mathbf x'^T\mathbf x')\delta(\mathbf x'^T\partial\mathbf g(\mathbf s))e^{\beta\mathbf x'^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x'}}
-    \delta\big(\mathbf x^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x\big)
+    \delta\big(N\lambda^*-\mathbf x^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x\big)
   \end{aligned}
 \end{equation}
 where the $\delta$-functions
@@ -341,25 +386,28 @@ where the $\delta$-functions
   \delta(\mathbf x^T\partial\mathbf g(\mathbf s))
   =\prod_{i=1}^r\delta(\mathbf x^T\partial g_i(\mathbf s))
 \end{equation}
-ensure that the integrals are constrained to the tangent space of the configuration manifold at the point $\mathbf s$. This likewise allows us to define the complexity of marginal points at energy $E$ as
+ensure that the integrals are constrained to the tangent space of the
+configuration manifold at the point $\mathbf s$. This likewise allows us to
+define the complexity of points with a specific energy, stability, and maximum eigenvalue as
 \begin{equation}
-  \Sigma_0(E,\mu)
-  =\frac1N\overline{\log\mathcal N_0(E)}
+  \Sigma_{\lambda^*}(E,\mu)
+  =\frac1N\overline{\log\mathcal N_H(E,\mu,\lambda^*)}
 \end{equation}
 In practice, this can be computed by introducing replicas to treat the
 logarithm ($\log x=\lim_{n\to0}\frac\partial{\partial n}x^n$) and replicating
 again to treat each of the normalizations in the denominator. This leads to the expression
 \begin{equation}
   \begin{aligned}
-    \Sigma_0(E,\mu)
-    &=\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n}\int\prod_{a=1}^n\Bigg[d\mu_H(\mathbf s_a,\pmb\omega_a\mid E,\mu)\,\delta\big((\mathbf x_a^1)^T\operatorname{Hess}H(\mathbf s_a,\pmb\omega_a)\mathbf x_a^1\big)\\
-      &\qquad\times\lim_{m_a\to0}
+    \Sigma_{\lambda^*}(E,\mu)
+    &=\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n}\int\prod_{a=1}^n\Bigg[d\mu_H(\mathbf s_a,\pmb\omega_a\mid E,\mu)\,\delta\big(N\lambda^*-(\mathbf x_a^1)^T\operatorname{Hess}H(\mathbf s_a,\pmb\omega_a)\mathbf x_a^1\big)\\
+    &\hspace{12em}\times\lim_{m_a\to0}
       \left(\prod_{b=1}^{m_a} d\mathbf x_a^b\,\delta(N-(\mathbf x_a^b)^T\mathbf x_a^b)\delta((\mathbf x_a^b)^T\partial\mathbf g(\mathbf s_a))e^{\beta(\mathbf x_a^b)^T\operatorname{Hess}H(\mathbf s_a,\pmb\omega_a)\mathbf x_a^b}\right)\Bigg]
   \end{aligned}
 \end{equation}
-Finally, the \emph{marginal} complexity is given by fixing $\mu=\mu_\text{m}$ so that the complexity is stationary with respect to changes in the value of the minimum eigenvalue, or
+\end{widetext}
+The \emph{marginal} shift $\mu_\text{m}$ is fixed by requiring that the complexity is stationary with respect to changes in the value of the maximum eigenvalue, or
 \begin{equation}
-  0=\frac\partial{\partial\lambda}\Sigma_\lambda(E,\mu_\text{m}(E))\bigg|_{\lambda=0}
+  0=\frac\partial{\partial\lambda^*}\Sigma_{\lambda^*}(E,\mu_\text{m}(E))\bigg|_{\lambda^*=0}
 \end{equation}
 Finally, the marginal complexity is defined by evaluating the complexity conditioned on $\lambda_{\text{max}}=0$ at $\mu_\text{m}$,
 \begin{equation}
@@ -367,7 +415,9 @@ Finally, the marginal complexity is defined by evaluating the complexity conditi
   =\Sigma_0(E,\mu_\text m(E))
 \end{equation}
 
-\subsection{Application to the spherical models}
+\section{Examples}
+
+\subsection{Spherical spin glasses}
 
 \begin{align}
   C_{ab}=\frac1N\mathbf s_a\cdot\mathbf s_b
@@ -383,6 +433,7 @@ Finally, the marginal complexity is defined by evaluating the complexity conditi
   \hat X^c_{ab}=\frac1N\hat{\mathbf s}_a\cdot\mathbf x_b^c
 \end{align}
 
+\begin{widetext}
 \begin{equation}
   \begin{aligned}
     &\sum_{ab}^n\left[\beta\omega A_{aa}^{bb}+\hat x\omega A_{aa}^{11}+\beta^2f''(1)\sum_{cd}^m(A_{ab}^{cd})^2+\hat x^2f''(1)(A_{ab}^{11})^2+\beta\hat xf''(1)\sum_c^m A_{ab}^{1c}\right]\\
@@ -390,6 +441,7 @@ Finally, the marginal complexity is defined by evaluating the complexity conditi
     +\log\det\begin{bmatrix}C&iR\\iR&D\end{bmatrix}-\log\det F
   \end{aligned}
 \end{equation}
+\end{widetext}
 
 $X^a$ is $n\times m_a$, and $A^{ab}$ is $m_a\times m_b$.
 \begin{equation}
@@ -416,7 +468,7 @@ We will discuss at the end of this paper when these order parameters can be expe
 \end{equation}
 where the maximum over $\omega$ needs to lie at a real value.
 
-\subsection{Twin spherical model}
+\subsection{Twin spherical spin glasses}
 
 $\Omega=S^{N-1}\times S^{N-1}$
 \begin{equation}
@@ -427,6 +479,7 @@ $\Omega=S^{N-1}\times S^{N-1}$
   =Nf_s\left(\frac{\pmb\sigma_1\cdot\pmb\sigma_2}N\right)
 \end{equation}
 
+\begin{widetext}
 \begin{equation}
   \mathcal S(C,R,D,W,\hat\beta,\omega)
   =\frac12\frac1n
@@ -436,13 +489,15 @@ $\Omega=S^{N-1}\times S^{N-1}$
 \end{equation}
 
 \begin{equation}
-  \mathcal S(C^{11},R^{11},D^{11},W^{11},\hat\beta)+\mathcal S(C^{22},R^{22},D^{22},W^{22},\hat\beta)
-  -\epsilon(r_{12}+r_{21})-\omega_1(r^{11}_d-w^{11}_d)-\omega_2(r^{22}_d-w^{22}_d)+\hat\beta E
-  +\frac12\log\det\begin{bmatrix}C^{11}&iR^{11}\\iR^{11}&D^{11}\end{bmatrix}
-  +\frac12\log\det\left(
-  \begin{bmatrix}C^{22}-q_{12}^2C^{11}&iR^{22}\\iR^{22}&D^{22}\end{bmatrix}
-  \right)
-  -\log\det(W^{11}W^{22}+W^{12}W^{21})
+  \begin{aligned}
+    &\mathcal S(C^{11},R^{11},D^{11},W^{11},\hat\beta)+\mathcal S(C^{22},R^{22},D^{22},W^{22},\hat\beta)
+    -\epsilon(r_{12}+r_{21})-\omega_1(r^{11}_d-w^{11}_d)-\omega_2(r^{22}_d-w^{22}_d)+\hat\beta E \\
+    &+\frac12\log\det\begin{bmatrix}C^{11}&iR^{11}\\iR^{11}&D^{11}\end{bmatrix}
+    +\frac12\log\det\left(
+    \begin{bmatrix}C^{22}-q_{12}^2C^{11}&iR^{22}\\iR^{22}&D^{22}\end{bmatrix}
+    \right)
+    -\log\det(W^{11}W^{22}+W^{12}W^{21})
+  \end{aligned}
 \end{equation}
 
 \begin{equation}
@@ -456,6 +511,7 @@ $\Omega=S^{N-1}\times S^{N-1}$
     \end{bmatrix}
   \end{aligned}
 \end{equation}
+\end{widetext}
 \begin{equation}
   \log\det\begin{bmatrix}
     Q^{11}&Q^{12}\\
@@ -464,7 +520,7 @@ $\Omega=S^{N-1}\times S^{N-1}$
   +\log\det(Q^{11}Q^{22}-Q^{12}Q^{12})
 \end{equation}
 
-\subsection{Nonlinear least squares}
+\subsection{Random nonlinear least squares}
 
 In this subsection we consider perhaps the simplest example of a non-Gaussian
 landscape: the problem of random nonlinear least squares optimization. Though,
@@ -570,15 +626,13 @@ which produces
   \right\}
 \end{equation}
 \begin{equation}
-  \log\det\left(
-    \begin{bmatrix}
-      (\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f(C_{cb}) + R_{ab}f'(C_{ab})
-      &
-      \frac1N\left[(\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f'(C_{cb})+R_{ab}f''(C_{ab})\right]\mathbf x_b^T-\frac1Nf'(C_{ab})\hat{\mathbf x}_a^T
-      \\
-      -i\hat{\mathbf x}_af(C_{ab})+\frac1N\hat\mu f'(C_{ab})\mathbf x_b
-    \end{bmatrix}
-  \right)
+  \begin{bmatrix}
+    (\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f(C_{cb}) + R_{ab}f'(C_{ab})
+    &
+    \frac1N\left[(\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f'(C_{cb})+R_{ab}f''(C_{ab})\right]\mathbf x_b^T-\frac1Nf'(C_{ab})\hat{\mathbf x}_a^T
+    \\
+    -i\hat{\mathbf x}_af(C_{ab})+\frac1N\hat\mu f'(C_{ab})\mathbf x_b
+  \end{bmatrix}
 \end{equation}
 
 The condition fixing the maximum eigenvalue adds to the integrand
author	Jaron Kent-Dobias <jaron@kent-dobias.com>	2024-05-21 16:33:51 +0200
committer	Jaron Kent-Dobias <jaron@kent-dobias.com>	2024-05-21 16:33:51 +0200
commit	c10694ea25be6a6593765ad82a92d994ae02e18b (patch)
tree	6a12fb32735987af6e7809d8059dae1e8bd3b983
parent	63eacd9d14ded6cc306c11e759398071bef3ee76 (diff)
download	marginal-c10694ea25be6a6593765ad82a92d994ae02e18b.tar.gz marginal-c10694ea25be6a6593765ad82a92d994ae02e18b.tar.bz2 marginal-c10694ea25be6a6593765ad82a92d994ae02e18b.zip