summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaron Kent-Dobias <jaron@kent-dobias.com>2024-05-21 16:33:51 +0200
committerJaron Kent-Dobias <jaron@kent-dobias.com>2024-05-21 16:33:51 +0200
commitc10694ea25be6a6593765ad82a92d994ae02e18b (patch)
tree6a12fb32735987af6e7809d8059dae1e8bd3b983
parent63eacd9d14ded6cc306c11e759398071bef3ee76 (diff)
downloadmarginal-c10694ea25be6a6593765ad82a92d994ae02e18b.tar.gz
marginal-c10694ea25be6a6593765ad82a92d994ae02e18b.tar.bz2
marginal-c10694ea25be6a6593765ad82a92d994ae02e18b.zip
Work on the train.
-rw-r--r--marginal.bib58
-rw-r--r--marginal.tex222
2 files changed, 167 insertions, 113 deletions
diff --git a/marginal.bib b/marginal.bib
index b51659e..9f89910 100644
--- a/marginal.bib
+++ b/marginal.bib
@@ -27,6 +27,35 @@
doi = {10.21468/scipostphys.15.3.109}
}
+@article{Fyodorov_2020_Counting,
+ author = {Fyodorov, Y. V. and Tublin, R.},
+ title = {Counting Stationary Points of the Loss Function in the Simplest Constrained Least-square Optimization},
+ journal = {Acta Physica Polonica B},
+ publisher = {Jagiellonian University},
+ year = {2020},
+ number = {7},
+ volume = {51},
+ pages = {1663},
+ url = {http://dx.doi.org/10.5506/APhysPolB.51.1663},
+ doi = {10.5506/aphyspolb.51.1663},
+ issn = {1509-5770}
+}
+
+@article{Fyodorov_2022_Optimization,
+ author = {Fyodorov, Yan V and Tublin, Rashel},
+ title = {Optimization landscape in the simplest constrained random least-square problem},
+ journal = {Journal of Physics A: Mathematical and Theoretical},
+ publisher = {IOP Publishing},
+ year = {2022},
+ month = {May},
+ number = {24},
+ volume = {55},
+ pages = {244008},
+ url = {http://dx.doi.org/10.1088/1751-8121/ac6d8e},
+ doi = {10.1088/1751-8121/ac6d8e},
+ issn = {1751-8121}
+}
+
@article{Ikeda_2023_Bose-Einstein-like,
author = {Ikeda, Harukuni},
title = {{Bose}--{Einstein}-like condensation of deformed random matrix: a replica approach},
@@ -70,35 +99,6 @@
doi = {10.1103/physrevlett.75.2847}
}
-@article{Fyodorov_2020_Counting,
- author = {Fyodorov, Y.V. and Tublin, R.},
- title = {Counting Stationary Points of the Loss Function in the Simplest Constrained Least-square Optimization},
- journal = {Acta Physica Polonica B},
- publisher = {Jagiellonian University},
- year = {2020},
- number = {7},
- volume = {51},
- pages = {1663},
- url = {http://dx.doi.org/10.5506/APhysPolB.51.1663},
- doi = {10.5506/aphyspolb.51.1663},
- issn = {1509-5770}
-}
-
-@article{Fyodorov_2022_Optimization,
- author = {Fyodorov, Yan V and Tublin, Rashel},
- title = {Optimization landscape in the simplest constrained random least-square problem},
- journal = {Journal of Physics A: Mathematical and Theoretical},
- publisher = {IOP Publishing},
- year = {2022},
- month = {May},
- number = {24},
- volume = {55},
- pages = {244008},
- url = {http://dx.doi.org/10.1088/1751-8121/ac6d8e},
- doi = {10.1088/1751-8121/ac6d8e},
- issn = {1751-8121}
-}
-
@phdthesis{Tublin_2022_A,
author = {Tublin, Rashel},
title = {A Few Results in Random Matrix Theory and Random Optimization},
diff --git a/marginal.tex b/marginal.tex
index ec28568..5b73f9e 100644
--- a/marginal.tex
+++ b/marginal.tex
@@ -4,15 +4,8 @@
\usepackage[T1]{fontenc}
\usepackage{amsmath,amssymb,latexsym,graphicx}
\usepackage{newtxtext,newtxmath}
-\usepackage{bbold}
+\usepackage{bbold,anyfontsize}
\usepackage[dvipsnames]{xcolor}
-\usepackage[
- colorlinks=true,
- urlcolor=MidnightBlue,
- citecolor=MidnightBlue,
- filecolor=MidnightBlue,
- linkcolor=MidnightBlue
-]{hyperref}
\begin{document}
@@ -24,6 +17,18 @@
\affiliation{Istituto Nazionale di Fisica Nucleare, Sezione di Roma I, Rome, Italy 00184}
\begin{abstract}
+ Marginal optima are minima or maxima of a function with many asymptotically
+ flat directions. In settings with many competing optima, marginal ones tend
+ to attract algorithms and physical dynamics. Often, the important family of
+ marginal attractors is a vanishing minority compared with nonmarginal optima
+ and other unstable stationary points. We introduce a generic technique for
+ conditioning the statistics of stationary points on their marginality, and
+ apply it in three isotropic settings with different typical forms for the
+ Hessian at optima: in the spherical spin glasses, where the Hessian is GOE;
+ in multispherical spin glasses, which are Gaussian but non-GOE; and in a
+ model of random nonlinear sum of squares, which is non-Gaussian. In these
+ problems we are able to fully characterize the distribution of marginal
+ optima in the landscape, including when they are in the minority.
\end{abstract}
\maketitle
@@ -79,7 +84,7 @@ stationary points be zero, we restrict to marginal minima, either those with a
pseudogap in their bulk spectrum or those with outlying eigenvectors. We
provide a heuristic to distinguish these two cases. We demonstrate the method
on the spherical models, where it is unnecessary but instructive, and on
-extensions of the spherical models with non-\textsc{goe} Hessians where the technique is
+extensions of the spherical models with non-GOE Hessians where the technique is
more useful.
\section{Conditioning on the smallest eigenvalue}
@@ -88,7 +93,7 @@ more useful.
An arbitrary function $g$ of the maximum eigenvalue of a matrix $A$ can be
expressed as
-\begin{equation}
+\begin{equation} \label{eq:λmax}
g(\lambda_\textrm{max}(A))
=\lim_{\beta\to\infty}\int
\frac{d\mathbf s\,\delta(N-\mathbf s^T\mathbf s)e^{\beta\mathbf s^TA\mathbf s}}
@@ -128,8 +133,8 @@ minimum eigenvalue is zero.
We demonstrate the efficacy of the technique by rederiving a well-known result:
the large-deviation function for pulling an eigenvalue from the bulk of the
-\textsc{goe} spectrum.
-Consider an ensemble of $N\times N$ matrices $A=B+\mu I$ for $B$ drawn from the \textsc{goe} ensemble with entries
+GOE spectrum.
+Consider an ensemble of $N\times N$ matrices $A=B+\mu I$ for $B$ drawn from the GOE ensemble with entries
whose variance is $\sigma^2/N$. We know that the bulk spectrum of $A$ is a
Wigner semicircle with radius $2\sigma$ shifted by a constant $\mu$.
Therefore, for $\mu=2\sigma$, the minimum eigenvalue will typically be zero,
@@ -141,37 +146,51 @@ eigenvalues, would be necessary. This final case cannot be quantified by this
method, but instead the nonexistence of a large deviation linear in $N$ appears
as the emergence of an imaginary part in the function.
-\begin{widetext}
As an example, we compute
\begin{equation} \label{eq:large.dev}
- e^{NG_\lambda(\mu)}=P_{\lambda_\mathrm{min}(B+\mu I)=\lambda}=\overline{\lim_{\beta\to\infty}\int\frac{d\mathbf x\,\delta(N-\mathbf x^T\mathbf x)e^{-\beta\mathbf x^T(B+\mu I)\mathbf x}}{\int d\mathbf x'\,\delta(N-\mathbf x'^T\mathbf x')e^{-\beta\mathbf x'^T(B+\mu I)\mathbf x'}}\,\delta\big(\mathbf x^T(B+\mu I)\mathbf x-N\lambda\big)}
+ e^{NG_{\lambda^*}(\mu)}
+ =P_{\lambda_\mathrm{max}(B-\mu I)=\lambda^*}
+ =\overline{\delta\big(N\lambda^*-N\lambda_\mathrm{max}(B-\mu I)\big)}
\end{equation}
where the overline is the average over $B$, and we have defined the large
-deviation function $G_\sigma(\mu)$. Using replicas to treat the denominator ($x^{-1}=\lim_{n\to0}x^{n-1}$)
+deviation function $G_{\lambda^*}(\mu)$.
+Using the representation of $\lambda_\mathrm{max}$ defined in \eqref{eq:λmax}, we have
+\begin{widetext}
+\begin{equation}
+ e^{NG_{\lambda^*}(\mu)}
+ =\overline{
+ \lim_{\beta\to\infty}\int\frac{d\mathbf x\,\delta(N-\mathbf x^T\mathbf x)e^{\beta\mathbf x^T(B-\mu I)\mathbf x}}
+ {\int d\mathbf x'\,\delta(N-\mathbf x'^T\mathbf x')e^{\beta\mathbf x'^T(B-\mu I)\mathbf x'}}\,\delta\big(N\lambda^*-\mathbf x^T(B-\mu I)\mathbf x\big)
+ }
+\end{equation}
+Using replicas to treat the denominator ($x^{-1}=\lim_{n\to0}x^{n-1}$)
and transforming the $\delta$-function to its Fourier
representation, we have
\begin{equation}
- e^{NG_\lambda(\mu)}=\overline{\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right]
- \exp\left\{-\beta\sum_{a=1}^n\mathbf x_a^T(B+\mu I)\mathbf x_a+\hat\lambda\mathbf x_1^T(B+\mu I)\mathbf x_1-N\hat\lambda\lambda\right\}}
+ e^{NG_{\lambda^*}(\mu)}
+ =\overline{\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right]
+ \exp\left\{\beta\sum_{a=1}^n\mathbf x_a^T(B-\mu I)\mathbf x_a+\hat\lambda\left[N\lambda^*-\mathbf x_1^T(B-\mu I)\mathbf x_1\right]\right\}}
\end{equation}
having introduced the parameter $\hat\lambda$ in the Fourier representation of the $\delta$-function.
The whole expression, so transformed, is a simple exponential integral linear in the matrix $B$.
Taking the average over $B$, we have
\begin{equation}
- e^{NG_\lambda(\mu)}
- =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right]
- \exp\left\{-Nn\beta\mu+N\hat\lambda(\mu-\lambda)+\frac{\sigma^2}{N}\left[\beta^2\sum_{ab}^n(\mathbf x_a^T\mathbf x_b)^2
+ \begin{aligned}
+ &e^{NG_{\lambda^*}(\mu)}
+ =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right] \\
+ &\hspace{10em}\exp\left\{N\left[\hat\lambda(\mu+\lambda^*)-n\beta\mu\right]+\frac{\sigma^2}{N}\left[\beta^2\sum_{ab}^n(\mathbf x_a^T\mathbf x_b)^2
-2\beta\hat\lambda\sum_a^n(\mathbf x_a^T\mathbf x_1)^2
+\hat\lambda^2N^2
\right]\right\}
+ \end{aligned}
\end{equation}
We make the Hubbard--Stratonovich transformation to the matrix field $Q_{ab}=\frac1N\mathbf x_a^T\mathbf x_b$. This gives
\begin{equation}
- e^{NG_\lambda(\mu)}
+ e^{NG_{\lambda^*}(\mu)}
=\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\,dQ\,
\exp N\left\{
- -n\beta\mu+\hat\lambda(\mu-\lambda)+\sigma^2\left[\beta^2\sum_{ab}^nQ_{ab}^2
- +-\beta\hat\lambda\sum_a^nQ_{1a}^2
+ \hat\lambda(\mu+\lambda^*)-n\beta\mu+\sigma^2\left[\beta^2\sum_{ab}^nQ_{ab}^2
+ -2\beta\hat\lambda\sum_a^nQ_{1a}^2
+\hat\lambda^2
\right]+\frac12\log\det Q\right\}
\end{equation}
@@ -196,49 +215,57 @@ and
\end{equation}
Inserting these expressions and taking the limit of $n$ to zero, we find
\begin{equation}
- e^{NG_\sigma(\mu)}=\lim_{\beta\to\infty}\int d\hat\lambda\,dq_0\,d\tilde q_0\,e^{N\mathcal S_\beta(q_0,\tilde q_0,\hat\lambda)}
+ e^{NG_{\lambda^*}(\mu)}=\lim_{\beta\to\infty}\int d\hat\lambda\,dq_0\,d\tilde q_0\,e^{N\mathcal S_\beta(q_0,\tilde q_0,\hat\lambda)}
\end{equation}
with the effective action
\begin{equation}
- \mathcal S_\beta(q_0,\tilde q_0,\hat\lambda)=\hat\lambda(\mu-\lambda)+\sigma^2\left[
- 2\beta^2(q_0^2-\tilde q_0^2)-2\beta\hat\lambda(1-\tilde q_0^2)+\hat\lambda^2
- \right]-\log(1-q_0)+\frac12\log(1-2q_0+\tilde q_0^2)
+ \begin{aligned}
+ &\mathcal S_\beta(q_0,\tilde q_0,\hat\lambda) \\
+ &\quad=\hat\lambda(\mu+\lambda^*)+\sigma^2\left[
+ 2\beta^2(q_0^2-\tilde q_0^2)-2\beta\hat\lambda(1-\tilde q_0^2)+\hat\lambda^2
+ \right] \\
+ &\qquad-\log(1-q_0)+\frac12\log(1-2q_0+\tilde q_0^2)
+ \end{aligned}
\end{equation}
We need to evaluate the integral above using the saddle point method, but in the limit of $\beta\to\infty$.
We expect the overlaps to concentrate on one as $\beta$ goes to infinity. We therefore take
\begin{align}
- q_0=1-y\beta^{-1}-z\beta^{-2}+O(\beta^{-3})
- &&
- \tilde q_0=1-\tilde y\beta^{-1}-\tilde z\beta^{-2}+O(\beta^{-3})
+ q_0&=1-y\beta^{-1}-z\beta^{-2}+O(\beta^{-3})
+ \\
+ \tilde q_0&=1-\tilde y\beta^{-1}-(z-\Delta z)\beta^{-2}+O(\beta^{-3})
\end{align}
However, taking the limit with $y\neq\tilde y$ results in an expression for the
action that diverges with $\beta$. To cure this, we must take $\tilde y=y$. The result is
\begin{equation}
- \mathcal S_\infty(y,z,\tilde z,\hat\lambda)
- =\hat\lambda(\mu-\lambda)+\sigma^2\big[
- \hat\lambda^2-4(y+z-\tilde z)
- \big]+\frac12\log\left(1+2\frac{z-\tilde z}{y^2}\right)
+ \begin{aligned}
+ \mathcal S_\infty(y,\Delta z,\hat\lambda)
+ &=\hat\lambda(\mu+\lambda^*)
+ +\sigma^2\big[
+ \hat\lambda^2-4(y+\Delta z)
+ \big] \\
+ &\qquad+\frac12\log\left(1+\frac{2\Delta z}{y^2}\right)
+ \end{aligned}
\end{equation}
Extremizing this action over the new parameters $y$, $\Delta z=z-\tilde z$, and $\hat\lambda$, we have
\begin{align}
- \hat\lambda=-\frac1\sigma\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}
+ \hat\lambda=-\frac1\sigma\sqrt{\frac{(\mu+\lambda^*)^2}{(2\sigma)^2}-1}
\\
- y=\frac1{2\sigma}\left(\frac{\mu-\lambda}{2\sigma}-\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}\right)
+ y=\frac1{2\sigma}\left(\frac{\mu+\lambda^*}{2\sigma}-\sqrt{\frac{(\mu+\lambda^*)^2}{(2\sigma)^2}-1}\right)
&\\
- \Delta z=\frac1{4\sigma^2}\left(1-\frac{\mu-\lambda}{2\sigma}\left(\frac{\mu-\lambda}{2\sigma}-\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}\right)\right)
+ \Delta z=\frac1{4\sigma^2}\left(1-\frac{\mu+\lambda^*}{2\sigma}\left(\frac{\mu+\lambda^*}{2\sigma}-\sqrt{\frac{(\mu+\lambda^*)^2}{(2\sigma)^2}-1}\right)\right)
\end{align}
Inserting this solution into $\mathcal S_\infty$ we find
\begin{equation}
\begin{aligned}
- &G_\lambda(\mu)
+ &G_{\lambda^*}(\mu)
=\mathop{\textrm{extremum}}_{y,\Delta z,\hat\lambda}\mathcal S_\infty(y,\Delta z,\hat\lambda) \\
- &=-\frac{\mu-\lambda}{2\sigma}\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}
+ &=-\tfrac{\mu+\lambda^*}{2\sigma}\sqrt{\Big(\tfrac{\mu+\lambda^*}{2\sigma}\Big)^2-1}
+\log\left(
- \frac{\mu-\lambda}{2\sigma}+\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}
+ \tfrac{\mu+\lambda^*}{2\sigma}+\sqrt{\Big(\tfrac{\mu+\lambda^*}{2\sigma}\Big)^2-1}
\right)
\end{aligned}
\end{equation}
-This function is plotted in Fig.~\ref{fig:large.dev}. For $\mu<2\sigma$ $G_\sigma(\mu)$ has an
+This function is plotted in Fig.~\ref{fig:large.dev}. For $\mu<2\sigma$, $G_{\lambda^*}(\mu)$ has an
imaginary part, which makes any additional integral over $\mu$ highly
oscillatory. This indicates that the existence of a marginal minimum for this
parameter value corresponds with a large deviation that grows faster than $N$,
@@ -248,14 +275,15 @@ deviations in order for the smallest eigenvalue to be zero. For
$\mu\geq2\sigma$ this function gives the large deviation function for the
probability of seeing a zero eigenvalue given the shift $\mu$.
$\mu=2\sigma$ is the maximum of the function with a real value, and
-corresponds to the intersection of the average spectrum with zero, i.e., a pseudogap.
+corresponds to the intersection of the average spectrum with zero, i.e., a
+pseudogap.
\begin{figure}
\includegraphics[width=\columnwidth]{figs/large_deviation.pdf}
\caption{
The large deviation function $G_0(\mu)$ defined in
\eqref{eq:large.dev} as a function of the shift $\mu$ to the
- \textsc{goe} diagonal. As expected, $G_\sigma(2\sigma)=0$, while for
+ GOE diagonal. As expected, $G_0(2\sigma)=0$, while for
$\mu>2\sigma$ it is negative and for $\mu<2\sigma$ it gains an
imaginary part.
} \label{fig:large.dev}
@@ -264,7 +292,7 @@ corresponds to the intersection of the average spectrum with zero, i.e., a pseud
Marginal spectra with a pseudogap and those with simple isolated eigenvalues
are qualitatively different, and more attention may be focused on the former.
Here, we see what appears to be a general heuristic for identifying the saddle
-parameters for which the spectrum is psedogapped: the equivalent of this
+parameters for which the spectrum is pseudogapped: the equivalent of this
large-deviation function will lie on the singular boundary between a purely
real and complex value.
@@ -279,17 +307,28 @@ pseudogapped spectra, where the continuous part of the spectral density has a
lower bound at zero.
Fortunately, our calculation can be modified to ensure that we consider only
-psedogapped spectra. First, we insert a shift $\mu$ by hand into the `natural'
+pseudogapped spectra. First, we insert a shift $\mu$ by hand into the `natural'
spectrum of the problem at hand, conditioning the trace to have a specific
-value. Then, we choose this artificial shift so that the resulting conditioned
-spectra are pseudogapped. This we can do by looking for the point where the
-order parameter $\lambda$ associated with the marginal condition is zero.
+value $\mu=\frac1N\operatorname{Tr}A$. Then, we choose this artificial shift so that
+the resulting conditioned spectra are pseudogapped. As seen in the previous
+subsection, this can be done by starting from a sufficiently large $\mu$ and
+decreasing it until the calculation develops an imaginary part, signaling the
+breakdown of the large-deviation principle at order $N$.
+
+In isotropic or zero-signal landscapes, there is another way to condition on a
+pseudogap. In such landscapes, the typical spectrum does not have an isolated
+eigenvalue. Therefore, the condition associated with the bulk of the spectrum
+reaching zero, i.e., the pseudogap, will always correspond to the most common
+configuration. We can therefore choose $\mu=\mu_\textrm m$ such that
+\begin{equation}
+ 0=\frac\partial{\partial\lambda^*}G_{\lambda^*}(\mu_\mathrm m)\bigg|_{\lambda^*=0}
+\end{equation}
+In the previous problem, this corresponds precisely to $\mu_\mathrm m=2\sigma$,
+the correct marginal shift. Note that when we treat the Dirac $\delta$ function
+using its Fourier representation with auxiliary parameter $\hat\lambda$, as in
+the previous subsection, this condition corresponds with choosing $\mu$ such
+that $\hat\lambda=0$.
-What is the interpretation of this? In general the condition $\lambda=0$
-corresponds to a point where the conditioning does not change the volume
-measured by the integral. Therefore, the typical matrix with the value of $\mu$
-for which $\lambda=0$ has a zero eigenvalue. In isotropic problems where
-isolated eigenvalues in the spectrum are atypical, this implies a pseudogap.
\section{Marginal complexity in random landscapes}
@@ -321,19 +360,25 @@ the Jacobian of the argument to the $\delta$-function. It is usually more
interesting to condition the count on interesting properties of the stationary
points, like the energy and the trace of the Hessian,
\begin{equation}
- d\mu_H(\mathbf s,\pmb\omega\mid E)=d\mu_H(\mathbf s,\pmb\omega)\,\delta\big(NE-H(\mathbf s)\big)
+ \begin{aligned}
+ &d\mu_H(\mathbf s,\pmb\omega\mid E,\mu) \\
+ &\quad=d\mu_H(\mathbf s,\pmb\omega)\,
+ \delta\big(NE-H(\mathbf s)\big)
+ \,\delta\big(N\mu-\operatorname{Tr}\operatorname{Hess}H(\mathbf s,\pmb\omega)\big)
+ \end{aligned}
\end{equation}
In this paper we in particular want to exploit our method to condition
complexity on the marginality of stationary points. We therefore define the
number of marginal points in a particular instantiation $H$ as
+\begin{widetext}
\begin{equation}
\begin{aligned}
- &\mathcal N_{0}(E,\mu)
- =\int d\mu_H(\mathbf s,\pmb\omega\mid E,\mu)\,\delta\big(N\lambda_\mathrm{min}(\operatorname{Hess}H(\mathbf s,\pmb\omega))\big) \\
+ &\mathcal N_H(E,\mu,\lambda^*)
+ =\int d\mu_H(\mathbf s,\pmb\omega\mid E,\mu)\,\delta\big(N\lambda^*-N\lambda_\mathrm{max}(\operatorname{Hess}H(\mathbf s,\pmb\omega))\big) \\
&=\lim_{\beta\to\infty}\int d\mu_H(\mathbf s,\pmb\omega\mid E,\mu)
\frac{d\mathbf x\,\delta(N-\mathbf x^T\mathbf x)\delta(\mathbf x^T\partial\mathbf g(\mathbf s))e^{\beta\mathbf x^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x}}
{\int d\mathbf x'\,\delta(N-\mathbf x'^T\mathbf x')\delta(\mathbf x'^T\partial\mathbf g(\mathbf s))e^{\beta\mathbf x'^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x'}}
- \delta\big(\mathbf x^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x\big)
+ \delta\big(N\lambda^*-\mathbf x^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x\big)
\end{aligned}
\end{equation}
where the $\delta$-functions
@@ -341,25 +386,28 @@ where the $\delta$-functions
\delta(\mathbf x^T\partial\mathbf g(\mathbf s))
=\prod_{i=1}^r\delta(\mathbf x^T\partial g_i(\mathbf s))
\end{equation}
-ensure that the integrals are constrained to the tangent space of the configuration manifold at the point $\mathbf s$. This likewise allows us to define the complexity of marginal points at energy $E$ as
+ensure that the integrals are constrained to the tangent space of the
+configuration manifold at the point $\mathbf s$. This likewise allows us to
+define the complexity of points with a specific energy, stability, and maximum eigenvalue as
\begin{equation}
- \Sigma_0(E,\mu)
- =\frac1N\overline{\log\mathcal N_0(E)}
+ \Sigma_{\lambda^*}(E,\mu)
+ =\frac1N\overline{\log\mathcal N_H(E,\mu,\lambda^*)}
\end{equation}
In practice, this can be computed by introducing replicas to treat the
logarithm ($\log x=\lim_{n\to0}\frac\partial{\partial n}x^n$) and replicating
again to treat each of the normalizations in the denominator. This leads to the expression
\begin{equation}
\begin{aligned}
- \Sigma_0(E,\mu)
- &=\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n}\int\prod_{a=1}^n\Bigg[d\mu_H(\mathbf s_a,\pmb\omega_a\mid E,\mu)\,\delta\big((\mathbf x_a^1)^T\operatorname{Hess}H(\mathbf s_a,\pmb\omega_a)\mathbf x_a^1\big)\\
- &\qquad\times\lim_{m_a\to0}
+ \Sigma_{\lambda^*}(E,\mu)
+ &=\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n}\int\prod_{a=1}^n\Bigg[d\mu_H(\mathbf s_a,\pmb\omega_a\mid E,\mu)\,\delta\big(N\lambda^*-(\mathbf x_a^1)^T\operatorname{Hess}H(\mathbf s_a,\pmb\omega_a)\mathbf x_a^1\big)\\
+ &\hspace{12em}\times\lim_{m_a\to0}
\left(\prod_{b=1}^{m_a} d\mathbf x_a^b\,\delta(N-(\mathbf x_a^b)^T\mathbf x_a^b)\delta((\mathbf x_a^b)^T\partial\mathbf g(\mathbf s_a))e^{\beta(\mathbf x_a^b)^T\operatorname{Hess}H(\mathbf s_a,\pmb\omega_a)\mathbf x_a^b}\right)\Bigg]
\end{aligned}
\end{equation}
-Finally, the \emph{marginal} complexity is given by fixing $\mu=\mu_\text{m}$ so that the complexity is stationary with respect to changes in the value of the minimum eigenvalue, or
+\end{widetext}
+Finally, the \emph{marginal} complexity is given by fixing $\mu=\mu_\text{m}$ so that the complexity is stationary with respect to changes in the value of the maximum eigenvalue, or
\begin{equation}
- 0=\frac\partial{\partial\lambda}\Sigma_\lambda(E,\mu_\text{m}(E))\bigg|_{\lambda=0}
+ 0=\frac\partial{\partial\lambda^*}\Sigma_{\lambda^*}(E,\mu_\text{m}(E))\bigg|_{\lambda^*=0}
\end{equation}
The marginal complexity is then defined by evaluating the complexity conditioned on $\lambda_{\text{max}}=0$ at $\mu_\text{m}$,
\begin{equation}
@@ -367,7 +415,9 @@ Finally, the marginal complexity is defined by evaluating the complexity conditi
=\Sigma_0(E,\mu_\text m(E))
\end{equation}
-\subsection{Application to the spherical models}
+\section{Examples}
+
+\subsection{Spherical spin glasses}
\begin{align}
C_{ab}=\frac1N\mathbf s_a\cdot\mathbf s_b
@@ -383,6 +433,7 @@ Finally, the marginal complexity is defined by evaluating the complexity conditi
\hat X^c_{ab}=\frac1N\hat{\mathbf s}_a\cdot\mathbf x_b^c
\end{align}
+\begin{widetext}
\begin{equation}
\begin{aligned}
&\sum_{ab}^n\left[\beta\omega A_{aa}^{bb}+\hat x\omega A_{aa}^{11}+\beta^2f''(1)\sum_{cd}^m(A_{ab}^{cd})^2+\hat x^2f''(1)(A_{ab}^{11})^2+\beta\hat xf''(1)\sum_c^m A_{ab}^{1c}\right]\\
@@ -390,6 +441,7 @@ Finally, the marginal complexity is defined by evaluating the complexity conditi
+\log\det\begin{bmatrix}C&iR\\iR&D\end{bmatrix}-\log\det F
\end{aligned}
\end{equation}
+\end{widetext}
$X^a$ is $n\times m_a$, and $A^{ab}$ is $m_a\times m_b$.
\begin{equation}
@@ -416,7 +468,7 @@ We will discuss at the end of this paper when these order parameters can be expe
\end{equation}
where the maximum over $\omega$ needs to lie at a real value.
-\subsection{Twin spherical model}
+\subsection{Twin spherical spin glasses}
$\Omega=S^{N-1}\times S^{N-1}$
\begin{equation}
@@ -427,6 +479,7 @@ $\Omega=S^{N-1}\times S^{N-1}$
=Nf_s\left(\frac{\pmb\sigma_1\cdot\pmb\sigma_2}N\right)
\end{equation}
+\begin{widetext}
\begin{equation}
\mathcal S(C,R,D,W,\hat\beta,\omega)
=\frac12\frac1n
@@ -436,13 +489,15 @@ $\Omega=S^{N-1}\times S^{N-1}$
\end{equation}
\begin{equation}
- \mathcal S(C^{11},R^{11},D^{11},W^{11},\hat\beta)+\mathcal S(C^{22},R^{22},D^{22},W^{22},\hat\beta)
- -\epsilon(r_{12}+r_{21})-\omega_1(r^{11}_d-w^{11}_d)-\omega_2(r^{22}_d-w^{22}_d)+\hat\beta E
- +\frac12\log\det\begin{bmatrix}C^{11}&iR^{11}\\iR^{11}&D^{11}\end{bmatrix}
- +\frac12\log\det\left(
- \begin{bmatrix}C^{22}-q_{12}^2C^{11}&iR^{22}\\iR^{22}&D^{22}\end{bmatrix}
- \right)
- -\log\det(W^{11}W^{22}+W^{12}W^{21})
+ \begin{aligned}
+ &\mathcal S(C^{11},R^{11},D^{11},W^{11},\hat\beta)+\mathcal S(C^{22},R^{22},D^{22},W^{22},\hat\beta)
+ -\epsilon(r_{12}+r_{21})-\omega_1(r^{11}_d-w^{11}_d)-\omega_2(r^{22}_d-w^{22}_d)+\hat\beta E \\
+ &+\frac12\log\det\begin{bmatrix}C^{11}&iR^{11}\\iR^{11}&D^{11}\end{bmatrix}
+ +\frac12\log\det\left(
+ \begin{bmatrix}C^{22}-q_{12}^2C^{11}&iR^{22}\\iR^{22}&D^{22}\end{bmatrix}
+ \right)
+ -\log\det(W^{11}W^{22}+W^{12}W^{21})
+ \end{aligned}
\end{equation}
\begin{equation}
@@ -456,6 +511,7 @@ $\Omega=S^{N-1}\times S^{N-1}$
\end{bmatrix}
\end{aligned}
\end{equation}
+\end{widetext}
\begin{equation}
\log\det\begin{bmatrix}
Q^{11}&Q^{12}\\
@@ -464,7 +520,7 @@ $\Omega=S^{N-1}\times S^{N-1}$
+\log\det(Q^{11}Q^{22}-Q^{12}Q^{12})
\end{equation}
-\subsection{Nonlinear least squares}
+\subsection{Random nonlinear least squares}
In this subsection we consider perhaps the simplest example of a non-Gaussian
landscape: the problem of random nonlinear least squares optimization. Though,
@@ -570,15 +626,13 @@ which produces
\right\}
\end{equation}
\begin{equation}
- \log\det\left(
- \begin{bmatrix}
- (\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f(C_{cb}) + R_{ab}f'(C_{ab})
- &
- \frac1N\left[(\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f'(C_{cb})+R_{ab}f''(C_{ab})\right]\mathbf x_b^T-\frac1Nf'(C_{ab})\hat{\mathbf x}_a^T
- \\
- -i\hat{\mathbf x}_af(C_{ab})+\frac1N\hat\mu f'(C_{ab})\mathbf x_b
- \end{bmatrix}
- \right)
+ \begin{bmatrix}
+ (\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f(C_{cb}) + R_{ab}f'(C_{ab})
+ &
+ \frac1N\left[(\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f'(C_{cb})+R_{ab}f''(C_{ab})\right]\mathbf x_b^T-\frac1Nf'(C_{ab})\hat{\mathbf x}_a^T
+ \\
+ -i\hat{\mathbf x}_af(C_{ab})+\frac1N\hat\mu f'(C_{ab})\mathbf x_b
+ \end{bmatrix}
\end{equation}
The condition fixing the maximum eigenvalue adds to the integrand