From c10694ea25be6a6593765ad82a92d994ae02e18b Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 21 May 2024 16:33:51 +0200 Subject: Work on the train. --- marginal.bib | 58 ++++++++-------- marginal.tex | 222 +++++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 167 insertions(+), 113 deletions(-) diff --git a/marginal.bib b/marginal.bib index b51659e..9f89910 100644 --- a/marginal.bib +++ b/marginal.bib @@ -27,6 +27,35 @@ doi = {10.21468/scipostphys.15.3.109} } +@article{Fyodorov_2020_Counting, + author = {Fyodorov, Y. V. and Tublin, R.}, + title = {Counting Stationary Points of the Loss Function in the Simplest Constrained Least-square Optimization}, + journal = {Acta Physica Polonica B}, + publisher = {Jagiellonian University}, + year = {2020}, + number = {7}, + volume = {51}, + pages = {1663}, + url = {http://dx.doi.org/10.5506/APhysPolB.51.1663}, + doi = {10.5506/aphyspolb.51.1663}, + issn = {1509-5770} +} + +@article{Fyodorov_2022_Optimization, + author = {Fyodorov, Yan V and Tublin, Rashel}, + title = {Optimization landscape in the simplest constrained random least-square problem}, + journal = {Journal of Physics A: Mathematical and Theoretical}, + publisher = {IOP Publishing}, + year = {2022}, + month = {May}, + number = {24}, + volume = {55}, + pages = {244008}, + url = {http://dx.doi.org/10.1088/1751-8121/ac6d8e}, + doi = {10.1088/1751-8121/ac6d8e}, + issn = {1751-8121} +} + @article{Ikeda_2023_Bose-Einstein-like, author = {Ikeda, Harukuni}, title = {{Bose}--{Einstein}-like condensation of deformed random matrix: a replica approach}, @@ -70,35 +99,6 @@ doi = {10.1103/physrevlett.75.2847} } -@article{Fyodorov_2020_Counting, - author = {Fyodorov, Y.V. 
and Tublin, R.}, - title = {Counting Stationary Points of the Loss Function in the Simplest Constrained Least-square Optimization}, - journal = {Acta Physica Polonica B}, - publisher = {Jagiellonian University}, - year = {2020}, - number = {7}, - volume = {51}, - pages = {1663}, - url = {http://dx.doi.org/10.5506/APhysPolB.51.1663}, - doi = {10.5506/aphyspolb.51.1663}, - issn = {1509-5770} -} - -@article{Fyodorov_2022_Optimization, - author = {Fyodorov, Yan V and Tublin, Rashel}, - title = {Optimization landscape in the simplest constrained random least-square problem}, - journal = {Journal of Physics A: Mathematical and Theoretical}, - publisher = {IOP Publishing}, - year = {2022}, - month = {May}, - number = {24}, - volume = {55}, - pages = {244008}, - url = {http://dx.doi.org/10.1088/1751-8121/ac6d8e}, - doi = {10.1088/1751-8121/ac6d8e}, - issn = {1751-8121} -} - @phdthesis{Tublin_2022_A, author = {Tublin, Rashel}, title = {A Few Results in Random Matrix Theory and Random Optimization}, diff --git a/marginal.tex b/marginal.tex index ec28568..5b73f9e 100644 --- a/marginal.tex +++ b/marginal.tex @@ -4,15 +4,8 @@ \usepackage[T1]{fontenc} \usepackage{amsmath,amssymb,latexsym,graphicx} \usepackage{newtxtext,newtxmath} -\usepackage{bbold} +\usepackage{bbold,anyfontsize} \usepackage[dvipsnames]{xcolor} -\usepackage[ - colorlinks=true, - urlcolor=MidnightBlue, - citecolor=MidnightBlue, - filecolor=MidnightBlue, - linkcolor=MidnightBlue -]{hyperref} \begin{document} @@ -24,6 +17,18 @@ \affiliation{Istituto Nazionale di Fisica Nucleare, Sezione di Roma I, Rome, Italy 00184} \begin{abstract} + Marginal optima are minima or maxima of a function with many asymptotically + flat directions. In settings with many competing optima, marginal ones tend + to attract algorithms and physical dynamics. Often, the important family of + marginal attractors are a vanishing minority compared with nonmarginal optima + and other unstable stationary points. 
We introduce a generic technique for + conditioning the statistics of stationary points on their marginality, and + apply it in three isotropic settings with different typical forms for the + Hessian at optima: in the spherical spin glasses, where the Hessian is GOE; + in multispherical spin glasses, which are Gaussian but non-GOE; and in a + model of random nonlinear sum of squares, which is non-Gaussian. In these + problems we are able to fully characterize the distribution of marginal + optima in the landscape, including when they are in the minority. \end{abstract} \maketitle @@ -79,7 +84,7 @@ stationary points be zero, we restrict to marginal minima, either those with a pseudogap in their bulk spectrum or those with outlying eigenvectors. We provide a heuristic to distinguish these two cases. We demonstrate the method on the spherical models, where it is unnecessary but instructive, and on -extensions of the spherical models with non-\textsc{goe} Hessians where the technique is +extensions of the spherical models with non-GOE Hessians where the technique is more useful. \section{Conditioning on the smallest eigenvalue} @@ -88,7 +93,7 @@ more useful. An arbitrary function $g$ of the minimum eigenvalue of a matrix $A$ can be expressed as -\begin{equation} +\begin{equation} \label{eq:λmax} g(\lambda_\textrm{max}(A)) =\lim_{\beta\to\infty}\int \frac{d\mathbf s\,\delta(N-\mathbf s^T\mathbf s)e^{\beta\mathbf s^TA\mathbf s}} @@ -128,8 +133,8 @@ minimum eigenvalue is zero. We demonstrate the efficacy of the technique by rederiving a well-known result: the large-deviation function for pulling an eigenvalue from the bulk of the -\textsc{goe} spectrum. -Consider an ensemble of $N\times N$ matrices $A=B+\mu I$ for $B$ drawn from the \textsc{goe} ensemble with entries +GOE spectrum. +Consider an ensemble of $N\times N$ matrices $A=B+\mu I$ for $B$ drawn from the GOE ensemble with entries whose variance is $\sigma^2/N$.
We know that the bulk spectrum of $A$ is a Wigner semicircle with radius $2\sigma$ shifted by a constant $\mu$. Therefore, for $\mu=2\sigma$, the minimum eigenvalue will typically be zero, @@ -141,37 +146,51 @@ eigenvalues, would be necessary. This final case cannot be quantified by this method, but instead the nonexistence of a large deviation linear in $N$ appears as the emergence of an imaginary part in the function. -\begin{widetext} As an example, we compute \begin{equation} \label{eq:large.dev} - e^{NG_\lambda(\mu)}=P_{\lambda_\mathrm{min}(B+\mu I)=\lambda}=\overline{\lim_{\beta\to\infty}\int\frac{d\mathbf x\,\delta(N-\mathbf x^T\mathbf x)e^{-\beta\mathbf x^T(B+\mu I)\mathbf x}}{\int d\mathbf x'\,\delta(N-\mathbf x'^T\mathbf x')e^{-\beta\mathbf x'^T(B+\mu I)\mathbf x'}}\,\delta\big(\mathbf x^T(B+\mu I)\mathbf x-N\lambda\big)} + e^{NG_{\lambda^*}(\mu)} + =P_{\lambda_\mathrm{max}(B-\mu I)=\lambda^*} + =\overline{\delta\big(N\lambda^*-N\lambda_\mathrm{max}(B-\mu I)\big)} \end{equation} where the overline is the average over $B$, and we have defined the large deviation function $G_\sigma(\mu)$.
+Using the representation of $\lambda_\mathrm{max}$ defined in \eqref{eq:λmax}, we have +\begin{widetext} +\begin{equation} + e^{NG_{\lambda^*}(\mu)} + =\overline{ + \lim_{\beta\to\infty}\int\frac{d\mathbf x\,\delta(N-\mathbf x^T\mathbf x)e^{\beta\mathbf x^T(B-\mu I)\mathbf x}} + {\int d\mathbf x'\,\delta(N-\mathbf x'^T\mathbf x')e^{\beta\mathbf x'^T(B-\mu I)\mathbf x'}}\,\delta\big(N\lambda^*-\mathbf x^T(B-\mu I)\mathbf x\big) + } +\end{equation} +Using replicas to treat the denominator ($x^{-1}=\lim_{n\to0}x^{n-1}$) and transforming the $\delta$-function to its Fourier representation, we have \begin{equation} - e^{NG_\lambda(\mu)}=\overline{\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right] - \exp\left\{-\beta\sum_{a=1}^n\mathbf x_a^T(B+\mu I)\mathbf x_a+\hat\lambda\mathbf x_1^T(B+\mu I)\mathbf x_1-N\hat\lambda\lambda\right\}} + e^{NG_{\lambda^*}(\mu)} + =\overline{\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right] + \exp\left\{\beta\sum_{a=1}^n\mathbf x_a^T(B-\mu I)\mathbf x_a+\hat\lambda\left[N\lambda^*-\mathbf x_1^T(B-\mu I)\mathbf x_1\right]\right\}} \end{equation} having introduced the parameter $\hat\lambda$ in the Fourier representation of the $\delta$-function. The whole expression, so transformed, is a simple exponential integral linear in the matrix $B$. 
Taking the average over $B$, we have \begin{equation} - e^{NG_\lambda(\mu)} - =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right] - \exp\left\{-Nn\beta\mu+N\hat\lambda(\mu-\lambda)+\frac{\sigma^2}{N}\left[\beta^2\sum_{ab}^n(\mathbf x_a^T\mathbf x_b)^2 + \begin{aligned} + &e^{NG_{\lambda^*}(\mu)} + =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right] \\ + &\hspace{10em}\exp\left\{N\left[\hat\lambda(\mu+\lambda^*)-n\beta\mu\right]+\frac{\sigma^2}{N}\left[\beta^2\sum_{ab}^n(\mathbf x_a^T\mathbf x_b)^2 -2\beta\hat\lambda\sum_a^n(\mathbf x_a^T\mathbf x_1)^2 +\hat\lambda^2N^2 \right]\right\} + \end{aligned} \end{equation} We make the Hubbard--Stratonovich transformation to the matrix field $Q_{ab}=\frac1N\mathbf x_a^T\mathbf x_b$. This gives \begin{equation} - e^{NG_\lambda(\mu)} + e^{NG_{\lambda^*}(\mu)} =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\,dQ\, \exp N\left\{ - -n\beta\mu+\hat\lambda(\mu-\lambda)+\sigma^2\left[\beta^2\sum_{ab}^nQ_{ab}^2 - +-\beta\hat\lambda\sum_a^nQ_{1a}^2 + \hat\lambda(\mu+\lambda^*)-n\beta\mu+\sigma^2\left[\beta^2\sum_{ab}^nQ_{ab}^2 + -2\beta\hat\lambda\sum_a^nQ_{1a}^2 +\hat\lambda^2 \right]+\frac12\log\det Q\right\} \end{equation} @@ -196,49 +215,57 @@ and \end{equation} Inserting these expressions and taking the limit of $n$ to zero, we find \begin{equation} - e^{NG_\sigma(\mu)}=\lim_{\beta\to\infty}\int d\hat\lambda\,dq_0\,d\tilde q_0\,e^{N\mathcal S_\beta(q_0,\tilde q_0,\hat\lambda)} + e^{NG_{\lambda^*}(\mu)}=\lim_{\beta\to\infty}\int d\hat\lambda\,dq_0\,d\tilde q_0\,e^{N\mathcal S_\beta(q_0,\tilde q_0,\hat\lambda)} \end{equation} with the effective action \begin{equation} - \mathcal S_\beta(q_0,\tilde q_0,\hat\lambda)=\hat\lambda(\mu-\lambda)+\sigma^2\left[ - 2\beta^2(q_0^2-\tilde q_0^2)-2\beta\hat\lambda(1-\tilde q_0^2)+\hat\lambda^2 - \right]-\log(1-q_0)+\frac12\log(1-2q_0+\tilde 
q_0^2) + \begin{aligned} + &\mathcal S_\beta(q_0,\tilde q_0,\hat\lambda) \\ + &\quad=\hat\lambda(\mu+\lambda^*)+\sigma^2\left[ + 2\beta^2(q_0^2-\tilde q_0^2)-2\beta\hat\lambda(1-\tilde q_0^2)+\hat\lambda^2 + \right] \\ + &\qquad-\log(1-q_0)+\frac12\log(1-2q_0+\tilde q_0^2) + \end{aligned} \end{equation} We need to evaluate the integral above using the saddle point method, but in the limit of $\beta\to\infty$. We expect the overlaps to concentrate on one as $\beta$ goes to infinity. We therefore take \begin{align} - q_0=1-y\beta^{-1}-z\beta^{-2}+O(\beta^{-3}) - && - \tilde q_0=1-\tilde y\beta^{-1}-\tilde z\beta^{-2}+O(\beta^{-3}) + q_0&=1-y\beta^{-1}-z\beta^{-2}+O(\beta^{-3}) + \\ + \tilde q_0&=1-\tilde y\beta^{-1}-(z-\Delta z)\beta^{-2}+O(\beta^{-3}) \end{align} However, taking the limit with $y\neq\tilde y$ results in an expression for the action that diverges with $\beta$. To cure this, we must take $\tilde y=y$. The result is \begin{equation} - \mathcal S_\infty(y,z,\tilde z,\hat\lambda) - =\hat\lambda(\mu-\lambda)+\sigma^2\big[ - \hat\lambda^2-4(y+z-\tilde z) - \big]+\frac12\log\left(1+2\frac{z-\tilde z}{y^2}\right) + \begin{aligned} + \mathcal S_\infty(y,\Delta z,\hat\lambda) + &=\hat\lambda(\mu+\lambda^*) + +\sigma^2\big[ + \hat\lambda^2-4(y+\Delta z) + \big] \\ + &\qquad+\frac12\log\left(1+\frac{2\Delta z}{y^2}\right) + \end{aligned} \end{equation} Extremizing this action over the new parameters $y$, $\Delta z=z-\tilde z$, and $\hat\lambda$, we have \begin{align} - \hat\lambda=-\frac1\sigma\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1} + \hat\lambda=-\frac1\sigma\sqrt{\frac{(\mu+\lambda^*)^2}{(2\sigma)^2}-1} \\ - y=\frac1{2\sigma}\left(\frac{\mu-\lambda}{2\sigma}-\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}\right) + y=\frac1{2\sigma}\left(\frac{\mu+\lambda^*}{2\sigma}-\sqrt{\frac{(\mu+\lambda^*)^2}{(2\sigma)^2}-1}\right) &\\ - \Delta 
z=\frac1{4\sigma^2}\left(1-\frac{\mu-\lambda}{2\sigma}\left(\frac{\mu-\lambda}{2\sigma}-\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1}\right)\right) + \Delta z=\frac1{4\sigma^2}\left(1-\frac{\mu+\lambda^*}{2\sigma}\left(\frac{\mu+\lambda^*}{2\sigma}-\sqrt{\frac{(\mu+\lambda^*)^2}{(2\sigma)^2}-1}\right)\right) \end{align} Inserting this solution into $\mathcal S_\infty$ we find \begin{equation} \begin{aligned} - &G_\lambda(\mu) + &G_{\lambda^*}(\mu) =\mathop{\textrm{extremum}}_{y,\Delta z,\hat\lambda}\mathcal S_\infty(y,\Delta z,\hat\lambda) \\ - &=-\frac{\mu-\lambda}{2\sigma}\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1} + &=-\tfrac{\mu+\lambda^*}{2\sigma}\sqrt{\Big(\tfrac{\mu+\lambda^*}{2\sigma}\Big)^2-1} +\log\left( - \frac{\mu-\lambda}{2\sigma}+\sqrt{\frac{(\mu-\lambda)^2}{(2\sigma)^2}-1} + \tfrac{\mu+\lambda^*}{2\sigma}+\sqrt{\Big(\tfrac{\mu+\lambda^*}{2\sigma}\Big)^2-1} \right) \end{aligned} \end{equation} -This function is plotted in Fig.~\ref{fig:large.dev}. For $\mu<2\sigma$ $G_\sigma(\mu)$ has an +This function is plotted in Fig.~\ref{fig:large.dev}. For $\mu<2\sigma$ $G_{\lambda^*}(\mu)$ has an imaginary part, which makes any additional integral over $\mu$ highly oscillatory. This indicates that the existence of a marginal minimum for this parameter value corresponds with a large deviation that grows faster than $N$, @@ -248,14 +275,15 @@ deviations in order for the smallest eigenvalue to be zero. For $\mu\geq2\sigma$ this function gives the large deviation function for the probability of seeing a zero eigenvalue given the shift $\mu$. $\mu=2\sigma$ is the maximum of the function with a real value, and -corresponds to the intersection of the average spectrum with zero, i.e., a pseudogap. +corresponds to the intersection of the average spectrum with zero, i.e., a +pseudogap. 
\begin{figure} \includegraphics[width=\columnwidth]{figs/large_deviation.pdf} \caption{ The large deviation function $G_\sigma(\mu)$ defined in \eqref{eq:large.dev} as a function of the shift $\mu$ to the - \textsc{goe} diagonal. As expected, $G_\sigma(2\sigma)=0$, while for + GOE diagonal. As expected, $G_\sigma(2\sigma)=0$, while for $\mu>2\sigma$ it is negative and for $\mu<2\sigma$ it gains an imaginary part. } \label{fig:large.dev} @@ -264,7 +292,7 @@ corresponds to the intersection of the average spectrum with zero, i.e., a pseud Marginal spectra with a pseudogap and those with simple isolated eigenvalues are qualitatively different, and more attention may be focused on the former. Here, we see what appears to be a general heuristic for identifying the saddle -parameters for which the spectrum is psedogapped: the equivalent of this +parameters for which the spectrum is pseudogapped: the equivalent of this large-deviation functions will lie on the singular boundary between a purely real and complex value. @@ -279,17 +307,28 @@ pseudogapped spectra, where the continuous part of the spectral density has a lower bound at zero. Fortunately, our calculation can be modified to ensure that we consider only -psedogapped spectra. First, we insert a shift $\mu$ by hand into the `natural' +pseudogapped spectra. First, we insert a shift $\mu$ by hand into the `natural' spectrum of the problem at hand, conditioning the trace to have a specific -value. Then, we choose this artificial shift so that the resulting conditioned -spectra are pseudogapped. This we can do by looking for the point where the -order parameter $\lambda$ associated with the marginal condition is zero. +value $\mu=\operatorname{Tr}A$. Then, we choose this artificial shift so that +the resulting conditioned spectra are pseudogapped. 
As seen in the previous +subsection, this can be done by starting from a sufficiently large $\mu$ and +decreasing it until the calculation develops an imaginary part, signaling the +breakdown of the large-deviation principle at order $N$. + +In isotropic or zero-signal landscapes, there is another way to condition on a +pseudogap. In such landscapes, the typical spectrum does not have an isolated +eigenvalue. Therefore, the condition associated with the bulk of the spectrum +reaching zero, i.e., the pseudogap, will always correspond to the most common +configuration. We can therefore choose $\mu=\mu_\textrm m$ such that +\begin{equation} + 0=\frac\partial{\partial\lambda^*}G_{\lambda^*}(\mu_\mathrm m)\bigg|_{\lambda^*=0} +\end{equation} +In the previous problem, this corresponds precisely to $\mu_\mathrm m=2\sigma$, +the correct marginal shift. Note that when we treat the Dirac $\delta$ function +using its Fourier representation with auxiliary parameter $\hat\lambda$, as in +the previous subsection, this condition corresponds with choosing $\mu$ such +that $\hat\lambda=0$. -What is the interpretation of this? In general the condition $\lambda=0$ -corresponds to a point where the conditioning does not change the volume -measured by the integral. Therefore, the typical matrix with the value of $\mu$ -for which $\lambda=0$ has a zero eigenvalue. In isotropic problems where -isolated eigenvalues in the spectrum are atypical, this implies a pseudogap. \section{Marginal complexity in random landscapes} @@ -321,19 +360,25 @@ the Jacobian of the argument to the $\delta$-function.
It is usually more interesting to condition the count on interesting properties of the stationary points, like the energy, \begin{equation} - d\mu_H(\mathbf s,\pmb\omega\mid E)=d\mu_H(\mathbf s,\pmb\omega)\,\delta\big(NE-H(\mathbf s)\big) + \begin{aligned} + &d\mu_H(\mathbf s,\pmb\omega\mid E,\mu) \\ + &\quad=d\mu_H(\mathbf s,\pmb\omega)\, + \delta\big(NE-H(\mathbf s)\big) + \,\delta\big(N\mu-\operatorname{Tr}\operatorname{Hess}H(\mathbf s,\pmb\omega)\big) + \end{aligned} \end{equation} In this paper we in particular want to exploit our method to condition complexity on the marginality of stationary points. We therefore define the number of marginal points in a particular instantiation $H$ as +\begin{widetext} \begin{equation} \begin{aligned} - &\mathcal N_{0}(E,\mu) - =\int d\mu_H(\mathbf s,\pmb\omega\mid E,\mu)\,\delta\big(N\lambda_\mathrm{min}(\operatorname{Hess}H(\mathbf s,\pmb\omega))\big) \\ + &\mathcal N_H(E,\mu,\lambda^*) + =\int d\mu_H(\mathbf s,\pmb\omega\mid E,\mu)\,\delta\big(N\lambda^*-N\lambda_\mathrm{max}(\operatorname{Hess}H(\mathbf s,\pmb\omega))\big) \\ &=\lim_{\beta\to\infty}\int d\mu_H(\mathbf s,\pmb\omega\mid E,\mu) \frac{d\mathbf x\,\delta(N-\mathbf x^T\mathbf x)\delta(\mathbf x^T\partial\mathbf g(\mathbf s))e^{\beta\mathbf x^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x}} {\int d\mathbf x'\,\delta(N-\mathbf x'^T\mathbf x')\delta(\mathbf x'^T\partial\mathbf g(\mathbf s))e^{\beta\mathbf x'^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x'}} - \delta\big(\mathbf x^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x\big) + \delta\big(N\lambda^*-\mathbf x^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x\big) \end{aligned} \end{equation} where the $\delta$-functions @@ -341,25 +386,28 @@ where the $\delta$-functions \delta(\mathbf x^T\partial\mathbf g(\mathbf s)) =\prod_{s=1}^r\delta(\mathbf x^T\partial g_i(\mathbf s)) \end{equation} -ensure that the integrals are constrained to the tangent space of the configuration manifold at the
point $\mathbf s$. This likewise allows us to define the complexity of marginal points at energy $E$ as +ensure that the integrals are constrained to the tangent space of the +configuration manifold at the point $\mathbf s$. This likewise allows us to +define the complexity of points with a specific energy, stability, and maximum eigenvalue as \begin{equation} - \Sigma_0(E,\mu) - =\frac1N\overline{\log\mathcal N_0(E)} + \Sigma_{\lambda^*}(E,\mu) + =\frac1N\overline{\log\mathcal N_H(E,\mu,\lambda^*)} \end{equation} In practice, this can be computed by introducing replicas to treat the logarithm ($\log x=\lim_{n\to0}\frac\partial{\partial n}x^n$) and replicating again to treat each of the normalizations in the numerator. This leads to the expression \begin{equation} \begin{aligned} - \Sigma_0(E,\mu) - &=\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n}\int\prod_{a=1}^n\Bigg[d\mu_H(\mathbf s_a,\pmb\omega_a\mid E,\mu)\,\delta\big((\mathbf x_a^1)^T\operatorname{Hess}H(\mathbf s_a,\pmb\omega_a)\mathbf x_a^1\big)\\ - &\qquad\times\lim_{m_a\to0} + \Sigma_{\lambda^*}(E,\mu) + &=\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n}\int\prod_{a=1}^n\Bigg[d\mu_H(\mathbf s_a,\pmb\omega_a\mid E,\mu)\,\delta\big(N\lambda^*-(\mathbf x_a^1)^T\operatorname{Hess}H(\mathbf s_a,\pmb\omega_a)\mathbf x_a^1\big)\\ + &\hspace{12em}\times\lim_{m_a\to0} \left(\prod_{b=1}^{m_a} d\mathbf x_a^b\,\delta(N-(\mathbf x_a^b)^T\mathbf x_a^b)\delta((\mathbf x_a^b)^T\partial\mathbf g(\mathbf s_a))e^{\beta(\mathbf x_a^b)^T\operatorname{Hess}H(\mathbf s_a,\pmb\omega_a)\mathbf x_a^b}\right)\Bigg] \end{aligned} \end{equation} -Finally, the \emph{marginal} complexity is given by fixing $\mu=\mu_\text{m}$ so that the complexity is stationary with respect to changes in the value of the minimum eigenvalue, or +\end{widetext} +Finally, the \emph{marginal} complexity is given by fixing $\mu=\mu_\text{m}$ so that the complexity is stationary with respect to changes in the value of the 
maximum eigenvalue, or \begin{equation} - 0=\frac\partial{\partial\lambda}\Sigma_\lambda(E,\mu_\text{m}(E))\bigg|_{\lambda=0} + 0=\frac\partial{\partial\lambda^*}\Sigma_{\lambda^*}(E,\mu_\text{m}(E))\bigg|_{\lambda^*=0} \end{equation} Finally, the marginal complexity is defined by evaluating the complexity conditioned on $\lambda_{\text{min}}=0$ at $\mu_\text{m}$, \begin{equation} @@ -367,7 +415,9 @@ Finally, the marginal complexity is defined by evaluating the complexity conditi =\Sigma_0(E,\mu_\text m(E)) \end{equation} -\subsection{Application to the spherical models} +\section{Examples} + +\subsection{Spherical spin glasses} \begin{align} C_{ab}=\frac1N\mathbf s_a\cdot\mathbf s_b @@ -383,6 +433,7 @@ Finally, the marginal complexity is defined by evaluating the complexity conditi \hat X^c_{ab}=\frac1N\hat{\mathbf s}_a\cdot\mathbf x_b^c \end{align} +\begin{widetext} \begin{equation} \begin{aligned} &\sum_{ab}^n\left[\beta\omega A_{aa}^{bb}+\hat x\omega A_{aa}^{11}+\beta^2f''(1)\sum_{cd}^m(A_{ab}^{cd})^2+\hat x^2f''(1)(A_{ab}^{11})^2+\beta\hat xf''(1)\sum_c^m A_{ab}^{1c}\right]\\ @@ -390,6 +441,7 @@ Finally, the marginal complexity is defined by evaluating the complexity conditi +\log\det\begin{bmatrix}C&iR\\iR&D\end{bmatrix}-\log\det F \end{aligned} \end{equation} +\end{widetext} $X^a$ is $n\times m_a$, and $A^{ab}$ is $m_a\times m_b$. \begin{equation} @@ -416,7 +468,7 @@ We will discuss at the end of this paper when these order parameters can be expe \end{equation} where the maximum over $\omega$ needs to lie at a real value. 
-\subsection{Twin spherical model} +\subsection{Twin spherical spin glasses} $\Omega=S^{N-1}\times S^{N-1}$ \begin{equation} @@ -427,6 +479,7 @@ $\Omega=S^{N-1}\times S^{N-1}$ =Nf_s\left(\frac{\pmb\sigma_1\cdot\pmb\sigma_2}N\right) \end{equation} +\begin{widetext} \begin{equation} \mathcal S(C,R,D,W,\hat\beta,\omega) =\frac12\frac1n @@ -436,13 +489,15 @@ $\Omega=S^{N-1}\times S^{N-1}$ \end{equation} \begin{equation} - \mathcal S(C^{11},R^{11},D^{11},W^{11},\hat\beta)+\mathcal S(C^{22},R^{22},D^{22},W^{22},\hat\beta) - -\epsilon(r_{12}+r_{21})-\omega_1(r^{11}_d-w^{11}_d)-\omega_2(r^{22}_d-w^{22}_d)+\hat\beta E - +\frac12\log\det\begin{bmatrix}C^{11}&iR^{11}\\iR^{11}&D^{11}\end{bmatrix} - +\frac12\log\det\left( - \begin{bmatrix}C^{22}-q_{12}^2C^{11}&iR^{22}\\iR^{22}&D^{22}\end{bmatrix} - \right) - -\log\det(W^{11}W^{22}+W^{12}W^{21}) + \begin{aligned} + &\mathcal S(C^{11},R^{11},D^{11},W^{11},\hat\beta)+\mathcal S(C^{22},R^{22},D^{22},W^{22},\hat\beta) + -\epsilon(r_{12}+r_{21})-\omega_1(r^{11}_d-w^{11}_d)-\omega_2(r^{22}_d-w^{22}_d)+\hat\beta E \\ + &+\frac12\log\det\begin{bmatrix}C^{11}&iR^{11}\\iR^{11}&D^{11}\end{bmatrix} + +\frac12\log\det\left( + \begin{bmatrix}C^{22}-q_{12}^2C^{11}&iR^{22}\\iR^{22}&D^{22}\end{bmatrix} + \right) + -\log\det(W^{11}W^{22}+W^{12}W^{21}) + \end{aligned} \end{equation} \begin{equation} @@ -456,6 +511,7 @@ $\Omega=S^{N-1}\times S^{N-1}$ \end{bmatrix} \end{aligned} \end{equation} +\end{widetext} \begin{equation} \log\det\begin{bmatrix} Q^{11}&Q^{12}\\ @@ -464,7 +520,7 @@ $\Omega=S^{N-1}\times S^{N-1}$ +\log\det(Q^{11}Q^{22}-Q^{12}Q^{12}) \end{equation} -\subsection{Nonlinear least squares} +\subsection{Random nonlinear least squares} In this subsection we consider perhaps the simplest example of a non-Gaussian landscape: the problem of random nonlinear least squares optimization. 
Though, @@ -570,15 +626,13 @@ which produces \right\} \end{equation} \begin{equation} - \log\det\left( - \begin{bmatrix} - (\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f(C_{cb}) + R_{ab}f'(C_{ab}) - & - \frac1N\left[(\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f'(C_{cb})+R_{ab}f''(C_{ab})\right]\mathbf x_b^T-\frac1Nf'(C_{ab})\hat{\mathbf x}_a^T - \\ - -i\hat{\mathbf x}_af(C_{ab})+\frac1N\hat\mu f'(C_{ab})\mathbf x_b - \end{bmatrix} - \right) + \begin{bmatrix} + (\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f(C_{cb}) + R_{ab}f'(C_{ab}) + & + \frac1N\left[(\hat\beta_a\delta_{ac}+G_{ac}^2f''(C_{ac}))f'(C_{cb})+R_{ab}f''(C_{ab})\right]\mathbf x_b^T-\frac1Nf'(C_{ab})\hat{\mathbf x}_a^T + \\ + -i\hat{\mathbf x}_af(C_{ab})+\frac1N\hat\mu f'(C_{ab})\mathbf x_b + \end{bmatrix} \end{equation} The condition fixing the maximum eigenvalue adds to the integrand -- cgit v1.2.3-70-g09d2