From 7f64d0736d8b3b022a66d070e54c990a782630ce Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Thu, 23 May 2024 17:12:36 +0200 Subject: Some changes to reference list. --- marginal.tex | 234 +++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 194 insertions(+), 40 deletions(-) (limited to 'marginal.tex') diff --git a/marginal.tex b/marginal.tex index 5b73f9e..83d9d25 100644 --- a/marginal.tex +++ b/marginal.tex @@ -47,7 +47,7 @@ stuck exploring only a subset of configurations. In some simple models of such landscapes, it was recently found that marginal minima are significant as the attractors of gradient descent dynamics \cite{Folena_2020_Rethinking, Folena_2023_On}. This extends to more novel -algorithms, like message passing \cite{} \textbf{Find out if this is true}. +algorithms, like message passing \cite{Add_me} \textbf{Find out if this is true}. \textbf{Think of other examples.} While it is still not known how to predict which marginal minima will be attractors, this ubiquity of behavior suggests that cartography of marginal @@ -159,8 +159,8 @@ Using the representation of $\lambda_\mathrm{max}$ defined in \eqref{eq:λmax}, \begin{equation} e^{NG_{\lambda^*}(\mu)} =\overline{ - \lim_{\beta\to\infty}\int\frac{d\mathbf x\,\delta(N-\mathbf x^T\mathbf x)e^{\beta\mathbf x^T(B-\mu I)\mathbf x}} - {\int d\mathbf x'\,\delta(N-\mathbf x'^T\mathbf x')e^{\beta\mathbf x'^T(B-\mu I)\mathbf x'}}\,\delta\big(N\lambda^*-\mathbf x^T(B-\mu I)\mathbf x\big) + \lim_{\beta\to\infty}\int\frac{d\mathbf s\,\delta(N-\mathbf s^T\mathbf s)e^{\beta\mathbf s^T(B-\mu I)\mathbf s}} + {\int d\mathbf s'\,\delta(N-\mathbf s'^T\mathbf s')e^{\beta\mathbf s'^T(B-\mu I)\mathbf s'}}\,\delta\big(N\lambda^*-\mathbf s^T(B-\mu I)\mathbf s\big) } \end{equation} Using replicas to treat the denominator ($x^{-1}=\lim_{n\to0}x^{n-1}$) @@ -168,8 +168,8 @@ and transforming the $\delta$-function to its Fourier representation, we have \begin{equation} e^{NG_{\lambda^*}(\mu)} - 
=\overline{\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right] - \exp\left\{\beta\sum_{a=1}^n\mathbf x_a^T(B-\mu I)\mathbf x_a+\hat\lambda\left[N\lambda^*-\mathbf x_1^T(B-\mu I)\mathbf x_1\right]\right\}} + =\overline{\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf s_a\,\delta(N-\mathbf s_a^T\mathbf s_a)\right] + \exp\left\{\beta\sum_{a=1}^n\mathbf s_a^T(B-\mu I)\mathbf s_a+\hat\lambda\left[N\lambda^*-\mathbf s_1^T(B-\mu I)\mathbf s_1\right]\right\}} \end{equation} having introduced the parameter $\hat\lambda$ in the Fourier representation of the $\delta$-function. The whole expression, so transformed, is a simple exponential integral linear in the matrix $B$. @@ -177,14 +177,14 @@ Taking the average over $B$, we have \begin{equation} \begin{aligned} &e^{NG_{\lambda^*}(\mu)} - =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf x_a\,\delta(N-\mathbf x_a^T\mathbf x_a)\right] \\ - &\hspace{10em}\exp\left\{N\left[\hat\lambda(\mu+\lambda^*)-n\beta\mu\right]+\frac{\sigma^2}{N}\left[\beta^2\sum_{ab}^n(\mathbf x_a^T\mathbf x_b)^2 - -2\beta\hat\lambda\sum_a^n(\mathbf x_a^T\mathbf x_1)^2 + =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\prod_{a=1}^n\left[d\mathbf s_a\,\delta(N-\mathbf s_a^T\mathbf s_a)\right] \\ + &\hspace{10em}\exp\left\{N\left[\hat\lambda(\mu+\lambda^*)-n\beta\mu\right]+\frac{\sigma^2}{N}\left[\beta^2\sum_{ab}^n(\mathbf s_a^T\mathbf s_b)^2 + -2\beta\hat\lambda\sum_a^n(\mathbf s_a^T\mathbf s_1)^2 +\hat\lambda^2N^2 \right]\right\} \end{aligned} \end{equation} -We make the Hubbard--Stratonovich transformation to the matrix field $Q_{ab}=\frac1N\mathbf x_a^T\mathbf x_b$. This gives +We make the Hubbard--Stratonovich transformation to the matrix field $Q_{ab}=\frac1N\mathbf s_a^T\mathbf s_b$. 
This gives \begin{equation} e^{NG_{\lambda^*}(\mu)} =\lim_{\beta\to\infty}\lim_{n\to0}\int d\hat\lambda\,dQ\, @@ -255,7 +255,7 @@ Extremizing this action over the new parameters $y$, $\Delta z=z-\tilde z$, and \Delta z=\frac1{4\sigma^2}\left(1-\frac{\mu+\lambda^*}{2\sigma}\left(\frac{\mu+\lambda^*}{2\sigma}-\sqrt{\frac{(\mu+\lambda^*)^2}{(2\sigma)^2}-1}\right)\right) \end{align} Inserting this solution into $\mathcal S_\infty$ we find -\begin{equation} +\begin{equation} \label{eq:goe.large.dev} \begin{aligned} &G_{\lambda^*}(\mu) =\mathop{\textrm{extremum}}_{y,\Delta z,\hat\lambda}\mathcal S_\infty(y,\Delta z,\hat\lambda) \\ @@ -335,24 +335,24 @@ that $\hat\lambda=0$. The situation in the study of random landscapes is often as follows: an ensemble of smooth functions $H:\mathbb R^N\to\mathbb R$ define random landscapes, often with their configuration space subject to one or more -constraints of the form $g(\mathbf s)=0$ for $\mathbf s\in\mathbb R^N$. The +constraints of the form $g(\mathbf x)=0$ for $\mathbf x\in\mathbb R^N$. The geometry of such landscapes is studied by their complexity, or the average logarithm of the number of stationary points with certain properties, e.g., of marginal minima at a given energy. Such problems can be studied using the method of Lagrange multipliers, with one introduced for every constraint. If the configuration space is defined by $r$ constraints, then the problem is to extremize \begin{equation} - H(\mathbf s)+\sum_{i=1}^r\omega_ig_i(\mathbf s) + H(\mathbf x)+\sum_{i=1}^r\omega_ig_i(\mathbf x) \end{equation} -with respect to $\mathbf s$ and $\pmb\omega=\{\omega_1,\ldots,\omega_r\}$. The corresponding gradient and Hessian for the problem are +with respect to $\mathbf x$ and $\pmb\omega=\{\omega_1,\ldots,\omega_r\}$. 
The corresponding gradient and Hessian for the problem are \begin{align} - \nabla H(\mathbf s,\pmb\omega)=\partial H(\mathbf s)+\sum_{i=1}^r\omega_i\partial g_i(\mathbf s) + \nabla H(\mathbf x,\pmb\omega)=\partial H(\mathbf x)+\sum_{i=1}^r\omega_i\partial g_i(\mathbf x) \\ - \operatorname{Hess}H(\mathbf s,\pmb\omega)=\partial\partial H(\mathbf s)+\sum_{i=1}^r\omega_i\partial\partial g_i(\mathbf s) + \operatorname{Hess}H(\mathbf x,\pmb\omega)=\partial\partial H(\mathbf x)+\sum_{i=1}^r\omega_i\partial\partial g_i(\mathbf x) \end{align} The number of stationary points in a landscape for a particular realization $H$ is found by integrating over the Kac--Rice measure \begin{equation} - d\mu_H(\mathbf s,\pmb\omega)=d\mathbf s\,d\pmb\omega\,\delta\big(\nabla H(\mathbf s,\pmb\omega)\big)\,\delta\big(\mathbf g(\mathbf s)\big)\,\big|\det\operatorname{Hess}H(\mathbf s,\pmb\omega)\big| + d\mu_H(\mathbf x,\pmb\omega)=d\mathbf x\,d\pmb\omega\,\delta\big(\nabla H(\mathbf x,\pmb\omega)\big)\,\delta\big(\mathbf g(\mathbf x)\big)\,\big|\det\operatorname{Hess}H(\mathbf x,\pmb\omega)\big| \end{equation} with a $\delta$-function of the gradient and the constraints ensuring that we count valid stationary points, and the Hessian entering in the determinant of @@ -361,9 +361,9 @@ interesting to condition the count on interesting properties of the stationary points, like the energy, \begin{equation} \begin{aligned} - &d\mu_H(\mathbf s,\pmb\omega\mid E,\mu) \\ - &\quad=d\mu_H(\mathbf s,\pmb\omega)\, - \delta\big(NE-H(\mathbf s)\big) + &d\mu_H(\mathbf x,\pmb\omega\mid E,\mu) \\ + &\quad=d\mu_H(\mathbf x,\pmb\omega)\, + \delta\big(NE-H(\mathbf x)\big) \,\delta\big(N\mu-\operatorname{Tr}\operatorname{Hess}H(\mathbf x,\pmb\omega)\big) \end{aligned} \end{equation} @@ -374,20 +374,20 @@ number of marginal points in a particular instantiation $H$ as \begin{equation} \begin{aligned} &\mathcal N_H(E,\mu,\lambda^*) - =\int d\mu_H(\mathbf s,\pmb\omega\mid 
E,\mu)\,\delta\big(N\lambda^*-\lambda_\mathrm{max}(\operatorname{Hess}H(\mathbf s,\pmb\omega))\big) \\ - &=\lim_{\beta\to\infty}\int d\mu_H(\mathbf s,\pmb\omega\mid E,\mu) - \frac{d\mathbf x\,\delta(N-\mathbf x^T\mathbf x)\delta(\mathbf x^T\partial\mathbf g(\mathbf s))e^{\beta\mathbf x^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x}} - {\int d\mathbf x'\,\delta(N-\mathbf x'^T\mathbf x')\delta(\mathbf x'^T\partial\mathbf g(\mathbf s))e^{\beta\mathbf x'^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x'}} - \delta\big(N\lambda^*-\mathbf x^T\operatorname{Hess}H(\mathbf s,\pmb\omega)\mathbf x\big) + =\int d\mu_H(\mathbf x,\pmb\omega\mid E,\mu)\,\delta\big(N\lambda^*-\lambda_\mathrm{max}(\operatorname{Hess}H(\mathbf x,\pmb\omega))\big) \\ + &=\lim_{\beta\to\infty}\int d\mu_H(\mathbf x,\pmb\omega\mid E,\mu) + \frac{d\mathbf s\,\delta(N-\mathbf s^T\mathbf s)\delta(\mathbf s^T\partial\mathbf g(\mathbf x))e^{\beta\mathbf s^T\operatorname{Hess}H(\mathbf x,\pmb\omega)\mathbf s}} + {\int d\mathbf s'\,\delta(N-\mathbf s'^T\mathbf s')\delta(\mathbf s'^T\partial\mathbf g(\mathbf x))e^{\beta\mathbf s'^T\operatorname{Hess}H(\mathbf x,\pmb\omega)\mathbf s'}} + \delta\big(N\lambda^*-\mathbf s^T\operatorname{Hess}H(\mathbf x,\pmb\omega)\mathbf s\big) \end{aligned} \end{equation} where the $\delta$-functions \begin{equation} - \delta(\mathbf x^T\partial\mathbf g(\mathbf s)) - =\prod_{s=1}^r\delta(\mathbf x^T\partial g_i(\mathbf s)) + \delta(\mathbf s^T\partial\mathbf g(\mathbf x)) + =\prod_{i=1}^r\delta(\mathbf s^T\partial g_i(\mathbf x)) \end{equation} ensure that the integrals are constrained to the tangent space of the -configuration manifold at the point $\mathbf s$. This likewise allows us to +configuration manifold at the point $\mathbf x$. 
This likewise allows us to define the complexity of points with a specific energy, stability, and maximum eigenvalue as \begin{equation} \Sigma_{\lambda^*}(E,\mu) @@ -396,12 +396,16 @@ define the complexity of points with a specific energy, stability, and maximum e In practice, this can be computed by introducing replicas to treat the logarithm ($\log x=\lim_{n\to0}\frac\partial{\partial n}x^n$) and replicating again to treat each of the normalizations in the numerator. This leads to the expression -\begin{equation} +\begin{equation} \label{eq:max.complexity.expanded} \begin{aligned} \Sigma_{\lambda^*}(E,\mu) - &=\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n}\int\prod_{a=1}^n\Bigg[d\mu_H(\mathbf s_a,\pmb\omega_a\mid E,\mu)\,\delta\big(N\lambda^*-(\mathbf x_a^1)^T\operatorname{Hess}H(\mathbf s_a,\pmb\omega_a)\mathbf x_a^1\big)\\ + &=\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n}\int\prod_{a=1}^n\Bigg[d\mu_H(\mathbf x_a,\pmb\omega_a\mid E,\mu)\,\delta\big(N\lambda^*-(\mathbf s_a^1)^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega_a)\mathbf s_a^1\big)\\ &\hspace{12em}\times\lim_{m_a\to0} - \left(\prod_{b=1}^{m_a} d\mathbf x_a^b\,\delta(N-(\mathbf x_a^b)^T\mathbf x_a^b)\delta((\mathbf x_a^b)^T\partial\mathbf g(\mathbf s_a))e^{\beta(\mathbf x_a^b)^T\operatorname{Hess}H(\mathbf s_a,\pmb\omega_a)\mathbf x_a^b}\right)\Bigg] + \left(\prod_{b=1}^{m_a} d\mathbf s_a^b + \,\delta\big(N-(\mathbf s_a^b)^T\mathbf s_a^b\big) + \,\delta\big((\mathbf s_a^b)^T\partial\mathbf g(\mathbf x_a)\big) + \,e^{\beta(\mathbf s_a^b)^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega_a)\mathbf s_a^b}\right) + \Bigg] \end{aligned} \end{equation} \end{widetext} @@ -419,20 +423,116 @@ Finally, the marginal complexity is defined by evaluating the complexity conditi \subsection{Spherical spin glasses} +The spherical spin glasses are a family of models that encompass every +isotropic Gaussian field on the hypersphere $0=\mathbf x^T\mathbf x-N$ for +$\mathbf x\in\mathbb R^N$. 
One can consider the models as defined by centered Gaussian functions $H$ such that the covariance between two points in the configuration space is +\begin{equation} + \overline{H(\mathbf x)H(\mathbf x')}=Nf\left(\frac{\mathbf x^T\mathbf x'}N\right) +\end{equation} +for some function $f$ with positive series coefficients. Such functions can be considered to be made up of all-to-all tensorial interactions, with +\begin{equation} + H(\mathbf x) + =\sum_{p=0}^\infty\sqrt{\frac{f^{(p)}(0)}{p!\,N^{p-1}}}J_{i_1\cdots i_p}x_{i_1}\cdots x_{i_p} +\end{equation} +and the elements of the tensors $J$ being independently distributed with the +unit normal distribution. + +The marginal optima of these models can be studied without the methods +described here, and have been in the past \cite{Folena_2020_Rethinking, +Kent-Dobias_2023_How}. First, these models are Gaussian, so at large $N$ the +Hessian is statistically independent of the gradient and energy +\cite{Bray_2007_Statistics}. Therefore, conditioning the Hessian can be done +mostly independently from the problem of counting stationary points. Second, in +these models the Hessian at every point in the landscape belongs to the GOE +class with the same width of the spectrum $\mu_\mathrm m=2\sqrt{f''(1)}$. +Therefore, all marginal optima in these systems have the same constant shift +$\mu=\pm\mu_\mathrm m$. Despite the fact that the complexity of marginal optima is +well known by simpler methods, it is instructive to carry through the +calculation for this case, since we will learn something about its application in +more nontrivial settings. + +The procedure to treat the complexity of the spherical models has been described in +detail elsewhere \cite{Kent-Dobias_2023_How}. Here we will merely sketch the steps that are standard. We start by translating elements of the Kac--Rice measure into terms more familiar to physicists. 
This means writing \begin{align} - C_{ab}=\frac1N\mathbf s_a\cdot\mathbf s_b - && - R_{ab}=-i\frac1N\mathbf s_a\cdot\hat{\mathbf s}_b + \delta\big(\nabla H(\mathbf x_a,\pmb\omega_a)\big) + &=\int\frac{d\hat{\mathbf x}_a}{(2\pi)^N}e^{i\hat{\mathbf x}_a^T\nabla H(\mathbf x_a,\pmb\omega_a)} \\ + \delta\big(NE-H(\mathbf x_a)\big) + &=\int\frac{d\hat\beta_a}{2\pi}e^{\hat\beta_a(NE-H(\mathbf x_a))} \\ + \delta\big(N\lambda^*-(\mathbf s_a^1)^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega_a)\mathbf s_a^1\big) + &=\int\frac{d\hat\lambda_a}{2\pi}e^{\hat\lambda_a(N\lambda^*-(\mathbf s_a^1)^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega_a)\mathbf s_a^1)} +\end{align} +for the Dirac $\delta$-functions. At this point we will also discuss an +important step we will use repeatedly in this paper: to drop the absolute value +signs around the determinant in the Kac--Rice measure. This can potentially +lead to severe problems with the complexity. However, it is a justified step +when the parameters of the problem, i.e., $E$, $\mu$, and $\lambda^*$, put us in +a regime where the exponential majority of stationary points have the same +index. This is true for maxima and minima, and for saddle points with a single +outlier. Dropping the absolute value sign allows us to write +\begin{equation} + \det\operatorname{Hess}H(\mathbf x_a, \pmb\omega_a) + =\int d\pmb\eta_a\,d\bar{\pmb\eta}_a\,e^{\bar{\pmb\eta}_a^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega_a)\pmb\eta_a} +\end{equation} +for $N$-dimensional Grassmann variables $\bar{\pmb\eta}$ and $\pmb\eta$. For +the spherical models this step is unnecessary, since there are other ways to +treat the determinant keeping the absolute value signs, as in previous works +\cite{Folena_2020_Rethinking, Kent-Dobias_2023_How}. However, since others of +our examples are for models where the same techniques are impossible, it is +useful to see the fermionic method in action in this simple case. 
+ +Once these substitutions have been made, the entire expression +\eqref{eq:max.complexity.expanded} is an exponential integral whose argument is +a linear functional of $H$. This allows for the average to be taken over the +disorder. If we gather all the $H$-dependent pieces into the linear functional +$\mathcal O$, then the average gives +\begin{equation} + \begin{aligned} + \overline{ + e^{\sum_a^n\mathcal O_aH(\mathbf x_a)} + } + &=e^{\frac12\sum_a^n\sum_b^n\mathcal O_a\mathcal O_b\overline{H(\mathbf x_a)H(\mathbf x_b)}} \\ + &=e^{N\frac12\sum_a^n\sum_b^n\mathcal O_a\mathcal O_bf\big(\frac{\mathbf x_a^T\mathbf x_b}N\big)} + \end{aligned} +\end{equation} +The result is an integral that depends on the many vector variables we +have introduced only through their scalar products with each other. We therefore make a change of variables in the integration from those vectors to matrices that encode their possible scalar products. These matrices are +\begin{align} + C_{ab}=\frac1N\mathbf x_a\cdot\mathbf x_b && - D_{ab}=\frac1N\hat{\mathbf s}_a\cdot\hat{\mathbf s}_b + R_{ab}=-i\frac1N\mathbf x_a\cdot\hat{\mathbf x}_b \\ - A_{ab}^{cd}=\frac1N\mathbf x_a^c\cdot\mathbf x_b^d + D_{ab}=\frac1N\hat{\mathbf x}_a\cdot\hat{\mathbf x}_b && - X^c_{ab}=\frac1N\mathbf s_a\cdot\mathbf x_b^c + F_{ab}=\frac1N\bar{\pmb\eta}_a^T\pmb\eta_b + \\ + A_{ab}^{cd}=\frac1N\mathbf s_a^c\cdot\mathbf s_b^d && - \hat X^c_{ab}=\frac1N\hat{\mathbf s}_a\cdot\mathbf x_b^c + X^c_{ab}=\frac1N\mathbf x_a\cdot\mathbf s_b^c + \\ + \hat X^c_{ab}=\frac1N\hat{\mathbf x}_a\cdot\mathbf s_b^c \end{align} +Order parameters that mix the normal and Grassmann variables generically vanish +in these settings \cite{Kurchan_1992_Supersymmetry}. 
+After these steps, which follow identically to those more carefully outlined in +the cited papers \cite{Folena_2020_Rethinking, Kent-Dobias_2023_How}, we arrive at a form of the integral as one over an effective action +\begin{equation} + \begin{aligned} + &\Sigma_{\lambda^*}(E,\mu) + =\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n} + \int dC\,dR\,dD\,dF \\ + &dA\,dX\,d\hat X\, + d\hat\beta\,d\hat\lambda\,e^{N + n\mathcal S_\mathrm{KR}(\hat\beta,\omega,C,R,D,F) + +N\mathcal S_\beta(\omega,\hat\lambda,A,X,\hat X) + } + \end{aligned} +\end{equation} +The structure of the integrand, with the effective action split between two +terms which only share a dependence on the Lagrange multiplier $\omega$ that +enforces the constraint, is generic to Gaussian problems. This is the +appearance in practice of the fact mentioned before that conditions on the +Hessian mostly do not affect the rest of the complexity problem. \begin{widetext} \begin{equation} \begin{aligned} @@ -463,10 +563,24 @@ describes overlaps between eigenvectors at different stationary points and shoul We will discuss at the end of this paper when these order parameters can be expected to be nonzero, but in this and most isotropic problems all of the $X$s, $\hat X$s, and $A^{ab}$ for $a\neq b$ are zero. \begin{equation} - \Sigma_\textrm{marginal}(E) - =\operatorname{max}_\omega\big[\Sigma(E,\omega)+G_{\sqrt{f''(1)}}(\omega)\big] + \Sigma_{\lambda^*}(E,\mu) + =\Sigma(E,\mu)+G_{\lambda^*}(\mu) +\end{equation} +where $G$ is precisely the function \eqref{eq:goe.large.dev} we found in the +case of a GOE matrix added to an identity, with $\sigma=\sqrt{f''(1)}$. We find the marginal complexity by solving +\begin{equation} + 0 + =\frac\partial{\partial\lambda^*}\Sigma_{\lambda^*}(E,\mu_\mathrm m(E))\bigg|_{\lambda^*=0} + =\frac\partial{\partial\lambda^*}G_{\lambda^*}(\mu_\mathrm m(E))\bigg|_{\lambda^*=0} +\end{equation} +which gives $\mu_\mathrm m(E)=2\sqrt{f''(1)}$ independent of $E$, as we presaged above. 
Since $G_0(\mu_\mathrm m)=0$, this gives finally +\begin{equation} + \Sigma_\mathrm m(E) + =\Sigma_0(E,\mu_\mathrm m(E)) + =\Sigma(E,\mu_\mathrm m) \end{equation} -where the maximum over $\omega$ needs to lie at a real value. +that the marginal complexity in these models is simply the ordinary complexity +evaluated at a fixed trace of the Hessian. \subsection{Twin spherical spin glasses} @@ -537,6 +651,9 @@ The energy or cost function is the sum of squares of the $V_k$, or \end{equation} The landscape complexity and large deviations of the ground state for this problem were recently studied in a linear context, with $f(q)=\sigma^2+aq$ \cite{Fyodorov_2020_Counting, Fyodorov_2022_Optimization}. Some results on the ground state of the general nonlinear problem can also be found in \cite{Tublin_2022_A}. In particular, that work indicates that the low-lying minima of the problem tend to be either replica symmetric or full replica symmetry breaking. This is not good news for our analysis or marginal states, because in the former case the problem is typically easy to solve, and in the latter the analysis becomes much more technically challenging. +\cite{Urbani_2023_A, Kamali_2023_Dynamical, Kamali_2023_Stochastic, Urbani_2024_Statistical} +\cite{Montanari_2023_Solving, Montanari_2024_On} + Fortunately, the \emph{maxima} of this problem have a more amenable structure for study, as they are typically described by 1-RSB like structure. 
There is a heuristic intuition for this: in the limit of $M\to1$, this problem is just the @@ -635,6 +752,43 @@ which produces \end{bmatrix} \end{equation} +\begin{equation} + \begin{aligned} + &\mathcal S + =-\frac1n\frac\alpha2\left\{\log\det\left[ + \hat\beta f(C)+\Big( + f'(C)\odot D+(G\odot G-R\odot R)\odot f''(C) + \Big)f(C) + +(I+R\odot f'(C))^2 + \right]-\log\det(I+G\odot f'(C))^2\right\} \\ + &+\frac1n\frac12\Big(\log\det(CD+R^2)-\log\det G^2\Big) + +\hat\beta E+(g_d-r_d)\mu + \end{aligned} +\end{equation} +where $\odot$ gives the Hadamard or componentwise product between the matrices, while other products and powers are matrix products and powers. + +\begin{equation} + \begin{aligned} + &\hat\beta E+\mu(g_d-r_d)+\frac12\log\frac{d_d+r_d^2}{g_d^2} \\ + &-\frac\alpha2\log\left[ + 1+\hat\beta\big(f(1)-f(0)\big) + \Big(d_d\big(f(1)-f(0)\big)+r_d\big(2+r_df'(1)\big)\Big)f'(1) + +(g_d^2-r_d^2)\big(f(1)-f(0)\big)f''(1) + \right] \\ + &-\alpha f(0)\left( + \big(f(1)-f(0)\big)+\frac{1+r_d\big(2+r_df'(1)\big)f'(1)}{\hat\beta+d_df'(1)+(g_d^2-r_d^2)f''(1)} + \right)^{-1} + \end{aligned} +\end{equation} + +In the case where $\mu$ is not specified, in which the model is supersymmetric, $D=\hat\beta R$ and the effective action becomes particularly simple: +\begin{equation} + \hat\beta e + -\frac12\frac{\alpha f(0)}{1+\hat\beta\big(f(1)-f(0)\big)+r_df'(1)} + -\frac\alpha2\log\left(1+\frac{\hat\beta\big(f(1)-f(0)\big)}{1+r_df'(1)}\right) + +\frac12\log\frac{\hat\beta+r_d}{r_d} +\end{equation} + The condition fixing the maximum eigenvalue adds to the integrand \begin{equation} \frac12\beta\sum_b^{m_a}\mathbf s^T_b(\mathbf v^k_a(\mathbf v^k_a)^T+w^k_a\partial\partial V^k(\mathbf x_a)+\omega I)\mathbf s_b -- cgit v1.2.3-70-g09d2