From 3bf1f4d1779ea0b37ab0fb9a6c2da8f7e7dda3cd Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Wed, 6 Jul 2022 15:41:44 +0200 Subject: Lots of notation changes --- frsb_kac-rice.tex | 123 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 68 insertions(+), 55 deletions(-) (limited to 'frsb_kac-rice.tex') diff --git a/frsb_kac-rice.tex b/frsb_kac-rice.tex index fc2d911..9fc4b34 100644 --- a/frsb_kac-rice.tex +++ b/frsb_kac-rice.tex @@ -70,9 +70,10 @@ To constrain the model to the sphere, we use a Lagrange multiplier $\mu$, with t \end{equation} At any critical point, the gradient and Hessian are -\begin{equation} - \operatorname{Grad}H=\partial H+\mu z \qquad ; \qquad \operatorname{Hess}H=\partial\partial H+\mu I -\end{equation} +\begin{align} + \nabla H=\partial H+\mu s && + \operatorname{Hess}H=\partial\partial H+\mu I +\end{align} The important observation was made by Bray and Dean \cite{Bray_2007_Statistics} that gradient and Hessian are independent for random Gaussian disorder. The average over disorder @@ -181,8 +182,11 @@ fixing the energy density $E$, and another delta function to count the number of saddles with trace of the Hessian $=\mu^*$. The latter will give us everything we need to characterize the saddles, as we shall see later \begin{equation} - \mathcal N(E, \mu^*) - =\int ds d\mu\,\delta(NE-H(s))\delta\big(\partial H(s)+\mu s\big)\Big|\det(\partial\partial H(s)+\mu I) \Big| \delta\Big({\mbox{Tr}} [\partial\partial H(s)+\mu I)-N\mu^*] \Big) + \begin{aligned} + \mathcal N(E, \mu^*) + &=\int ds\, d\mu\,\delta\big(\tfrac12(\|s\|^2-N)\big)\,\delta\big(\nabla H(s,\mu)\big)\,\big|\det\operatorname{Hess}H(s,\mu)\big| \\ + &\hspace{10pc}\times\delta\big(NE-H(s)\big)\delta\big(N\mu^*-\operatorname{Tr}\operatorname{Hess}H(s,\mu)\big) + \end{aligned} \end{equation} This number will typically be exponential in $N$. 
In order to find typical counts when disorder is averaged, we will want to average its logarithm @@ -233,9 +237,11 @@ it is an interesting object of study. In order to average the complexity over disorder properly, the logarithm must be dealt with. We use the standard replica trick, writing \begin{equation} \begin{aligned} - \log\mathcal N(E,{\cal I}_o) - &=\lim_{n\to0}\frac\partial{\partial n}\mathcal N^n(E,{\cal I}_o) \\ - &=\lim_{n\to0}\frac\partial{\partial n}\int\prod_a^n ds_a\,\delta(NE-H(s_a))\delta(\partial H(s_a)+\mu s_a)|\det(\partial\partial H(s_a)+\mu I)| \Theta\left[{\cal I}(s_a,\mu)-{\cal I}_o)\right] + \log\mathcal N(E,\mu^*) + &=\lim_{n\to0}\frac\partial{\partial n}\mathcal N^n(E,\mu^*) \\ + &=\lim_{n\to0}\frac\partial{\partial n}\int\prod_a^n ds_a\,d\mu_a\, + \delta\big(\tfrac12(\|s_a\|^2-N)\big)\,\delta\big(\nabla H(s_a,\mu_a)\big)\,\big|\det\operatorname{Hess}H(s_a,\mu_a)\big| \\ + &\hspace{13pc} \times\delta\big(NE-H(s_a)\big)\delta\big(N\mu^*-\operatorname{Tr}\operatorname{Hess}H(s_a,\mu_a)\big) \end{aligned} \end{equation} The replicated Kac--Rice formula was introduced by Ros et al.~\cite{Ros_2019_Complex}, and its @@ -246,10 +252,11 @@ therefore able to write \begin{equation} \begin{aligned} \Sigma(E, \mu^*) - &=\lim_{N\to\infty}\frac1N\lim_{n\to0}\frac\partial{\partial n}\int\left(\prod_a^nds_a\right)\,\overline{\prod_a^n \delta(NE-H(s_a))\delta(\partial H(s_a)+\mu s_a)}\nonumber\\ - & + &=\lim_{N\to\infty}\frac1N\lim_{n\to0}\frac\partial{\partial n}\int\left(\prod_a^nds_a\,d\mu_a\right)\, + \overline{\prod_a^n \delta\big(\tfrac12(\|s_a\|^2-N)\big)\,\delta\big(\nabla H(s_a,\mu_a)\big)\delta(NE-H(s_a))}\\ + &\hspace{10pc} \times - \overline{\prod_a^n |\det(\partial\partial H(s_a)+\mu I)|\delta({\mbox{Tr}} [\partial\partial H(s_a)+\mu I)-N\mu^*] )} + \overline{\prod_a^n |\det\operatorname{Hess}H(s_a,\mu_a)|\,\delta\big(N\mu^*-\operatorname{Tr}\operatorname{Hess}H(s_a,\mu_a)\big)} \end{aligned} \end{equation} @@ -278,17 +285,20 @@ index density 
\end{equation} When $\mu>\mu_m$, the critical points are minima whose sloppiest eigenvalue is $\mu-\mu_m$. -The factor $\Theta[({\cal I}(s_a,\mu)-{\cal I}_0]$ selects a domain of integration of $\mu,s_a$. To largest order in $N$, the average over the product of determinants factorizes into the product of averages, each of which is given by the same expression depending only on $\mu$: \begin{equation} \begin{aligned} - & \prod_a^n \overline{|\det(\partial\partial H(s_a)+\mu I)| - \delta({\mbox{Tr}} [\partial\partial H(s_a)+\mu I)-N\mu^*] ) - } \rightarrow e^{n{\cal D}(\mu^*)}\delta(\mu-\mu^*) \quad {\mbox{with}}\\ + \overline{\prod_a^n |\det\operatorname{Hess}H(s_a,\mu_a)|\,\delta\big(N\mu^*-\operatorname{Tr}\operatorname{Hess}H(s_a,\mu_a)\big)} + \rightarrow e^{nN\mathcal D(\mu^*)}\prod_a^n\delta(\mu_a-\mu^*) + \end{aligned} +\end{equation} +with +\begin{equation} + \begin{aligned} \mathcal D(\mu) - &=\frac1N\overline{\log|\det(\partial\partial H(s_a)+\mu I)|} + &=\frac1N\overline{\log|\det\operatorname{Hess}H(s,\mu)|} =\int d\lambda\,\rho(\lambda+\mu)\log|\lambda| \\ &=\operatorname{Re}\left\{ \frac12\left(1+\frac\mu{2f''(1)}\left(\mu-\sqrt{\mu^2-4f''(1)}\right)\right) @@ -306,9 +316,9 @@ the second derivative matrix The $\delta$-functions are treated by writing them in the Fourier basis, introducing auxiliary fields $\hat s_a$ and $\hat\beta$, \begin{equation} - \prod_a^n\delta(NE-H(s_a))\delta(\partial H(s_a)+\mu s_a) - =\int \frac{d\hat\beta}{2\pi}\prod_a^n\frac{d\hat s_a}{2\pi} - e^{\hat\beta(NE-H(s_a))+i\hat s_a\cdot(\partial H(s_a)+\mu s_a)} + \prod_a^n \delta\big(\tfrac12(\|s_a\|^2-N)\big)\,\delta\big(\nabla H(s_a,\mu^*)\big)\delta(NE-H(s_a)) + =\int\frac{d\hat\mu}{2\pi}\,\frac{d\hat\beta}{2\pi}\prod_a^n\frac{d\hat s_a}{2\pi} + e^{\frac12\hat\mu(\|s_a\|^2-N)+\hat\beta(NE-H(s_a))+i\hat s_a\cdot(\partial H(s_a)+\mu^*s_a)} \end{equation} $\hat \beta$ is a parameter conjugate to the state energies, i.e. 
playing the role of an inverse temperature for the metastable states. The average over disorder can now be taken, and since everything is Gaussian it gives @@ -355,10 +365,10 @@ change of variables in the integration from $s_a$ and $\hat s_a$ to these three matrices, we arrive at the form for the complexity \begin{equation} \begin{aligned} - &\Sigma(E,\mu) - =\mathcal D(\mu)+\hat\beta E+\\ + &\Sigma(E,\mu^*) + =\mathcal D(\mu^*)+\hat\beta E-\frac12\hat\mu+\\ &\lim_{n\to0}\frac1n\left( - -\mu\operatorname{Tr}R + \frac12\hat\mu\operatorname{Tr}C-\mu^*\operatorname{Tr}R +\frac12\sum_{ab}\left[ \hat\beta^2f(C_{ab})+(2\hat\beta R_{ab}-D_{ab})f'(C_{ab}) +R_{ab}^2f''(C_{ab}) @@ -367,7 +377,7 @@ matrices, we arrive at the form for the complexity \right) \end{aligned} \end{equation} -where $\hat\beta$, $C$, $R$ and $D$ must be evaluated at extrema of this +where $\hat\mu$, $\hat\beta$, $C$, $R$ and $D$ must be evaluated at extrema of this expression. @@ -375,15 +385,14 @@ expression. -The same information is contained, and better expressed in its -Legendre +The same information is contained, and better expressed, in its Legendre transform \begin{equation} \begin{aligned} - &G(\hat \beta,\mu) - =\mathcal D(\mu)+\\ + &G(\hat \beta,\mu^*) + =\mathcal D(\mu^*)-\frac12\hat\mu+\\ &\lim_{n\to0}\frac1n\left( - -\mu\operatorname{Tr}R + \frac12\hat\mu\operatorname{Tr}C-\mu^*\operatorname{Tr}R +\frac12\sum_{ab}\left[ \hat\beta^2f(C_{ab})+(2\hat\beta R_{ab}-D_{ab})f'(C_{ab}) +R_{ab}^2f''(C_{ab}) @@ -392,15 +401,16 @@ transform \right) \end{aligned} \end{equation} -Denoting $R_d \equiv \frac 1 n {\mbox Tr} R$, we have the double Legendre transform $K(\hat \beta, R_d)$: +Denoting $r_d \equiv \frac 1 n \operatorname{Tr} R$, we have the double Legendre transform $K(\hat \beta, r_d)$: \begin{equation} - e^{N K(\hat \beta, R_d)} =\int \; d\mu de \; e^{N\Sigma(E,\mu)+R_d\mu -\hat \beta E +{\cal{D}}(\mu)} + e^{N K(\hat \beta, r_d)} =\int \, d\mu^* \,dE \, e^{N\left\{\Sigma(E,\mu^*)+r_d\mu^* -\hat\beta 
E -\mathcal D(\mu^*)\right\}} \end{equation} given by \begin{equation} \begin{aligned} - &K(\hat \beta,R_d) + &K(\hat \beta,r_d) = \lim_{n\to0}\frac1n\left( + \frac{\hat\mu}2\operatorname{Tr}(C-I) +\frac12\sum_{ab}\left[ \hat\beta^2f(C_{ab})+(2\hat\beta R_{ab}-D_{ab})f'(C_{ab}) +R_{ab}^2f''(C_{ab}) @@ -409,7 +419,10 @@ given by \right) \end{aligned} \end{equation} -$R_d$ is conjugate to $\mu$ and through it to the Index density, while $\hat \beta$ plays the role of an inverse temperature conjugate to the complexity, that has been used since the beginning of the spin-glass field. In this way $K(R_d,\hat \beta)$ contains all the information about saddle densities. +$r_d$ is conjugate to $\mu^*$ and through it to the index density, while $\hat +\beta$ plays the role of an inverse temperature conjugate to the complexity, +that has been used since the beginning of the spin-glass field. In this way +$K(\hat \beta,r_d)$ contains all the information about saddle densities. @@ -424,39 +437,39 @@ i.e., to follow Parisi's scheme. This assumption immediately simplifies the extremal conditions, since hierarchical matrices commute and are closed under matrix products and Hadamard products. 
The extremal conditions are \begin{align} + 0&=\frac{\partial\Sigma}{\partial\hat\mu} + =\frac12(c_d-1) \\ 0&=\frac{\partial\Sigma}{\partial\hat\beta} - =E+\sum_{ab}\left[\hat\beta f(C_{ab})+R_{ab}f'(C_{ab})\right] \label{eq:cond.b} \\ - \frac{\tilde c}2I&=\frac{\partial\Sigma}{\partial C} + =E+\lim_{n\to0}\frac1n\sum_{ab}\left[\hat\beta f(C_{ab})+R_{ab}f'(C_{ab})\right] \label{eq:cond.b} \\ + 0&=\frac{\partial\Sigma}{\partial C} =\frac12\left[ - \hat\beta^2f'(C)+(2\hat\beta R-D)\odot f''(C)+R\odot R\odot f'''(C) + \hat\mu I+\hat\beta^2f'(C)+(2\hat\beta R-D)\odot f''(C)+R\odot R\odot f'''(C) +(CD+R^2)^{-1}D \right] \label{eq:cond.q} \\ 0&=\frac{\partial\Sigma}{\partial R} - =-\mu I+\hat\beta f'(C)+R\odot f''(C) + =-\mu^* I+\hat\beta f'(C)+R\odot f''(C) +(CD+R^2)^{-1}R \label{eq:cond.r} \\ 0&=\frac{\partial\Sigma}{\partial D} =-\frac12f'(C) +\frac12(CD+R^2)^{-1}C \label{eq:cond.d} \end{align} -where $\odot$ denotes the Hadamard product, or the componentwise product. The -equation for \eqref{eq:cond.q} would not be true on the diagonal save for the -arbitrary factor $\tilde c$. Equation \eqref{eq:cond.d} implies that +where $\odot$ denotes the Hadamard product, or the componentwise product. Equation \eqref{eq:cond.d} implies that \begin{equation} \label{eq:D.solution} D=f'(C)^{-1}-RC^{-1}R \end{equation} -In addition to these equations, one is also often interested in maximizing the complexity as a function of $\mu$, to find the dominant or most common type of stationary points. These are given by the condition +In addition to these equations, one is also often interested in maximizing the complexity as a function of $\mu^*$, to find the dominant or most common type of stationary points. 
These are given by the condition \begin{equation} \label{eq:cond.mu} - 0=\frac{\partial\Sigma}{\partial\mu} - =\mathcal D'(\mu)-r_d + 0=\frac{\partial\Sigma}{\partial\mu^*} + =\mathcal D'(\mu^*)-r_d \end{equation} -Since $\mathcal D(\mu)$ is effectively a piecewise function, with different forms for $\mu$ greater or less than $\mu_m$, there are two regimes. When $\mu>\mu_m$, the critical points are minima, and \eqref{eq:cond.mu} implies +Since $\mathcal D(\mu^*)$ is effectively a piecewise function, with different forms for $\mu^*$ greater or less than $\mu_m$, there are two regimes. When $\mu^*>\mu_m$, the critical points are minima, and \eqref{eq:cond.mu} implies \begin{equation} \label{eq:mu.minima} - \mu=\frac1{r_d}+r_df''(1) + \mu^*=\frac1{r_d}+r_df''(1) \end{equation} -When $\mu<\mu_m$, they are saddles, and +When $\mu^*<\mu_m$, they are saddles, and \begin{equation} \label{eq:mu.saddles} - \mu=2f''(1)r_d + \mu^*=2f''(1)r_d \end{equation} @@ -471,14 +484,14 @@ that $D=\hat\beta R$ \cite{Annibale_2003_The}. Under which conditions can this r Any result of supersymmetry can only be valid when the symmetry itself is valid, which means the determinant must be positive. This is only guaranteed -for minima, which have $\mu>\mu_m$. Moreover, this identity heavily constrains +for minima, which have $\mu^*>\mu_m$. Moreover, this identity heavily constrains the form that the rest of the solution can take. 
Assuming the supersymmetry holds, \eqref{eq:cond.q} implies \begin{equation} - \tilde cI=\hat\beta^2f'(C)+\hat\beta R\odot f''(C)+R\odot R\odot f'''(C)+\hat\beta(CD+R^2)^{-1}R + 0=\hat\mu I+\hat\beta^2f'(C)+\hat\beta R\odot f''(C)+R\odot R\odot f'''(C)+\hat\beta(CD+R^2)^{-1}R \end{equation} Substituting \eqref{eq:cond.r} for the factor $(CD+R^2)^{-1}R$, we find substantial cancellation, and finally \begin{equation} \label{eq:R.diagonal} - (\tilde c-\mu)I=R\odot R\odot f'''(C) + 0=(\hat\mu+\hat\beta\mu^*)I+R\odot R\odot f'''(C) \end{equation} If $C$ has a nontrivial off-diagonal structure and supersymmetry holds, then the off-diagonal of $R$ must vanish, and therefore $R=r_dI$. Therefore, a @@ -487,15 +500,15 @@ supersymmetric ansatz is equivalent to a \emph{diagonal} ansatz. Supersymmetry has further implications. Equations \eqref{eq:cond.r} and \eqref{eq:cond.d} can be combined to find \begin{equation} - I=R\left[\mu I-R\odot f''(C)\right]+(D-\hat\beta R)f'(C) + I=R\left[\mu^* I-R\odot f''(C)\right]+(D-\hat\beta R)f'(C) \end{equation} Assuming the supersymmetry holds implies that \begin{equation} - I=R\left[\mu I-R\odot f''(C)\right] + I=R\left[\mu^* I-R\odot f''(C)\right] \end{equation} Understanding that $R$ is diagonal, this implies \begin{equation} - \mu=\frac1{r_d}+r_df''(1) + \mu^*=\frac1{r_d}+r_df''(1) \end{equation} which is precisely the condition \eqref{eq:mu.minima}. Therefore, \emph{the supersymmetric solution only counts dominant minima.} @@ -503,10 +516,10 @@ supersymmetric solution only counts dominant minima.} Inserting the supersymmetric ansatz $D=\hat\beta R$ and $R=r_dI$, one gets \begin{equation} \label{eq:diagonal.action} \begin{aligned} - \Sigma(E,\mu) - =\mathcal D(\mu) + \Sigma(E,\mu^*) + =\mathcal D(\mu^*) + - \hat\beta E-\mu^ r_d + \hat\beta E-\mu^* r_d +\frac12\hat\beta r_df'(1)+\frac12r_d^2f''(1)+\frac12\log r_d^2 \\ +\frac12\lim_{n\to0}\frac1n\left(\hat\beta^2\sum_{ab}f(C_{ab})+\log\det((\hat\beta/r_d)C+I)\right) -- cgit v1.2.3-70-g09d2