diff options
author | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2024-06-27 09:59:10 +0200 |
---|---|---|
committer | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2024-06-27 09:59:10 +0200 |
commit | da1bc2e5804a0f3cc65a399b57d6ec9dd38f69f9 (patch) | |
tree | 33f486d0085ddf4899177fbdf8a7d0723fe5146a | |
parent | 5a76edad7bec8a0d1a8c5055576a990eadc1bd7f (diff) | |
parent | b03732a7ac6069022a3d4a6c39443e6bd889064c (diff) | |
download | marginal-da1bc2e5804a0f3cc65a399b57d6ec9dd38f69f9.tar.gz marginal-da1bc2e5804a0f3cc65a399b57d6ec9dd38f69f9.tar.bz2 marginal-da1bc2e5804a0f3cc65a399b57d6ec9dd38f69f9.zip |
Merge branch 'master' of git:research/replicated_kac-rice/papers/marginal
-rw-r--r-- | marginal.bib | 10 | ||||
-rw-r--r-- | marginal.tex | 176 |
2 files changed, 110 insertions, 76 deletions
diff --git a/marginal.bib b/marginal.bib index d446cad..45d26ae 100644 --- a/marginal.bib +++ b/marginal.bib @@ -314,6 +314,11 @@ doi = {10.1103/PhysRevE.107.064111} } +@unpublished{Kent-Dobias_2024_Algorithm-independent, + author = {Kent-Dobias, Jaron}, + title = {Algorithm-independent bounds on complex optimization through the statistics of marginal optima} +} + @article{Kent-Dobias_2024_Arrangement, author = {Kent-Dobias, Jaron}, title = {Arrangement of nearby minima and saddles in the mixed spherical energy landscapes}, @@ -329,6 +334,11 @@ issn = {2542-4653} } +@unpublished{Kent-Dobias_2024_Conditioning, + author = {Kent-Dobias, Jaron}, + title = {Conditioning the complexity of random landscapes on marginal optima} +} + @article{Kurchan_1992_Supersymmetry, author = {Kurchan, J.}, title = {Supersymmetry in spin glass dynamics}, diff --git a/marginal.tex b/marginal.tex index f46ed38..3a59487 100644 --- a/marginal.tex +++ b/marginal.tex @@ -1593,12 +1593,11 @@ x\,d\bar{\pmb\eta}\,d\pmb\eta\,\frac{d\hat{\mathbf x}}{(2\pi)^N}$. Besides some deep connections to the physics of BRST, this compact notation dramatically simplifies the analytical treatment of the problem. The energy of stationary points can also be fixed using this notation, by writing \begin{equation} - \int d\pmb\phi\,\frac{d\hat\beta}{2\pi}\,e^{\hat\beta E+\int d1\,(1-\hat\beta\bar\theta_1\theta_1)H(\pmb\phi(1))} + \int d\pmb\phi\,d\hat\beta\,e^{\hat\beta E+\int d1\,(1-\hat\beta\bar\theta_1\theta_1)H(\pmb\phi(1))} \end{equation} which a small calculation confirms results in the same expression as \eqref{eq:delta.energy}. -The reason why this simplification is -possible is because there are a large variety of superspace algebraic and +The reason why this transformation is a simplification is because there are a large variety of superspace algebraic and integral operations with direct corollaries to their ordinary real counterparts. 
For instance, consider a super linear operator $M(1,2)$, which like the super vector $\pmb\phi$ is made up of a linear combination of $N\times @@ -1613,7 +1612,7 @@ The identity supermatrix is given by \end{equation} Integrals involving superfields contracted into such operators result in schematically familiar expressions, like that of the standard Gaussian: \begin{equation} - \int d\pmb\phi\,e^{\int\,d1\,d2\,\pmb\phi(1)^TM(1,2)\pmb\phi(2)} + \int d\pmb\phi\,e^{-\frac12\int\,d1\,d2\,\pmb\phi(1)^TM(1,2)\pmb\phi(2)} =(\operatorname{sdet}M)^{-1/2} \end{equation} where the usual role of the determinant is replaced by the superdeterminant. @@ -1642,19 +1641,23 @@ superdeterminant of $M$ is given by \operatorname{sdet}M=\det(A-BD^{-1}C)\det(D)^{-1} \end{equation} which is the same as the normal equation for the determinant of a block matrix -save for the inverse of $\det D$. The same method can be used to calculate the -superdeterminant in arbitrary superspaces, where for $\mathbb R^{N|2D}$ each +save for the inverse of $\det D$. Likewise, the supertrace of $M$ is given by +\begin{equation} + \operatorname{sTr}M=\operatorname{Tr}A-\operatorname{Tr}D +\end{equation} +The same method can be used to calculate the +superdeterminant and supertrace in arbitrary superspaces, where for $\mathbb R^{N|2D}$ each basis has $2^{2D-1}$ elements. For instance, for $\mathbb R^{N|4}$ we have $\mathbf e(1,2)=\{1,\bar\theta_1\theta_1,\bar\theta_2\theta_2,\bar\theta_1\theta_2,\bar\theta_2\theta_1,\bar\theta_1\bar\theta_2,\theta_1\theta_2,\bar\theta_1\theta_1\bar\theta_2\theta_2\}$ and $\mathbf f(1,2)=\{\bar\theta_1,\theta_1,\bar\theta_2,\theta_2,\bar\theta_1\theta_1\bar\theta_2,\bar\theta_2\theta_2\bar\theta_1,\bar\theta_1\theta_1\theta_2,\bar\theta_2\theta_2\theta_1\}$. \section{BRST symmetry} \label{sec:brst} -The superspace representation is also helpful because it can make manifest an -unusual symmetry in the dominant complexity of minima that would otherwise be -obfuscated. 
This arises from considering the Kac--Rice formula as a kind of -gauge fixing procedure \cite{Zinn-Justin_2002_Quantum}. Around each stationary -point consider making the coordinate transformation $\mathbf u=\nabla H(\mathbf -x)$. Then in the absence of fixing the trace, the Kac--Rice measure becomes +When the trace $\mu$ is not fixed, there is an unusual symmetry in the dominant +complexity of minima \cite{Annibale_2004_Coexistence, Kent-Dobias_2023_How}. +This arises from considering the Kac--Rice formula as a kind of gauge fixing +procedure \cite{Zinn-Justin_2002_Quantum}. Around each stationary point +consider making the coordinate transformation $\mathbf u=\nabla H(\mathbf x)$. +Then in the absence of fixing the trace, the Kac--Rice measure becomes \begin{equation} \int d\nu(\mathbf x,\pmb\omega\mid E) =\int\sum_\sigma d\mathbf u\,\delta(\mathbf u)\, @@ -1679,8 +1682,7 @@ symmetry of the measure can then be written where $\delta\epsilon=-\pmb\eta^T\delta\mathbf u$ is a Grassmann number. This establishes that $\delta\mathbf x=\bar{\pmb\eta}\delta\epsilon$, now linear. The rest of the transformation can be built by requiring that the action is invariant after -expansion in $\delta\epsilon$. Ignoring for a moment the piece of the measure -fixing the trace of the Hessian, this gives +expansion in $\delta\epsilon$. This gives \begin{align} \delta\mathbf x=\bar{\pmb\eta}\,\delta\epsilon && \delta\hat{\mathbf x}=-i\hat\beta\bar{\pmb\eta}\,\delta\epsilon && @@ -1693,7 +1695,9 @@ so that the differential form of the symmetry is -i\hat\beta\bar{\pmb\eta}\cdot\frac\partial{\partial\hat{\mathbf x}} -i\hat{\mathbf x}\cdot\frac\partial{\partial\pmb\eta} \end{equation} -The Ward identities associated with this symmetry give rise to relationships among the order parameters. These identities are +The Ward identities associated with this symmetry give rise to relationships +among the order parameters. 
These identities come from applying the +differential symmetry to Grassmann-valued order parameters, and are \begin{align} \begin{aligned} 0&=\frac1N\mathcal D\langle\mathbf x_a\cdot\pmb\eta_b\rangle @@ -1820,42 +1824,45 @@ full-RSB setting. Using the $\mathbb R^{N|2}$ superfields \begin{equation} - \pmb\phi_a(1)=\mathbf x+\bar\theta_1\pmb\eta+\bar{\pmb\eta}\theta_1+\bar\theta_1\theta_1\hat{\mathbf x}, + \pmb\phi_a(1)=\mathbf x_a+\bar\theta_1\pmb\eta_a+\bar{\pmb\eta}_a\theta_1+\bar\theta_1\theta_1\hat{\mathbf x}_a, \end{equation} the replicated count of stationary points can be written \begin{equation} \begin{aligned} &\mathcal N(E,\mu)^n - =\int\prod_{a=1}^nd\hat\beta_a\,d\pmb\phi_a\, - \\ - &\qquad\times\exp\left[ - \hat\beta_a E-\frac12\int d1\,B_a(1)\sum_{k=1}^MV^k(\pmb\phi_a(1))^2 - \right] + =\int d\hat\beta\prod_{a=1}^n\,d\pmb\phi_a\, + \exp\bigg[ + N\hat\beta E \\ + &\qquad-\frac12\int d1\,\left( + B(1)\sum_{k=1}^MV_k(\pmb\phi_a(1))^2 + -\mu\big(\|\pmb\phi_a(1)\|^2-N\big) + \right) + \bigg] \end{aligned} \end{equation} -for $B_a(1)=1-\hat\beta_a\bar\theta_1\theta_1$. +for $B(1)=1-\hat\beta\bar\theta_1\theta_1$. The derivation of the complexity follows from here nearly identically to that in Appendix A.2 of \citeauthor{Fyodorov_2022_Optimization} with superoperations replacing standard ones \cite{Fyodorov_2022_Optimization}. 
First we insert -Dirac $\delta$ functions to fix each of the $M$ energies $V^k(\pmb\phi_a(1))$ as +Dirac $\delta$ functions to fix each of the $M$ energies $V_k(\pmb\phi_a(1))$ as \begin{equation} \label{eq:Vv.delta} \begin{aligned} - &\int dv^k_a\,\delta\big(V^k(\pmb\phi_a(1))-v^k_a(1)\big) + &\delta\big(V_k(\pmb\phi_a(1))-v_{ka}(1)\big) \\ - &\quad=\int dv^k_a\,d\hat v^k_a\,\exp\left[i\int d1\,\hat v^k_a(1)\big(V^k(\pmb\phi_a(1))-v^k_a(1)\big)\right] + &=\int d\hat v_{ka}\,\exp\left[i\int d1\,\hat v_{ka}(1)\big(V_k(\pmb\phi_a(1))-v_{ka}(1)\big)\right] \end{aligned} \end{equation} -The squared $V^k$ appearing in the energy can now be replaced by the variables -$v^k$, leaving the only remaining dependence on the disordered $V$ in the +The squared $V_k$ appearing in the energy can now be replaced by the variables +$v_k$, leaving the only remaining dependence on the disordered $V$ in the contribution of \eqref{eq:Vv.delta}, which is linear. The average over the disorder can then be computed, which yields \begin{equation} \begin{aligned} - &\overline{\sum_{k=1}^M\sum_{a=1}^n\exp\left[i\int d1\,\hat v^k_a(1)V^k(\pmb\phi_a(1))\right]} + &\overline{\prod_{k=1}^M\prod_{a=1}^n\exp\left[i\int d1\,\hat v_{ka}(1)V_k(\pmb\phi_a(1))\right]} \\ & =\exp\left[ - -\frac12\sum_{k=1}^M\sum_{ab=1}^n\int d1\,d2\,\hat v_a^k(1)f\left(\frac{\pmb\phi_a(1)^T\pmb\phi_b(2)}N\right)\hat v_b^k(2) + -\frac12\sum_{k=1}^M\sum_{ab}^n\int d1\,d2\,\hat v_{ka}(1)f\left(\frac{\pmb\phi_a(1)\cdot\pmb\phi_b(2)}N\right)\hat v_{kb}(2) \right] \end{aligned} \end{equation} @@ -1863,24 +1870,26 @@ The result is factorized in the indices $k$ and Gaussian in the superfields $v$ and $\hat v$ with kernel \begin{equation} \begin{bmatrix} - B_a(1)\delta_{ab}\delta(1,2) & i\delta_{ab}\delta(1,2) \\ - i\delta_{ab}\delta(1,2) & f\left(\frac{\pmb\phi_a(1)^T\pmb\phi_b(2)}N\right) + B(1)\delta_{ab}\delta(1,2) & i\delta_{ab}\delta(1,2) \\ + i\delta_{ab}\delta(1,2) & f\left(\frac{\pmb\phi_a(1)\cdot\pmb\phi_b(2)}N\right) 
\end{bmatrix} \end{equation} -Making the $M$ independent Gaussian integrals, we therefore have +Making the $M$ independent Gaussian integrals, we find \begin{equation} \begin{aligned} &\mathcal N(E,\mu)^n - =\int\left(\prod_{a=1}^nd\hat\beta_a\,d\pmb\phi_a\right) + =\int d\hat\beta\left(\prod_{a=1}^nd\pmb\phi_a\right) \exp\bigg[ - \sum_a^n\hat\beta_aE \\ - &\qquad-\frac M2\log\operatorname{sdet}\left( - \delta_{ab}\delta(1,2)+B_a(1)f\left(\frac{\pmb\phi_a(1)^T\pmb\phi_b(2)}N\right) + nN\hat\beta E+\frac\mu2\sum_a^n\int d1\,\|\pmb\phi_a\|^2 \\ + &\quad-\frac M2\log\operatorname{sdet}\left( + \delta_{ab}\delta(1,2)+B(1)f\left(\frac{\pmb\phi_a(1)\cdot\pmb\phi_b(2)}N\right) \right) \bigg] \end{aligned} \end{equation} -We make a change of variables from the fields $\pmb\phi$ to matrices $\mathbb Q_{ab}(1,2)=\frac1N\pmb\phi_a(1)^T\pmb\phi_b(2)$. This transformation results in a change of measure of the form +We make a change of variables from the fields $\pmb\phi$ to matrices $\mathbb +Q_{ab}(1,2)=\frac1N\pmb\phi_a(1)\cdot\pmb\phi_b(2)$. 
This transformation results +in a change of measure of the form \begin{equation} \prod_{a=1}^n d\pmb\phi_a=d\mathbb Q\,(\operatorname{sdet}\mathbb Q)^\frac N2 =d\mathbb Q\,\exp\left[\frac N2\log\operatorname{sdet}\mathbb Q\right] @@ -1889,18 +1898,20 @@ We therefore have \begin{equation} \begin{aligned} &\mathcal N(E,\mu)^n - =\int\left(\prod_{a=1}^nd\hat\beta_a\right)\,d\mathbb Q\, - \exp\bigg[ - \sum_a^n\hat\beta_aE - +\frac N2\log\operatorname{sdet}\mathbb Q + =\int d\hat\beta\,d\mathbb Q\, + \exp\bigg\{ + nN\hat\beta E+N\frac\mu2\operatorname{sTr}\mathbb Q + +\frac N2\log\operatorname{sdet}\mathbb Q \\ - &\qquad-\frac M2\log\operatorname{sdet}\left( - \delta_{ab}\delta(1,2)+B_a(1)f(\mathbb Q_{ab}(1,2)) - \right) - \bigg] + &\qquad + -\frac M2\log\operatorname{sdet}\left[ + \delta_{ab}\delta(1,2)+B(1)f(\mathbb Q_{ab}(1,2)) + \right] + \bigg\} \end{aligned} \end{equation} -We now need to blow up our supermatrices into our physical order parameters. We have that +We now need to blow up our supermatrices into our physical order parameters. We +have from the definition of $\pmb\phi$ and $\mathbb Q$ that \begin{equation} \begin{aligned} &\mathbb Q_{ab}(1,2) @@ -1913,7 +1924,7 @@ where $C$, $R$, $D$, and $G$ are the matrices defined in \eqref{eq:order.parameters}. Other possible combinations involving scalar products between fermionic and bosonic variables do not contribute at physical saddle points \cite{Kurchan_1992_Supersymmetry}. 
Inserting this expansion into -the expression above and evaluating the superdeterminants, we find +the expression above and evaluating the superdeterminants and supertrace, we find \begin{equation} \mathcal N(E,\mu)^n=\int d\hat\beta\,dC\,dR\,dD\,dG\,e^{nN\mathcal S_\mathrm{KR}(\hat\beta,C,R,D,G)} \end{equation} @@ -1921,63 +1932,76 @@ where the effective action is given by \begin{widetext} \begin{equation} \begin{aligned} - &\mathcal S_\mathrm{KR}(\hat\beta,C,R,D,G) - =\hat\beta E-\frac1n\operatorname{Tr}(G+R)\mu - +\frac1n\frac12\Big(\log\det(CD+R^2)-\log\det G^2\Big) + \mathcal S_\mathrm{KR}(\hat\beta,C,R,D,G) + &=\hat\beta E+\lim_{n\to0}\frac1n\Bigg(-\mu\operatorname{Tr}(G+R) + +\frac12\log\det\big[G^{-2}(CD+R^2)\big] + +\alpha\log\det\big[I+G\odot f'(C)\big] \\ - &-\frac1n\frac\alpha2\left\{\log\det\left[ + &\qquad-\frac\alpha2\log\det\left[ \Big( - f'(C)\odot D-\hat\beta I+(G\odot G-R\odot R)\odot f''(C) + f'(C)\odot D-\hat\beta I+(G^{\circ2}-R^{\circ2})\odot f''(C) \Big)f(C) +(I-R\odot f'(C))^2 - \right]-\log\det(I+G\odot f'(C))^2\right\} + \right]\Bigg) \end{aligned} \end{equation} -where $\odot$ gives the Hadamard or componentwise product between the matrices, while other products and powers are matrix products and powers. +where $\odot$ gives the Hadamard or componentwise product between the matrices +and $A^{\circ n}$ gives the Hadamard power of $A$, while other products and +powers are matrix products and powers. -In the case where $\mu$ is not specified, the model has a BRST symmetry whose -Ward identities give $D=\hat\beta R$ and $G=-R$ -\cite{Annibale_2004_Coexistence, Kent-Dobias_2023_How}. Using these relations, -the effective action becomes particularly simple: +In the case where $\mu$ is not specified, we can make use of the BRST symmetry +of Appendix~\ref{sec:brst} whose Ward identities give $D=\hat\beta R$ and +$G=-R$. 
Using these relations, the effective action becomes particularly +simple: \begin{equation} - \mathcal S(\hat\beta, C, R) + \mathcal S_\mathrm{KR}(\hat\beta, C, R) = \hat\beta E - +\lim_{n\to0}\frac1n\left[ - -\frac\alpha2\log\det\left[ - I-\hat\beta f(C)(I-R\odot f'(C))^{-1} + +\frac12\lim_{n\to0}\frac1n\Big( + \log\det(I+\hat\beta CR^{-1}) + -\alpha\log\det\left[ + I-\hat\beta f(C)\big(I-R\odot f'(C)\big)^{-1} \right] - +\frac12\log\det(I+\hat\beta CR^{-1}) - \right] + \Big) \end{equation} -This effective action is general for arbitrary matrices $C$ and $R$. When using -a replica symmetric ansatz of $C_{ab}=\delta_{ab}+c_0(1-\delta_{ab})$ and +This effective action is general for arbitrary matrices $C$ and $R$, and +therefore arbitrary \textsc{rsb} order. When using a replica symmetric ansatz +of $C_{ab}=\delta_{ab}+c_0(1-\delta_{ab})$ and $R_{ab}=r\delta_{ab}+r_0(1-\delta_{ab})$, the resulting function of $\hat\beta$, $c_0$, $r$, and $r_0$ is \begin{equation} \begin{aligned} - \mathcal S= + &\mathcal S_\mathrm{KR}(\hat\beta,c_0,r,r_0)= \hat\beta E - -\frac\alpha 2\left[ - \log\left(1-\frac{\hat\beta\big(f(1)-f(c_0)\big)}{1-rf'(1)+r_0f'(c_0)}\right) - -\frac{\hat\beta f(c_0)+r_0f'(c_0)}{ - 1-\hat\beta\big(f(1)-f(c_0)\big)-rf'(1)+rf'(c_0) - }+\frac{r_0f'(c_0)}{1-rf'(1)+r_0f'(c_0)} - \right] \\ +\frac12\left[ \log\left(1+\frac{\hat\beta(1-c_0)}{r-r_0}\right) +\frac{\hat\beta c_0+r_0}{\hat\beta(1-c_0)+r-r_0} -\frac{r_0}{r-r_0} \right] + \\ + &\qquad-\frac\alpha 2\left[ + \log\left(1-\frac{\hat\beta\big(f(1)-f(c_0)\big)}{1-rf'(1)+r_0f'(c_0)}\right) + -\frac{\hat\beta f(c_0)+r_0f'(c_0)}{ + 1-\hat\beta\big(f(1)-f(c_0)\big)-rf'(1)+r_0f'(c_0) + }+\frac{r_0f'(c_0)}{1-rf'(1)+r_0f'(c_0)} + \right] \end{aligned} \end{equation} +\end{widetext} When $f(0)=0$ as in the cases directly studied in this work, this further -simplifies as $c_0=r_0=0$. Extremizing this expression with respect to the +simplifies as $c_0=r_0=0$. 
The effective action is then +\begin{equation} + \mathcal S_\mathrm{KR}(\hat\beta,r)= + \hat\beta E + +\frac12 + \log\left(1+\frac{\hat\beta}{r}\right) + -\frac\alpha 2 + \log\left(1-\frac{\hat\beta f(1)}{1-rf'(1)}\right) +\end{equation} +Extremizing this expression with respect to the order parameters $\hat\beta$ and $r$ produces the red line of dominant minima shown in Fig.~\ref{fig:ls.complexity}. -\end{widetext} - \bibliography{marginal} \end{document} |