summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaron Kent-Dobias <jaron@kent-dobias.com>2024-06-11 17:43:20 +0200
committerJaron Kent-Dobias <jaron@kent-dobias.com>2024-06-11 17:43:20 +0200
commitbddf631115ea509dc0b68d88ef7114c1ad4a2b28 (patch)
treef38c08b17dcef054b62a37419b590e5a5ddb4184
parent1fa5d6b6f4c695b6112fde1f2c544f8523c3caa7 (diff)
downloadmarginal-bddf631115ea509dc0b68d88ef7114c1ad4a2b28.tar.gz
marginal-bddf631115ea509dc0b68d88ef7114c1ad4a2b28.tar.bz2
marginal-bddf631115ea509dc0b68d88ef7114c1ad4a2b28.zip
More writing.
-rw-r--r--marginal.tex144
1 files changed, 89 insertions, 55 deletions
diff --git a/marginal.tex b/marginal.tex
index 6c28e08..39adc40 100644
--- a/marginal.tex
+++ b/marginal.tex
@@ -6,6 +6,14 @@
\usepackage{newtxtext,newtxmath}
\usepackage{bbold,anyfontsize}
\usepackage[dvipsnames]{xcolor}
+\usepackage[
+ colorlinks=true,
+ urlcolor=Black,
+ citecolor=Black,
+ filecolor=Black,
+ linkcolor=Black
+]{hyperref} % ref and cite links with pretty colors
+
\begin{document}
@@ -355,6 +363,8 @@ that $\hat\lambda=0$.
\section{Marginal complexity in random landscapes}
+\subsection{Marginal complexity from Kac--Rice}
+
The situation in the study of random landscapes is often as follows: an
ensemble of smooth functions $H:\mathbb R^N\to\mathbb R$ define random
landscapes, often with their configuration space subject to one or more
@@ -378,18 +388,18 @@ with respect to $\mathbf x$ and $\pmb\omega=\{\omega_1,\ldots,\omega_r\}$. The c
=\partial\partial H(\mathbf x)+\sum_{i=1}^r\omega_i\partial\partial g_i(\mathbf x)
\end{align}
The number of stationary points in a landscape for a particular realization $H$ is found by integrating over the Kac--Rice measure
-\begin{equation}
- d\mu_H(\mathbf x,\pmb\omega)=d\mathbf x\,d\pmb\omega\,\delta\big(\nabla H(\mathbf x,\pmb\omega)\big)\,\delta\big(\mathbf g(\mathbf x)\big)\,\big|\det\operatorname{Hess}H(\mathbf x,\pmb\omega)\big|
+\begin{equation} \label{eq:kac-rice.measure}
+ d\nu_H(\mathbf x,\pmb\omega)=d\mathbf x\,d\pmb\omega\,\delta\big(\nabla H(\mathbf x,\pmb\omega)\big)\,\delta\big(\mathbf g(\mathbf x)\big)\,\big|\det\operatorname{Hess}H(\mathbf x,\pmb\omega)\big|
\end{equation}
with a $\delta$-function of the gradient and the constraints ensuring that we
count valid stationary points, and the Hessian entering in the determinant as
the Jacobian of the argument to the $\delta$-function. It is usually more
interesting to condition the count on interesting properties of the stationary
points, like the energy and spectrum trace,
-\begin{equation}
+\begin{equation} \label{eq:kac-rice.measure.2}
\begin{aligned}
- &d\mu_H(\mathbf x,\pmb\omega\mid E,\mu) \\
- &\quad=d\mu_H(\mathbf x,\pmb\omega)\,
+ &d\nu_H(\mathbf x,\pmb\omega\mid E,\mu) \\
+ &\quad=d\nu_H(\mathbf x,\pmb\omega)\,
\delta\big(NE-H(\mathbf x)\big)
\,\delta\big(N\mu-\operatorname{Tr}\operatorname{Hess}H(\mathbf x,\pmb\omega)\big)
\end{aligned}
@@ -399,8 +409,8 @@ We further want to control the value of the minimum eigenvalue of the Hessian at
\begin{equation}
\begin{aligned}
&\mathcal N_H(E,\mu,\lambda^*)
- =\int d\mu_H(\mathbf x,\pmb\omega\mid E,\mu)\,\delta\big(N\lambda^*-\lambda_\mathrm{min}(\operatorname{Hess}H(\mathbf x,\pmb\omega))\big) \\
- &=\lim_{\beta\to\infty}\int d\mu_H(\mathbf x,\pmb\omega\mid E,\mu)
+ =\int d\nu_H(\mathbf x,\pmb\omega\mid E,\mu)\,\delta\big(N\lambda^*-\lambda_\mathrm{min}(\operatorname{Hess}H(\mathbf x,\pmb\omega))\big) \\
+ &=\lim_{\beta\to\infty}\int d\nu_H(\mathbf x,\pmb\omega\mid E,\mu)
\frac{d\mathbf s\,\delta(N-\mathbf s^T\mathbf s)\delta(\mathbf s^T\partial\mathbf g(\mathbf x))e^{-\beta\mathbf s^T\operatorname{Hess}H(\mathbf x,\pmb\omega)\mathbf s}}
{\int d\mathbf s'\,\delta(N-\mathbf s'^T\mathbf s')\delta(\mathbf s'^T\partial\mathbf g(\mathbf x))e^{-\beta\mathbf s'^T\operatorname{Hess}H(\mathbf x,\pmb\omega)\mathbf s'}}
\delta\big(N\lambda^*-\mathbf s^T\operatorname{Hess}H(\mathbf x,\pmb\omega)\mathbf s\big)
@@ -424,7 +434,7 @@ again to treat each of the normalizations in the numerator. This leads to the ex
\begin{equation} \label{eq:min.complexity.expanded}
\begin{aligned}
\Sigma_{\lambda^*}(E,\mu)
- &=\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n}\int\prod_{a=1}^n\Bigg[d\mu_H(\mathbf x_a,\pmb\omega_a\mid E,\mu)\,\delta\big(N\lambda^*-(\mathbf s_a^1)^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega_a)\mathbf s_a^1\big)\\
+ &=\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n}\int\prod_{a=1}^n\Bigg[d\nu_H(\mathbf x_a,\pmb\omega_a\mid E,\mu)\,\delta\big(N\lambda^*-(\mathbf s_a^1)^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega_a)\mathbf s_a^1\big)\\
&\hspace{12em}\times\lim_{m_a\to0}
\left(\prod_{\alpha=1}^{m_a} d\mathbf s_a^\alpha
\,\delta\big(N-(\mathbf s_a^\alpha)^T\mathbf s_a^\alpha\big)
@@ -450,7 +460,11 @@ Finally, the marginal complexity is defined by evaluating the complexity conditi
Several elements of the computation of the marginal complexity, and indeed the
ordinary dominant complexity, follow from the formulae of the above section in
-the same way.
+the same way. The physicists' approach to this problem seeks to convert all of
+the Kac--Rice measure defined in \eqref{eq:kac-rice.measure} and
+\eqref{eq:kac-rice.measure.2} into elements of an exponential integral over
+configuration space. To begin with, all Dirac $\delta$ functions are
+expressed using their Fourier representation, with
\begin{align}
\label{eq:delta.grad}
&\delta\big(\nabla H(\mathbf x_a,\pmb\omega_a)\big)
@@ -458,30 +472,38 @@ the same way.
\label{eq:delta.energy}
&\delta\big(NE-H(\mathbf x_a)\big)
=\int\frac{d\hat\beta_a}{2\pi}e^{\hat\beta_a(NE-H(\mathbf x_a))} \\
- &\delta\big(N\lambda^*-\mathbf s^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega)\mathbf s\big)
\label{eq:delta.eigen}
- =\int\frac{d\hat\lambda_a}{2\pi}e^{\hat\lambda_a(N\lambda^*-\mathbf s^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega)\mathbf s)}
+ &\begin{aligned}
+ &\delta\big(N\lambda^*-(\mathbf s_a^1)^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega_a)\mathbf s_a^1\big) \\
+ &\qquad\qquad\qquad=\int\frac{d\hat\lambda_a}{2\pi}e^{\hat\lambda_a(N\lambda^*-(\mathbf s_a^1)^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega_a)\mathbf s_a^1)}
+ \end{aligned}
\end{align}
-
-Here we will merely sketch the steps that are standard. We start by translating elements of the Kac--Rice measure into terms more familiar to physicists. This means writing \eqref{eq:delta.grad}, \eqref{eq:delta.energy}, and \eqref{eq:delta.eigen}
-for the Dirac $\delta$ functions. At this point we will also discuss an
-important step we will use repeatedly in this paper: to drop the absolute value
-signs around the determinant in the Kac--Rice measure. This can potentially
-lead to severe problems with the complexity. However, it is a justified step
-when the parameters of the problem, i.e., $E$, $\mu$, and $\lambda^*$ put us in
-a regime where the exponential majority of stationary points have the same
-index. This is true for maxima and minima, and for saddle points whose spectra have a strictly positive bulk with a fixed number of negative
-outliers. Dropping the absolute value sign allows us to write
-\begin{equation}
+To do this we have introduced auxiliary fields $\hat{\mathbf x}_a$,
+$\hat\beta_a$, and $\hat\lambda_a$. Because the permutation symmetry of vector
+elements is preserved in \textsc{rsb} order, the order parameters $\hat\beta$
+and $\hat\lambda$ will quickly lose their indices, as they will ubiquitously
+be constant over the replicas at the eventual saddle point solution.
+
+We would like to make a similar treatment of the determinant of the Hessian
+that appears in \eqref{eq:kac-rice.measure}. The standard approach is to drop
+the absolute value function around the determinant. This can potentially lead
+to severe problems with the complexity. However, it is a justified step when
+the parameters of the problem, i.e., $E$, $\mu$, and $\lambda^*$ put us in a
+regime where the exponential majority of stationary points have the same index.
+This is true for maxima and minima, and for saddle points whose spectra have a
+strictly positive bulk with a fixed number of negative outliers. It is in
+particular a safe operation for this problem of marginal minima, which lie
+right at the edge of disaster. Dropping the absolute value sign allows us to
+write
+\begin{equation} \label{eq:determinant}
\det\operatorname{Hess}H(\mathbf x_a, \pmb\omega_a)
- =\int d\pmb\eta_a\,d\bar{\pmb\eta}_a\,e^{\bar{\pmb\eta}_a^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega)\pmb\eta_a}
+ =\int d\bar{\pmb\eta}_a\,d\pmb\eta_a\,e^{-\bar{\pmb\eta}_a^T\operatorname{Hess}H(\mathbf x_a,\pmb\omega_a)\pmb\eta_a}
\end{equation}
for $N$-dimensional Grassmann variables $\bar{\pmb\eta}_a$ and $\pmb\eta_a$. For
the spherical models this step is unnecessary, since there are other ways to
treat the determinant keeping the absolute value signs, as in previous works
-\cite{Folena_2020_Rethinking, Kent-Dobias_2023_How}. However, since other of
-our examples are for models where the same techniques are impossible, it is
-useful to see the fermionic method in action in this simple case.
+\cite{Folena_2020_Rethinking, Kent-Dobias_2023_How}. However, other of
+our examples are for models where the same techniques are impossible.
For the cases studied here, fixing the trace results in a relationship
between $\mu$ and the Lagrange multipliers enforcing the constraints. This is
@@ -556,6 +578,7 @@ which encodes various aspects of the complexity problem, and the measures
\\
d\pmb\omega_a&=\prod_{i=1}^rd\omega_{ai}\,\delta\big(N\mu-\omega_{ai}\partial\partial g_i(\mathbf x_a)\big)
\end{align}
+that collect the individual measures of the various fields embedded in the superfield.
\end{widetext}
With this way of writing the replicated count, the problem of marginal
complexity temporarily takes the schematic form of an equilibrium calculation
@@ -569,21 +592,21 @@ of the calculation, terms involving the superspace must be expanded.
\subsection{Spherical spin glasses}
The spherical spin glasses are a family of models that encompass every
-isotropic Gaussian field on the hypersphere $0=\mathbf x^T\mathbf x-N$ for
-$\mathbf x\in\mathbb R^N$. One can consider the models as defined by centered Gaussian functions $H$ such that the covariance between two points in the configuration space is
+isotropic Gaussian field on the hypersphere defined by all $\mathbf x\in\mathbb R^N$ such that $0=\mathbf x^T\mathbf x-N$. One can consider the models as defined by centered Gaussian functions $H$ such that the covariance between two points in the configuration space is
\begin{equation}
\overline{H(\mathbf x)H(\mathbf x')}=Nf\left(\frac{\mathbf x^T\mathbf x'}N\right)
\end{equation}
for some function $f$ with positive series coefficients. Such functions can be considered to be made up of all-to-all tensorial interactions, with
\begin{equation}
H(\mathbf x)
- =\sum_{p=0}^\infty\frac{\sqrt{f^{(p)}(0)}}{2N^{p-1}}J_{i_1\cdots i_p}x_{i_1}\cdots x_{i_p}
+ =\sum_{p=0}^\infty\frac1{\sqrt{p!}}\sqrt{\frac{f^{(p)}(0)}{N^{p-1}}}
+ \sum_{i_1\cdots i_p}^NJ_{i_1\cdots i_p}x_{i_1}\cdots x_{i_p}
\end{equation}
and the elements of the tensors $J$ being independently distributed with the
unit normal distribution.
The marginal optima of these models can be studied without the methods
-described here, and have been in the past \cite{Folena_2020_Rethinking,
+introduced in this paper, and have been in the past \cite{Folena_2020_Rethinking,
Kent-Dobias_2023_How}. First, these models are Gaussian, so at large $N$ the
Hessian is statistically independent of the gradient and energy
\cite{Bray_2007_Statistics}. Therefore, conditioning the Hessian can be done
@@ -593,11 +616,15 @@ class with the same width of the spectrum $\mu_\mathrm m=2\sqrt{f''(1)}$.
Therefore, all marginal optima in these systems have the same constant shift
$\mu=\pm\mu_\mathrm m$. Despite the fact that the complexity of marginal optima is
well known by simpler methods, it is instructive to carry through the
-calculation for this case, since we will something about its application in
+calculation for this case, since we will learn something about its application in
more nontrivial settings.
The procedure to treat the complexity of the spherical models has been described in
-detail elsewhere \cite{Kent-Dobias_2023_How}.
+detail elsewhere \cite{Kent-Dobias_2023_How}. Here we make only a sketch of the
+steps involved. First the substitutions \eqref{eq:delta.grad},
+\eqref{eq:delta.energy}, and \eqref{eq:delta.eigen} are made to convert the
+Dirac $\delta$ functions into exponential integrals, and the substitution
+\eqref{eq:determinant} is made to likewise convert the determinant.
Once these substitutions have been made, the entire expression
\eqref{eq:min.complexity.expanded} is an exponential integral whose argument is
@@ -617,23 +644,40 @@ The result is an integral that only depends on the many vector variables we
have introduced through their scalar products with each other. We therefore make a change of variables in the integration from those vectors to matrices that encode their possible scalar products. These matrices are
\begin{equation} \label{eq:order.parameters}
\begin{aligned}
- C_{ab}=\frac1N\mathbf x_a\cdot\mathbf x_b
- &&
- R_{ab}=-i\frac1N\mathbf x_a\cdot\hat{\mathbf x}_b
+ &C_{ab}=\frac1N\mathbf x_a\cdot\mathbf x_b
+ \qquad\qquad
+ &R_{ab}=-i\frac1N\mathbf x_a\cdot\hat{\mathbf x}_b&
\\
- D_{ab}=\frac1N\hat{\mathbf x}_a\cdot\hat{\mathbf x}_b
- &&
- F_{ab}=\frac1N\bar{\pmb\eta}_a^T\pmb\eta_b
+ &D_{ab}=\frac1N\hat{\mathbf x}_a\cdot\hat{\mathbf x}_b
+ &G_{ab}=\frac1N\bar{\pmb\eta}_a^T\pmb\eta_b&
\\
- A_{ab}^{cd}=\frac1N\mathbf s_a^c\cdot\mathbf s_b^d
- &&
- X^c_{ab}=\frac1N\mathbf x_a\cdot\mathbf s_b^c
+ &A_{ab}^{\alpha\gamma}=\frac1N\mathbf s_a^\alpha\cdot\mathbf s_b^\gamma
+ &X^\alpha_{ab}=\frac1N\mathbf x_a\cdot\mathbf s_b^\alpha&
\\
- \hat X^c_{ab}=\frac1N\hat{\mathbf x}_a\cdot\mathbf s_b^c
+ &\hat X^\alpha_{ab}=-i\frac1N\hat{\mathbf x}_a\cdot\mathbf s_b^\alpha&&
\end{aligned}
\end{equation}
Order parameters that mix the normal and Grassmann variables generically vanish
-in these settings \cite{Kurchan_1992_Supersymmetry}.
+in these settings and we don't consider them here \cite{Kurchan_1992_Supersymmetry}.
+This transformation changes the measure of the integral, with
+\begin{equation}
+ \begin{aligned}
+ &\prod_{a=1}^nd\mathbf x_a\,\frac{d\hat{\mathbf x}_a}{(2\pi)^N}\,d\bar{\pmb\eta}_a\,d\pmb\eta_a\,\prod_{\alpha=1}^{m_a}d\mathbf s_a^\alpha \\
+ &\quad=dC\,dR\,dD\,dG\,dA\,dX\,d\hat X\,(\det J)^{N/2}(\det G)^{-N/2}
+ \end{aligned}
+\end{equation}
+where $J$ is the Jacobian of the transformation and takes the form
+\begin{equation} \label{eq:coordinate.jacobian}
+ J=\begin{bmatrix}
+ C&iR&X^1&\cdots&X^n \\
+ iR^T&D&i\hat X^1&\cdots&i\hat X^n\\
+ (X^1)^T&i(\hat X^1)^T&A^{11}&\cdots&A^{1n}\\
+ \vdots&\vdots&\vdots&\ddots&\vdots\\
+ (X^n)^T&i(\hat X^n)^T&A^{n1}&\cdots&A^{nn}
+ \end{bmatrix}
+\end{equation}
+and the contribution of the Grassmann integrals produces its own inverted
+Jacobian.
After these steps, which follow identically to those more carefully outlined in
the cited papers \cite{Folena_2020_Rethinking, Kent-Dobias_2023_How}, we arrive at a form for the complexity as an integral over an effective action
@@ -641,26 +685,16 @@ the cited papers \cite{Folena_2020_Rethinking, Kent-Dobias_2023_How}, we arrive
\begin{aligned}
&\Sigma_{\lambda^*}(E,\mu)
=\lim_{\beta\to\infty}\lim_{n\to0}\frac1N\frac\partial{\partial n}
- \int dC\,dR\,dD\,dF \\
+ \int dC\,dR\,dD\,dG \\
&dA\,dX\,d\hat X\,
d\hat\beta\,d\hat\lambda\,e^{N
- n\mathcal S_\mathrm{KR}(\hat\beta,\omega,C,R,D,F)
+ n\mathcal S_\mathrm{KR}(\hat\beta,\omega,C,R,D,G)
+N\mathcal S_\beta(\omega,\hat\lambda,A,X,\hat X)
- +\frac12N\log\det J
}
\end{aligned}
\end{equation}
where the matrix $J$ is the Jacobian associated with the change of variables
from the $\mathbf x$, $\hat{\mathbf x}$, and $\mathbf s$, and has the form
-\begin{equation} \label{eq:coordinate.jacobian}
- J=\begin{bmatrix}
- C&iR&X^1&\cdots&X^n \\
- iR&D&i\hat X^1&\cdots&i\hat X^m\\
- (X^1)^T&i(\hat X^1)^T&A^{11}&\cdots&A^{1n}\\
- \vdots&\vdots&\vdots&\ddots&\vdots\\
- (X^n)^T&i(\hat X^n)^T&A^{n1}&\cdots&A^{nn}
- \end{bmatrix}
-\end{equation}
The structure of the integrand, with the effective action split between two
terms which only share a dependence on the Lagrange multiplier $\omega$ that
enforces the constraint, is generic to Gaussian problems. This is the