summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jaron Kent-Dobias <jaron@kent-dobias.com> 2024-06-27 12:26:23 +0200
committer: Jaron Kent-Dobias <jaron@kent-dobias.com> 2024-06-27 12:26:23 +0200
commit: 54a0abc832b0df79ad1bf170c84684850c8da9b4 (patch)
tree: 65640105da30558a7124ef43a04af4a86966575d
parent: abab4928e793343d8d7fc917648cbeb43610b93f (diff)
downloadmarginal-54a0abc832b0df79ad1bf170c84684850c8da9b4.tar.gz
marginal-54a0abc832b0df79ad1bf170c84684850c8da9b4.tar.bz2
marginal-54a0abc832b0df79ad1bf170c84684850c8da9b4.zip
Writing in the sum of squares section.
-rw-r--r--marginal.tex60
1 files changed, 39 insertions, 21 deletions
diff --git a/marginal.tex b/marginal.tex
index 099f603..fc89c5a 100644
--- a/marginal.tex
+++ b/marginal.tex
@@ -1275,7 +1275,8 @@ landscape: the problem of random nonlinear least squares optimization. Though,
for reasons we will see, it is easier to make predictions for random nonlinear
\emph{most} squares, i.e., the problem of maximizing the sum of squared terms.
We again take a spherical configuration space with $\mathbf x\in S^{N-1}$ and $0=g(\mathbf x)=\frac12(\|\mathbf x\|^2-N)$ as in the spherical spin glasses, and consider a set
-of $M$ random functions $V_k:\mathbf S^{N-1}\to\mathbb R$ that are centered Gaussians with covariance
+of $M=\alpha N$ random functions $V_k:S^{N-1}\to\mathbb R$ that are
+centered Gaussians with covariance
\begin{equation}
\overline{V_i(\mathbf x)V_j(\mathbf x')}=\delta_{ij}f\left(\frac{\mathbf x\cdot\mathbf x'}N\right)
\end{equation}
@@ -1344,12 +1345,14 @@ $\lambda^*$ is given by
&=\int d\hat\beta\,d\hat\lambda\prod_{a=1}^n\lim_{m_a\to0}\prod_{\alpha=1}^{m_a}d\pmb\phi_a^\alpha
\exp\left\{
\delta^{\alpha1}N(\hat\beta E+\hat\lambda\lambda^*)
- -\frac12\int d1\,d2\,B^\alpha(1,2)\left[\sum_{k=1}^MV_k(\pmb\phi_a^\alpha)^2
- -\mu(\|\pmb\phi_a^\alpha\|^2-N)\right]
+ -\frac12\int d1\,d2\,\left[B^\alpha(1,2)\sum_{k=1}^MV_k(\pmb\phi_a^\alpha(1,2))^2
+ -\mu\|\pmb\phi_a^\alpha(1,2)\|^2\right]
\right\}
\end{aligned}
\end{equation}
-The first step to evaluate this expression is to linearize the dependence on the random functions $V$. This is accomplished by inserting into the integral a Dirac $\delta$ function fixing the value of the energy for each replica, or
+The first step to evaluate this expression is to linearize the dependence on
+the random functions $V$. This is accomplished by inserting into the integral a
+Dirac $\delta$ function fixing the value of the energy for each replica, or
\begin{equation}
\delta\big(
V_k(\pmb\phi_a^\alpha(1,2))-v_{ka}^\alpha(1,2)
@@ -1363,38 +1366,49 @@ The first step to evaluate this expression is to linearize the dependence on the
where we have introduced auxiliary fields $\hat v$. With this inserted into the
integral, all other instances of $V$ are replaced by $v$, and the only
remaining dependence on the disorder is from the term $\hat vV$ arising from
-the Fourier representation of the Dirac $\delta$ function. This term is linear in $V$, and therefore the random functions can be averaged over to produce
+the Fourier representation of the Dirac $\delta$ function. This term is linear
+in $V$, and therefore the random functions can be averaged over to produce
\begin{equation}
\overline{
\exp\left[
- i\sum_{ka\alpha}\int d1\,d2\,\hat v_{ka}^\alpha(1,2)
+ i\sum_k^M\sum_a^n\sum_\alpha^{m_a}\int d1\,d2\,\hat v_{ka}^\alpha(1,2)
V_k(\pmb\phi_a^\alpha(1,2))
\right]
}
=
- -\frac N2\sum_{ab}^n\sum_{\alpha\gamma}^{m_a}\sum_k^{\alpha N}\int d1\,d2\,d3\,d4\,
- \hat v_{ka}^\alpha(1,2)f\big(\pmb\phi_a^\alpha(1,2)^T\pmb\phi_b^\gamma(3,4)\big)\hat v_{kb}^\gamma(3,4)
+ \exp\left[-\frac N2\sum_{ab}^n\sum_{\alpha\gamma}^{m_a}\sum_k^M\int d1\,d2\,d3\,d4\,
+ \hat v_{ka}^\alpha(1,2)f\big(\pmb\phi_a^\alpha(1,2)\cdot\pmb\phi_b^\gamma(3,4)\big)\hat v_{kb}^\gamma(3,4)\right]
\end{equation}
-The entire integrand is now quadratic in the $v$ and $\hat v$ with the kernel
+\end{widetext}
+The entire integrand now factorizes over the index $k$ and is quadratic in the
+$v$ and $\hat v$, with the kernel
\begin{equation}
\begin{bmatrix}
- B_a^\alpha(1,2)\delta(1,3)\delta(2,4)\delta_{ab}\delta^{\alpha\gamma} & i\delta(1,3)\,\delta(2,4) \delta_{ab}\delta^{\alpha\gamma}\\
- i\delta(1,3)\,\delta(2,4) \delta_{ab}\delta^{\alpha\gamma}& f\big(\pmb\phi_a^\alpha(1,2)^T\pmb\phi_b^\gamma(3,4)\big)
+ B^\alpha(1,2)\delta(1,3)\delta(2,4)\delta_{ab}\delta^{\alpha\gamma}
+ & i\delta(1,3)\,\delta(2,4) \delta_{ab}\delta^{\alpha\gamma}\\
+ i\delta(1,3)\,\delta(2,4) \delta_{ab}\delta^{\alpha\gamma}
+ & f\big(\pmb\phi_a^\alpha(1,2)\cdot\pmb\phi_b^\gamma(3,4)\big)
\end{bmatrix}
\end{equation}
The integration over the $v$ and $\hat v$ results in a term in the effective action of the form
-\begin{equation}
- -\frac M2\log\operatorname{sdet}\left(
- \delta(1,3)\,\delta(2,4) \delta_{ab}\delta^{\alpha\gamma}
- +B_a^\alpha(1,2)f\big(\pmb\phi_a^\alpha(1,2)^T\pmb\phi_b^\gamma(3,4)\big)
- \right)
+\begin{equation} \label{eq:sdet.1}
+ \begin{aligned}
+ &-\frac M2\log\operatorname{sdet}\bigg[
+ \delta(1,3)\,\delta(2,4) \delta_{ab}\delta^{\alpha\gamma} \\
+ &\hspace{7em}+B^\alpha(1,2)f\big(\pmb\phi_a^\alpha(1,2)\cdot\pmb\phi_b^\gamma(3,4)\big)
+ \bigg]
+ \end{aligned}
\end{equation}
-When expanded, this supermatrix is constructed of the scalar products of the
-real and Grassmann vectors that make up $\pmb\phi$. The change of variables to
-these order parameters again results in the Jacobian of \eqref{eq:coordinate.jacobian}, contributing
+When expanded, the supermatrix
+$\pmb\phi_a^\alpha(1,2)\cdot\pmb\phi_b^\gamma(3,4)$ is constructed of the
+scalar products of the real and Grassmann vectors that make up $\pmb\phi$. The
+change of variables to these order parameters again results in the Jacobian of
+\eqref{eq:coordinate.jacobian}, contributing
\begin{equation}
\frac N2\log\det J(C,R,D,Q,X,\hat X)-\frac N2\log\det G^2
\end{equation}
+to the effective action.
+
Up to this point, the expressions above are general and independent of a given
ansatz. However, we expect that the order parameters $X$ and $\hat X$ are zero,
since this case is isotropic. Applying this ansatz here avoids a dramatically
@@ -1405,7 +1419,9 @@ that $Q^{\alpha\gamma}=Q_{aa}^{\alpha\gamma}$ independently of the index $a$,
implying that correlations in the tangent space of typical stationary points
are the same.
-Given these simplifying forms of the ansatz, taking the superdeterminant yields
+Given these simplifying forms of the ansatz, taking the superdeterminant in
+\eqref{eq:sdet.1} yields
+\begin{widetext}
\begin{equation}
\begin{aligned}
\log\det\left\{
@@ -1418,6 +1434,7 @@ Given these simplifying forms of the ansatz, taking the superdeterminant yields
-2\log\det(I+G\odot f'(C))
\end{aligned}
\end{equation}
+\end{widetext}
where once again $\odot$ is the Hadamard product and $A^{\circ n}$ gives the
Hadamard power of $A$. We can already see one substantive difference between
the structure of this problem and that of the spherical models: the effective
@@ -1438,6 +1455,7 @@ We further take a planted replica symmetric structure for the matrix $Q$,
identical to that in \eqref{eq:Q.structure}. The resulting effective action is
the same as if we had made an annealed calculation in the complexity, though
the previous expressions are general.
+\begin{widetext}
\begin{equation}
\begin{aligned}
\mathcal S_\beta
@@ -1861,7 +1879,7 @@ dominant optima in the random nonlinear least squares problem of section
\ref{sec:least.squares}. While in this paper we only treat problems with a
replica symmetric structure, formulas for the effective action are generic to
any structure and provide a starting point for analyzing the challenging
-full-RSB setting.
+full \textsc{rsb} setting.
Using the $\mathbb R^{N|2}$ superfields
\begin{equation}