From b8597c6627fdd05d0aebf79167e7629882983d3f Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Fri, 25 Oct 2024 15:19:26 +0200 Subject: Replaced arXiv reference with published article: Erba_2024_Quenches --- marginal.bib | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/marginal.bib b/marginal.bib index 62b9abc..3e9eb8b 100644 --- a/marginal.bib +++ b/marginal.bib @@ -277,18 +277,6 @@ booktitle = {2022 IEEE 63rd Annual Symposium on Foundations of Computer Science (FOCS)} } -@unpublished{Erba_2024_Quenches, - author = {Erba, Vittorio and Behrens, Freya and Krzakala, Florent and Zdeborová, Lenka}, - title = {Quenches in the {Sherrington-Kirkpatrick} model}, - year = {2024}, - month = {may}, - url = {http://arxiv.org/abs/2405.04267v2}, - archiveprefix = {arXiv}, - eprint = {2405.04267v2}, - eprintclass = {cond-mat.dis-nn}, - eprinttype = {arxiv} -} - @article{Foini_2012_On, author = {Foini, Laura and Krzakala, Florent and Zamponi, Francesco}, title = {On the relation between kinetically constrained models of glass dynamics and the random first-order transition theory}, @@ -1079,3 +1067,18 @@ isbn = {0198509235} } +@article{Erba_2024_Quenches, + author = {Erba, Vittorio and Behrens, Freya and Krzakala, Florent and Zdeborová, Lenka}, + title = {Quenches in the {Sherrington–Kirkpatrick} model}, + journal = {Journal of Statistical Mechanics: Theory and Experiment}, + publisher = {IOP Publishing}, + year = {2024}, + month = aug, + number = {8}, + volume = {2024}, + pages = {083302}, + url = {http://dx.doi.org/10.1088/1742-5468/ad685a}, + doi = {10.1088/1742-5468/ad685a}, + issn = {1742-5468} +} + -- cgit v1.2.3-70-g09d2 From ce8ff3c8932af48b43a3aacdf6b4f34f100c6d8e Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Fri, 25 Oct 2024 15:19:52 +0200 Subject: Fixed mistake in the definition of the supermatrix of a superoperator, Appendix A --- marginal.tex | 30 ++++++++++++++++++++++-------- 1 file changed, 22 
insertions(+), 8 deletions(-) diff --git a/marginal.tex b/marginal.tex index 508b674..8efa79f 100644 --- a/marginal.tex +++ b/marginal.tex @@ -1772,21 +1772,30 @@ Integrals involving superfields contracted into such operators result in schemat \end{equation} where the usual role of the determinant is replaced by the superdeterminant. The superdeterminant can be defined using the ordinary determinant by writing a -block version of the matrix $M$: if $\mathbf e(1)=\{1,\bar\theta_1\theta_1\}$ is +block version of the matrix $M$. If $\mathbf e(1)=\{1,i\bar\theta_1\theta_1\}$ is the basis vector of the even subspace of the superspace and $\mathbf -f(1)=\{\bar\theta_1,\theta_1\}$ is that of the odd subspace, then we can form a +f(1)=\{i\bar\theta_1,i\theta_1\}$ is that of the odd subspace, dual bases $\mathbf e^\dagger(1)=\{i\bar\theta_1\theta_1,1\}$ and $\mathbf f^\dagger(1)=\{\theta_1,-\bar\theta_1\}$ can be defined by the requirement that +\begin{align} + \int d1\,\mathbf e(1)\mathbf e^\dagger(1)=iI + && + \int d1\,\mathbf f(1)\mathbf f^\dagger(1)=iI \\ + \int d1\,\mathbf e(1)\mathbf f^\dagger(1)=0 + && + \int d1\,\mathbf f(1)\mathbf e^\dagger(1)=0 +\end{align} +With such bases and dual bases defined, we can form a block representation of $M$ in analogy to the matrix form of an operator in quantum mechanics by \begin{equation} \int d1\,d2\,\begin{bmatrix} - \mathbf e(1)M(1,2)\mathbf e(2)^T + \mathbf e(1)M(1,2)\mathbf e^\dagger(2) & - \mathbf e(1)M(1,2)\mathbf f(2)^T + \mathbf e(1)M(1,2)\mathbf f^\dagger(2) \\ - \mathbf f(1)M(1,2)\mathbf e(2)^T + \mathbf f(1)M(1,2)\mathbf e^\dagger(2) & - \mathbf f(1)M(1,2)\mathbf f(2)^T + \mathbf f(1)M(1,2)\mathbf f^\dagger(2) \end{bmatrix} - =\begin{bmatrix} + =i\begin{bmatrix} A & B \\ C & D \end{bmatrix} \end{equation} @@ -1802,7 +1811,12 @@ save for the inverse of $\det D$. 
Likewise, the supertrace of $M$ is is given by \end{equation} The same method can be used to calculate the superdeterminant and supertrace in arbitrary superspaces, where for $\mathbb R^{N|2D}$ each -basis has $2^{2D-1}$ elements. For instance, for $\mathbb R^{N|4}$ we have $\mathbf e(1,2)=\{1,\bar\theta_1\theta_1,\bar\theta_2\theta_2,\bar\theta_1\theta_2,\bar\theta_2\theta_1,\bar\theta_1\bar\theta_2,\theta_1\theta_2,\bar\theta_1\theta_1\bar\theta_2\theta_2\}$ and $\mathbf f(1,2)=\{\bar\theta_1,\theta_1,\bar\theta_2,\theta_2,\bar\theta_1\theta_1\bar\theta_2,\bar\theta_2\theta_2\theta_1,\bar\theta_1\theta_1\theta_2,\bar\theta_2\theta_2\theta_1\}$. +basis has $2^{2D-1}$ elements. For instance, for $\mathbb R^{N|4}$ we have +\begin{align} + &\mathbf e(1,2)=\{1,i\bar\theta_1\theta_1,i\bar\theta_2\theta_2,i\bar\theta_1\theta_2,i\bar\theta_2\theta_1,i\bar\theta_1\bar\theta_2,i\theta_1\theta_2,\bar\theta_1\theta_1\bar\theta_2\theta_2\}\notag \\ + &\mathbf f(1,2)=\{i\bar\theta_1,i\theta_1,i\bar\theta_2,i\theta_2,\bar\theta_1\theta_1\bar\theta_2,\bar\theta_2\theta_2\theta_1,\bar\theta_1\theta_1\theta_2,\bar\theta_2\theta_2\theta_1\} +\end{align} +with the dual bases defined analogously to those above. \section{BRST symmetry} \label{sec:brst} -- cgit v1.2.3-70-g09d2 From 3fdbfe8a8b79f810c173b7eaf657f6fd834d6c0b Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Fri, 25 Oct 2024 15:41:22 +0200 Subject: Clarified that eigenvalue integral relies on symmetry of matrix. --- marginal.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marginal.tex b/marginal.tex index 8efa79f..ddd31da 100644 --- a/marginal.tex +++ b/marginal.tex @@ -141,7 +141,7 @@ at the bottom on the spectrum. \subsection{The general method} -Consider an $N\times N$ real matrix $A$. An arbitrary function $g$ of the +Consider an $N\times N$ real symmetric matrix $A$. 
An arbitrary function $g$ of the minimum eigenvalue of $A$ can be expressed using integrals over $\mathbf s\in\mathbb R^N$ as \begin{equation} \label{eq:λmin} -- cgit v1.2.3-70-g09d2 From 6aad3c973dd44c6e21022e1193f5d6c623d4e23b Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Fri, 25 Oct 2024 16:09:14 +0200 Subject: Fixed typo in integral equation for supermatrix multiplication. --- marginal.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marginal.tex b/marginal.tex index ddd31da..fac7c48 100644 --- a/marginal.tex +++ b/marginal.tex @@ -1759,7 +1759,7 @@ like the super vector $\pmb\phi$ is made up of a linear combination of $N\times N$ regular or Grassmann matrices indexed by every nonvanishing combination of the Grassmann indices $\bar\theta_1,\theta_1,\bar\theta_2,\theta_2$. Such a supermatrix acts on supervectors by ordinary matrix multiplication and convolution in the Grassmann indices, i.e., \begin{equation} - (M\pmb\phi)(1)=\int d1\,M(1,2)\pmb\phi(2) + (M\pmb\phi)(1)=\int d2\,M(1,2)\pmb\phi(2) \end{equation} The identity supermatrix is given by \begin{equation} -- cgit v1.2.3-70-g09d2 From 157b8b12bdad4646773d1c596f99af6a1b4d9c9d Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Fri, 25 Oct 2024 16:11:21 +0200 Subject: Fixed grammatically bad sentence. --- marginal.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/marginal.tex b/marginal.tex index fac7c48..acd3d4a 100644 --- a/marginal.tex +++ b/marginal.tex @@ -1288,8 +1288,8 @@ $\epsilon$ is increased, the most common type of marginal minimum drifts toward points with $\omega_1>\omega_2$. Multispherical spin glasses may be an interesting platform for testing ideas -about which among the possible marginal minima can dynamics, -and cannot. In the limit where $\epsilon=0$ and the configurations of the +about which among the possible marginal minima can attract dynamics +and which cannot. 
In the limit where $\epsilon=0$ and the configurations of the two spheres are independent, the minima found dynamically should be marginal on both subspaces. Just because technically on the expanded configuration space the Cartesian product of a deep stable minimum on one sphere and a marginal minimum on the other is -- cgit v1.2.3-70-g09d2 From d73d3cdc03337fde998db900ed3232151e75f729 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Fri, 25 Oct 2024 19:00:16 +0200 Subject: Clarified the definitions of marginal minima and pseudogap in the opening paragraphs --- marginal.tex | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/marginal.tex b/marginal.tex index acd3d4a..d9e1d47 100644 --- a/marginal.tex +++ b/marginal.tex @@ -61,7 +61,10 @@ dynamics would get stuck at a specific energy level, called the threshold energy. The threshold energy is the energy level at which level sets of the landscape transition from containing mostly saddle points to containing mostly minima. The level set associated with this threshold energy contains mostly \emph{marginal -minima}, or minima that have a pseudogap in the spectrum of their Hessian. +minima}, or minima whose Hessian matrix has a continuous spectral density over +all sufficiently small positive eigenvalues. In most circumstances the spectrum +is \emph{pseudogapped}, which means that the spectral density smoothly +approaches zero as zero eigenvalue is approached from above. However, recent work found that the threshold energy is not important even for simple gradient descent dynamics \cite{Folena_2020_Rethinking, Folena_2023_On, ElAlaoui_2020_Algorithmic}. -- cgit v1.2.3-70-g09d2 From 35c4e960648856414d3425eddb69881e9028d6f9 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Mon, 28 Oct 2024 16:35:14 +0100 Subject: Clarified notation surrounding gradient and Hessian, and standardized arXiv bib entries. 
--- marginal.bib | 99 ++++++++++++++++++++++++++---------------------------------- marginal.tex | 13 ++++++-- 2 files changed, 53 insertions(+), 59 deletions(-) diff --git a/marginal.bib b/marginal.bib index 3e9eb8b..784d3f0 100644 --- a/marginal.bib +++ b/marginal.bib @@ -239,12 +239,10 @@ @unpublished{ElAlaoui_2020_Algorithmic, author = {El Alaoui, Ahmed and Montanari, Andrea}, title = {Algorithmic Thresholds in Mean Field Spin Glasses}, - year = {2020}, - month = {Sept}, url = {http://arxiv.org/abs/2009.11481v1}, archiveprefix = {arXiv}, date = {2020-09-24T04:22:42Z}, - eprint = {2009.11481v1}, + eprint = {2009.11481}, eprintclass = {cond-mat.stat-mech}, eprinttype = {arxiv}, primaryclass = {cond-mat.stat-mech} @@ -515,29 +513,23 @@ @unpublished{Huang_2023_Algorithmic, author = {Huang, Brice and Sellke, Mark}, title = {Algorithmic Threshold for Multi-Species Spherical Spin Glasses}, - year = {2023}, - month = {mar}, - url = {http://arxiv.org/abs/2303.12172v2}, + url = {http://arxiv.org/abs/2303.12172}, archiveprefix = {arXiv}, - date = {2023-03-21T20:09:08Z}, - eprint = {2303.12172v2}, + eprint = {2303.12172}, eprintclass = {math.PR}, - eprinttype = {arxiv}, - urldate = {2024-06-13T13:10:56.404805Z} + primaryclass = {math.PR}, + eprinttype = {arxiv} } @unpublished{Huang_2023_Strong, author = {Huang, Brice and Sellke, Mark}, title = {Strong Topological Trivialization of Multi-Species Spherical Spin Glasses}, - year = {2023}, - month = {aug}, - url = {http://arxiv.org/abs/2308.09677v2}, + url = {http://arxiv.org/abs/2308.09677}, archiveprefix = {arXiv}, - date = {2023-08-18T16:56:19Z}, - eprint = {2308.09677v2}, + eprint = {2308.09677}, eprintclass = {math.PR}, - eprinttype = {arxiv}, - urldate = {2024-06-13T13:07:13.561947Z} + primaryclass = {math.PR}, + eprinttype = {arxiv} } @article{Huang_2024_Optimization, @@ -600,13 +592,11 @@ @unpublished{Kamali_2023_Stochastic, author = {Kamali, Persia Jana and Urbani, Pierfrancesco}, title = {Stochastic Gradient Descent 
outperforms Gradient Descent in recovering a high-dimensional signal in a glassy energy landscape}, - year = {2023}, - month = {sep}, url = {http://arxiv.org/abs/2309.04788v2}, - note = {}, archiveprefix = {arXiv}, - eprint = {2309.04788v2}, + eprint = {2309.04788}, eprintclass = {cs.LG}, + primaryclass = {cs.LG}, eprinttype = {arxiv} } @@ -627,11 +617,10 @@ @unpublished{Kent-Dobias_2024_Algorithm-independent, author = {Kent-Dobias, Jaron}, title = {Algorithm-independent bounds on complex optimization through the statistics of marginal optima}, - year = {2024}, url = {https://arxiv.org/abs/2407.02092}, archiveprefix = {arXiv}, - eprint = {2407.02092}, - primaryclass = {cond-mat.dis-nn} + primaryclass = {cond-mat.dis-nn}, + eprint = {2407.02092} } @article{Kent-Dobias_2024_Arrangement, @@ -652,7 +641,6 @@ @unpublished{Kent-Dobias_2024_Conditioning, author = {Kent-Dobias, Jaron}, title = {Conditioning the complexity of random landscapes on marginal optima}, - year = {2024}, url = {https://arxiv.org/abs/2407.02082}, archiveprefix = {arXiv}, eprint = {2407.02082}, @@ -765,26 +753,23 @@ @unpublished{Montanari_2023_Solving, author = {Montanari, Andrea and Subag, Eliran}, title = {Solving overparametrized systems of random equations: I. 
Model and algorithms for approximate solutions}, - year = {2023}, - month = {jun}, url = {http://arxiv.org/abs/2306.13326v1}, note = {}, archiveprefix = {arXiv}, - eprint = {2306.13326v1}, + eprint = {2306.13326}, eprintclass = {math.PR}, + primaryclass = {math.PR}, eprinttype = {arxiv} } @unpublished{Montanari_2024_On, author = {Montanari, Andrea and Subag, Eliran}, title = {On {Smale}'s 17th problem over the reals}, - year = {2024}, - month = {may}, - url = {http://arxiv.org/abs/2405.01735v1}, - note = {}, + url = {http://arxiv.org/abs/2405.01735}, archiveprefix = {arXiv}, - eprint = {2405.01735v1}, + eprint = {2405.01735}, eprintclass = {cs.DS}, + primaryclass = {cs.DS}, eprinttype = {arxiv} } @@ -835,11 +820,9 @@ @unpublished{Parisi_1995-01_On, author = {Parisi, Giorgio}, title = {On the Statistical Properties of the Large Time Zero Temperature Dynamics of the {SK} Model}, - year = {1995}, - month = {jan}, - url = {http://arxiv.org/abs/cond-mat/9501045v1}, + url = {http://arxiv.org/abs/cond-mat/9501045}, archiveprefix = {arXiv}, - eprint = {cond-mat/9501045v1}, + eprint = {cond-mat/9501045}, eprinttype = {arxiv} } @@ -887,12 +870,11 @@ @unpublished{Shklovskii_2024_Half, author = {Shklovskii, B. I.}, title = {Half century of {Efros}-{Shklovskii} {Coulomb} gap. 
Romance with {Coulomb} interaction and disorder}, - year = {2024}, - month = {mar}, - url = {http://arxiv.org/abs/2403.19793v5}, + url = {http://arxiv.org/abs/2403.19793}, archiveprefix = {arXiv}, - eprint = {2403.19793v5}, + eprint = {2403.19793}, eprintclass = {cond-mat.mtrl-sci}, + primaryclass = {cond-mat.mtrl-sci}, eprinttype = {arxiv} } @@ -928,15 +910,12 @@ @unpublished{Subag_2021_TAP, author = {Subag, Eliran}, title = {{TAP} approach for multi-species spherical spin glasses {I}: general theory}, - year = {2021}, - month = {nov}, url = {http://arxiv.org/abs/2111.07132v1}, archiveprefix = {arXiv}, - date = {2021-11-13T15:21:40Z}, - eprint = {2111.07132v1}, + eprint = {2111.07132}, eprintclass = {math.PR}, - eprinttype = {arxiv}, - urldate = {2024-06-13T13:04:28.790463Z} + primaryclass = {math.PR}, + eprinttype = {arxiv} } @article{Subag_2023_TAP, @@ -992,25 +971,22 @@ @unpublished{Urbani_2024_Statistical, author = {Urbani, Pierfrancesco}, title = {Statistical physics of complex systems: glasses, spin glasses, continuous constraint satisfaction problems, high-dimensional inference and neural networks}, - year = {2024}, - month = {may}, - url = {http://arxiv.org/abs/2405.06384v1}, - note = {}, + url = {http://arxiv.org/abs/2405.06384}, archiveprefix = {arXiv}, - eprint = {2405.06384v1}, + eprint = {2405.06384}, eprintclass = {cond-mat.dis-nn}, + primaryclass = {cond-mat.dis-nn}, eprinttype = {arxiv} } @unpublished{Vivo_2024_Random, author = {Vivo, Pierpaolo}, title = {Random Linear Systems with Quadratic Constraints: from Random Matrix Theory to replicas and back}, - year = {2024}, - month = {jan}, - url = {http://arxiv.org/abs/2401.03209v2}, + url = {http://arxiv.org/abs/2401.03209}, archiveprefix = {arXiv}, - eprint = {2401.03209v2}, + eprint = {2401.03209}, eprintclass = {cond-mat.stat-mech}, + primaryclass = {cond-mat.stat-mech}, eprinttype = {arxiv} } @@ -1082,3 +1058,14 @@ issn = {1742-5468} } +@unpublished{Kent-Dobias_2024_On, + author = {Kent-Dobias, 
Jaron}, + title = {On the topology of solutions to random continuous constraint satisfaction problems}, + url = {http://arxiv.org/abs/2409.12781}, + archiveprefix = {arXiv}, + eprint = {2409.12781}, + eprintclass = {cond-mat.dis-nn}, + primaryclass = {cond-mat.dis-nn}, + eprinttype = {arxiv} +} + diff --git a/marginal.tex b/marginal.tex index d9e1d47..b9fabc9 100644 --- a/marginal.tex +++ b/marginal.tex @@ -469,8 +469,9 @@ extremizing the Lagrangian L(\mathbf x,\pmb\omega)=H(\mathbf x)+\sum_{i=1}^r\omega_ig_i(\mathbf x) \end{equation} with respect to $\mathbf x$ and the Lagrange multipliers -$\pmb\omega=\{\omega_1,\ldots,\omega_r\}$. The corresponding gradient and -Hessian of the energy associated with this constrained extremal problem are +$\pmb\omega=\{\omega_1,\ldots,\omega_r\}$. To write the gradient and Hessian of the energy, which are necessary to count stationary points, care must be taken to ensure they are constrained to the tangent space of the configuration manifold. For our purposes, the Lagrangian formalism offers a solution: the gradient $\nabla H:\mathbb R^N\times\mathbb R^r\to\mathbb R^N$ and +Hessian $\operatorname{Hess} H:\mathbb R^N\times\mathbb R^r\to\mathbb R^{N\times N}$ of the energy $H$ can be written as the simple vector derivatives of +the Lagrangian $L$, with \begin{align} &\nabla H(\mathbf x,\pmb\omega) =\partial L(\mathbf x,\pmb\omega) @@ -483,7 +484,13 @@ Hessian of the energy associated with this constrained extremal problem are \end{aligned} \end{align} where $\partial=\frac\partial{\partial\mathbf x}$ will always represent the -derivative with respect to the vector argument $\mathbf x$. +derivative with respect to the vector argument $\mathbf x$. Note that unlike +the energy, which is a function of the configuration $\mathbf x$ alone, the +gradient and Hessian depend also on the Lagrange multipliers $\pmb\omega$. 
In situations +with an extensive number of constraints, it is important to take seriously +contributions of the form $\frac{\partial^2L}{\partial\mathbf +x\partial\pmb\omega}$ to the Hessian \cite{Kent-Dobias_2024_On}. However, the cases we study here have +$N^0$ constraints and these contributions appear as finite-$N$ corrections. The number of stationary points in a landscape for a particular function $H$ is found by -- cgit v1.2.3-70-g09d2 From 49e34257f5974cf63ab925f260457a1d5a7be079 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Mon, 28 Oct 2024 16:35:59 +0100 Subject: Fixed mistake surrounding the relationship between μ and Lagrange multipliers in some cases. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- marginal.tex | 70 ++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/marginal.tex b/marginal.tex index b9fabc9..442e7e2 100644 --- a/marginal.tex +++ b/marginal.tex @@ -641,16 +641,31 @@ treat the determinant keeping the absolute value signs, as in previous works \cite{Folena_2020_Rethinking, Kent-Dobias_2023_How}. However, other of our examples are for models where the same techniques are impossible. -For the cases studied here, fixing the trace results in a relationship -between $\mu$ and the Lagrange multipliers enforcing the constraints. This is -because the trace of $\partial\partial H$ is typically an order of $N$ smaller -than the trace of $\partial\partial g_i$. The result is that +Finally, the $\delta$-function fixing the trace of the Hessian to $\mu$ in +\eqref{eq:kac-rice.measure.2} must be addressed. One could treat it using a +Fourier representation as in (\ref{eq:delta.grad}--\ref{eq:delta.eigen}), but +this is inconvenient because a term of the form +$\operatorname{Tr}\partial\partial H(\mathbf x)$ in the exponential integrand +cannot be neatly captured in superspace representation introduced in the next +section. 
However, in the cases we study in this paper a simplification can be made: the trace of $\partial\partial H$ can be separated into two pieces, one +that is spatially independent and one that is typically small, or +\begin{equation} \label{eq:mu.star} + \operatorname{Tr}\partial\partial H(\mathbf x)=N\mu^*_H+\Delta_H(\mathbf x) +\end{equation} +where $\overline{\mu^*_H}=\mu^*$ and $\overline{\Delta_H(\mathbf x)}=O(N^0)$. +Then fixing the trace of the Hessian to $\mu$ implies that \begin{equation} - \mu - =\frac1N\operatorname{Tr}\operatorname{Hess}H(\mathbf x) - =\frac1N\sum_{i=1}^r\omega_i\operatorname{Tr}\partial\partial g_i(\mathbf x) - +O(N^{-1}) + \begin{aligned} + \mu + &=\frac1N\operatorname{Tr}\operatorname{Hess}H(\mathbf x) + =\frac1N\left(\partial\partial H(\mathbf x)+ + \sum_{i=1}^r\omega_i\operatorname{Tr}\partial\partial g_i(\mathbf x)\right) + \\ + &=\mu^*+\frac1N\sum_{i=1}^r\omega_i\operatorname{Tr}\partial\partial g_i(\mathbf x) + +O(N^{-1}) + \end{aligned} \end{equation} +for typical samples $H$. In particular, here we study only cases with quadratic $g_i$, which results in a linear expression relating $\mu$ and the $\omega_i$ that is independent of $\mathbf x$. Since $H$ contains the disorder of the problem, this simplification means @@ -720,7 +735,7 @@ functions and the determinant made. The new measures \delta\big((\mathbf s_a^\alpha)^T\partial\mathbf g(\mathbf x_a)\big) \\ d\pmb\omega&=\bigg(\prod_{i=1}^rd\omega_i\bigg) - \,\delta\bigg(N\mu-\sum_i^r\omega_i\operatorname{Tr}\partial\partial g_i\bigg) + \,\delta\bigg(N\mu-\mu^*-\sum_i^r\omega_i\operatorname{Tr}\partial\partial g_i\bigg) \end{align} collect the individual measures of the various fields embedded in the superfield, along with their constraints. \end{widetext} @@ -765,7 +780,8 @@ unit normal distribution \cite{Crisanti_1993_The}. We focus on marginal minima in models with $f'(0)=0$, which corresponds to models without a random external field. 
Such a random field would correspond in each individual sample $H$ to a signal, and therefore complicate the analysis by correlating the positions of -stationary points and the eigenvectors of their Hessians. +stationary points and the eigenvectors of their Hessians. Here, $\mu^*$ of +\eqref{eq:mu.star} is zero. The marginal optima of these models can be studied without the methods introduced in this paper, and have been in the past \cite{Folena_2020_Rethinking, @@ -1030,7 +1046,7 @@ for $\mathbf x,\mathbf x'\in\mathbb R^N$ by \overline{H_i(\mathbf x)H_j(\mathbf x')} =N\delta_{ij}f_i\left(\frac{\mathbf x\cdot\mathbf x'}N\right) \end{equation} -with the functions $f_1$ and $f_2$ not necessarily the same. +with the functions $f_1$ and $f_2$ not necessarily the same. As for the spherical spin glasses, $\mu^*$ of \eqref{eq:mu.star} is zero. In this problem, there is an energetic competition between the independent spin glass energies on each sphere and their tendency to align or anti-align through @@ -1374,9 +1390,12 @@ As in the previous sections, we used the method of Lagrange multipliers to analy -V_k(\mathbf x)\partial\partial V_k(\mathbf x)\right]+\omega I \end{aligned} \end{align} -As in the spherical and multispherical spin glasses, fixing the trace of the Hessian -is equivalent to constraining the value of the Lagrange -multiplier $\omega=\mu$. +Unlike in the spherical and multispherical spin glasses, the value $\mu^*$ +defined in \eqref{eq:mu.star} giving the typical value of +$\frac1N\operatorname{Tr}\partial\partial H$ is not always zero. Instead +$\mu^*=-f'(0)$, nonzero where there is a linear term in $V$. Fixing the trace +of the Hessian is therefore equivalent to setting $\omega=\mu+f'(0)$. 
+ The derivation of the marginal complexity for this model is complicated, but can be made schematically like that of the derivation of the equilibrium free @@ -1389,10 +1408,11 @@ $\lambda^*$ is given by \begin{aligned} \mathcal N(E,\mu,\lambda^*)^n &=\int d\hat\beta\,d\hat\lambda\prod_{a=1}^n\lim_{m_a\to0}\prod_{\alpha=1}^{m_a}d\pmb\phi_a^\alpha - \exp\left\{ + \\ + &\qquad\times\exp\left\{ \delta^{\alpha1}N(\hat\beta E+\hat\lambda\lambda^*) -\frac12\int d1\,d2\,\left[B^\alpha(1,2)\sum_{k=1}^MV_k(\pmb\phi_a^\alpha(1,2))^2 - -\mu\|\pmb\phi_a^\alpha(1,2)\|^2\right] + -\big(\mu+f'(0)\big)\|\pmb\phi_a^\alpha(1,2)\|^2\right] \right\} \end{aligned} \end{equation} @@ -1509,7 +1529,7 @@ with an effective action \begin{equation} \begin{aligned} &\mathcal S_\mathrm{RSS}(\hat\beta,\hat\lambda,r,d,g,q_0,\tilde q_0\mid\lambda^*,E,\mu,\beta) - =\hat\beta E-\mu(r+g+\hat\lambda) + =\hat\beta E-\big(\mu+f'(0)\big)(r+g+\hat\lambda) +\hat\lambda\lambda^* +\frac12\log\left(\frac{d+r^2}{g^2} \times\frac{1-2q_0+\tilde q_0^2}{(1-q_0)^2}\right) \\ @@ -1532,7 +1552,7 @@ taking the zero-temperature limit, we find \begin{equation} \begin{aligned} &\mathcal S_\mathrm{RSS}(\hat\beta,\hat\lambda,r,d,g,y,\Delta z\mid\lambda^*,E,\mu,\infty) - =\hat\beta E-\mu(r+g+\hat\lambda) + =\hat\beta E-\big(\mu+f'(0)\big)(r+g+\hat\lambda) +\hat\lambda\lambda^* +\frac12\log\left(\frac{d+r^2}{g^2}\times\frac{y^2-2\Delta z}{y^2}\right) \\ @@ -2012,9 +2032,9 @@ the replicated count of stationary points can be written =\int d\hat\beta\prod_{a=1}^n\,d\pmb\phi_a\, \exp\bigg[ N\hat\beta E \\ - &\qquad-\frac12\int d1\,\left( + &-\frac12\int d1\,\left( B(1)\sum_{k=1}^MV_k(\pmb\phi_a(1))^2 - -\mu\|\pmb\phi_a(1)\|^2 + -\big(\mu+f'(0)\big)\|\pmb\phi_a(1)\|^2 \right) \bigg] \end{aligned} @@ -2057,9 +2077,9 @@ Making the $M$ independent Gaussian integrals, we find \begin{equation} \begin{aligned} &\mathcal N(E,\mu)^n - =\int d\hat\beta\left(\prod_{a=1}^nd\pmb\phi_a\right) - \exp\bigg[ - nN\hat\beta 
E+\frac\mu2\sum_a^n\int d1\,\|\pmb\phi_a\|^2 \\ + =\int d\hat\beta\left(\prod_{a=1}^nd\pmb\phi_a\right) \\ + &\times\exp\bigg[ + nN\hat\beta E+\frac{\mu+f'(0)}2\sum_a^n\int d1\,\|\pmb\phi_a\|^2 \\ &\quad-\frac M2\log\operatorname{sdet}\left( \delta_{ab}\delta(1,2)+B(1)f\left(\frac{\pmb\phi_a(1)\cdot\pmb\phi_b(2)}N\right) \right) @@ -2080,7 +2100,7 @@ We therefore have &\mathcal N(E,\mu)^n =\int d\hat\beta\,d\mathbb Q\, \exp\bigg\{ - nN\hat\beta E+N\frac\mu2\operatorname{sTr}\mathbb Q + nN\hat\beta E+N\frac{\mu+f'(0)}2\operatorname{sTr}\mathbb Q +\frac N2\log\operatorname{sdet}\mathbb Q -\frac M2\log\operatorname{sdet}\left[ \delta_{ab}\delta(1,2)+B(1)f(\mathbb Q_{ab}(1,2)) @@ -2110,7 +2130,7 @@ where the effective action is given by \begin{equation} \begin{aligned} \mathcal S_\mathrm{KR}(\hat\beta,C,R,D,G) - &=\hat\beta E+\lim_{n\to0}\frac1n\Bigg(-\mu\operatorname{Tr}(G+R) + &=\hat\beta E+\lim_{n\to0}\frac1n\Bigg(-\big(\mu+f'(0)\big)\operatorname{Tr}(G+R) +\frac12\log\det\big[G^{-2}(CD+R^2)\big] +\alpha\log\det\big[I+G\odot f'(C)\big] \\ -- cgit v1.2.3-70-g09d2 From 405f6727a6915c61e09160fba52dd8832c2207e3 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 10:19:16 +0100 Subject: Slightly modified convention for superbases. --- marginal.tex | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/marginal.tex b/marginal.tex index 442e7e2..c8d0b8b 100644 --- a/marginal.tex +++ b/marginal.tex @@ -1804,26 +1804,26 @@ where the usual role of the determinant is replaced by the superdeterminant. The superdeterminant can be defined using the ordinary determinant by writing a block version of the matrix $M$. 
If $\mathbf e(1)=\{1,i\bar\theta_1\theta_1\}$ is the basis vector of the even subspace of the superspace and $\mathbf -f(1)=\{i\bar\theta_1,i\theta_1\}$ is that of the odd subspace, dual bases $\mathbf e^\dagger(1)=\{i\bar\theta_1\theta_1,1\}$ and $\mathbf f^\dagger(1)=\{\theta_1,-\bar\theta_1\}$ can be defined by the requirement that +f(1)=\{i\bar\theta_1,i\theta_1\}$ is that of the odd subspace, dual bases $\mathbf e^\dagger(1)=\{i\bar\theta_1\theta_1,1\}$ and $\mathbf f^\dagger(1)=\{-\theta_1,\bar\theta_1\}$ can be defined by the requirement that \begin{align} - \int d1\,\mathbf e(1)\mathbf e^\dagger(1)=iI + &\int d1\,e_i^\dagger(1)e_j(1)=i\delta_{ij} && - \int d1\,\mathbf f(1)\mathbf f^\dagger(1)=iI \\ - \int d1\,\mathbf e(1)\mathbf f^\dagger(1)=0 + \int d1\,f_i^\dagger(1)f_j(1)=i\delta_{ij} \\ + &\int d1\,e_i^\dagger(1)f_j(1)=0 && - \int d1\,\mathbf f(1)\mathbf e^\dagger(1)=0 + \int d1\,f_i^\dagger(1)e_j(1)=0 \end{align} With such bases and dual bases defined, we can form a block representation of $M$ in analogy to the matrix form of an operator in quantum mechanics by \begin{equation} \int d1\,d2\,\begin{bmatrix} - \mathbf e(1)M(1,2)\mathbf e^\dagger(2) + \mathbf e^\dagger(1)M(1,2)\mathbf e(2) & - \mathbf e(1)M(1,2)\mathbf f^\dagger(2) + \mathbf e^\dagger(1)M(1,2)\mathbf f(2) \\ - \mathbf f(1)M(1,2)\mathbf e^\dagger(2) + \mathbf f^\dagger(1)M(1,2)\mathbf e(2) & - \mathbf f(1)M(1,2)\mathbf f^\dagger(2) + \mathbf f^\dagger(1)M(1,2)\mathbf f(2) \end{bmatrix} =i\begin{bmatrix} A & B \\ C & D -- cgit v1.2.3-70-g09d2 From 568324cd4bc0cf2dd6a81464b1c4c700ee7ebfa5 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 10:22:37 +0100 Subject: Another tweak to superbases. 
--- marginal.tex | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/marginal.tex b/marginal.tex index c8d0b8b..88809d2 100644 --- a/marginal.tex +++ b/marginal.tex @@ -1802,13 +1802,13 @@ Integrals involving superfields contracted into such operators result in schemat \end{equation} where the usual role of the determinant is replaced by the superdeterminant. The superdeterminant can be defined using the ordinary determinant by writing a -block version of the matrix $M$. If $\mathbf e(1)=\{1,i\bar\theta_1\theta_1\}$ is +block version of the matrix $M$. If $\mathbf e(1)=\{1,\bar\theta_1\theta_1\}$ is the basis vector of the even subspace of the superspace and $\mathbf -f(1)=\{i\bar\theta_1,i\theta_1\}$ is that of the odd subspace, dual bases $\mathbf e^\dagger(1)=\{i\bar\theta_1\theta_1,1\}$ and $\mathbf f^\dagger(1)=\{-\theta_1,\bar\theta_1\}$ can be defined by the requirement that +f(1)=\{\bar\theta_1,\theta_1\}$ is that of the odd subspace, dual bases $\mathbf e^\dagger(1)=\{\bar\theta_1\theta_1,1\}$ and $\mathbf f^\dagger(1)=\{-\theta_1,\bar\theta_1\}$ can be defined by the requirement that \begin{align} - &\int d1\,e_i^\dagger(1)e_j(1)=i\delta_{ij} + &\int d1\,e_i^\dagger(1)e_j(1)=\delta_{ij} && - \int d1\,f_i^\dagger(1)f_j(1)=i\delta_{ij} \\ + \int d1\,f_i^\dagger(1)f_j(1)=\delta_{ij} \\ &\int d1\,e_i^\dagger(1)f_j(1)=0 && \int d1\,f_i^\dagger(1)e_j(1)=0 @@ -1825,7 +1825,7 @@ block representation of $M$ in analogy to the matrix form of an operator in quan & \mathbf f^\dagger(1)M(1,2)\mathbf f(2) \end{bmatrix} - =i\begin{bmatrix} + =\begin{bmatrix} A & B \\ C & D \end{bmatrix} \end{equation} -- cgit v1.2.3-70-g09d2 From f824dea3df7492fecfa95d34b33900a533bfd699 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 10:24:54 +0100 Subject: Changed also convention for superbasis in R^N|4 --- marginal.tex | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/marginal.tex b/marginal.tex index 
88809d2..85743c3 100644 --- a/marginal.tex +++ b/marginal.tex @@ -1843,8 +1843,17 @@ The same method can be used to calculate the superdeterminant and supertrace in arbitrary superspaces, where for $\mathbb R^{N|2D}$ each basis has $2^{2D-1}$ elements. For instance, for $\mathbb R^{N|4}$ we have \begin{align} - &\mathbf e(1,2)=\{1,i\bar\theta_1\theta_1,i\bar\theta_2\theta_2,i\bar\theta_1\theta_2,i\bar\theta_2\theta_1,i\bar\theta_1\bar\theta_2,i\theta_1\theta_2,\bar\theta_1\theta_1\bar\theta_2\theta_2\}\notag \\ - &\mathbf f(1,2)=\{i\bar\theta_1,i\theta_1,i\bar\theta_2,i\theta_2,\bar\theta_1\theta_1\bar\theta_2,\bar\theta_2\theta_2\theta_1,\bar\theta_1\theta_1\theta_2,\bar\theta_2\theta_2\theta_1\} + &\mathbf e(1,2)=\{ + 1,\bar\theta_1\theta_1,\bar\theta_2\theta_2, + \bar\theta_1\theta_2,\bar\theta_2\theta_1, + \bar\theta_1\bar\theta_2,\theta_1\theta_2, + \bar\theta_1\theta_1\bar\theta_2\theta_2 + \}\notag \\ + &\mathbf f(1,2)=\{ + \bar\theta_1,\theta_1,\bar\theta_2,\theta_2, + \bar\theta_1\theta_1\bar\theta_2,\bar\theta_2\theta_2\bar\theta_1, + \bar\theta_1\theta_1\theta_2,\bar\theta_2\theta_2\theta_1 + \} \end{align} with the dual bases defined analogously to those above. -- cgit v1.2.3-70-g09d2 From 9cba972037904577a402062c35193ce05f0eb2ea Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 10:46:26 +0100 Subject: Restored paragraph on difference between this work and Müller et al. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- marginal.tex | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/marginal.tex b/marginal.tex index 85743c3..3902aa8 100644 --- a/marginal.tex +++ b/marginal.tex @@ -1705,6 +1705,25 @@ self-similarity and stochastic stability of minima have recently been suggested as a route to understanding this problem, but this approach is still in its infancy \cite{Urbani_2024_Statistical}. 
+The title of our paper and that of \citeauthor{Muller_2006_Marginal} suggest +they address the same topic, but this is not the case +\cite{Muller_2006_Marginal}. That work differs in three important and +fundamental ways. First, it describes minima of the TAP free energy and +involves peculiarities specific to the TAP. Second, it describes dominant +minima which happen to be marginal, not a condition for finding subdominant marginal minima. Finally, it +focuses on minima with a single soft direction (which are the typical minima of +the low temperature Sherrington--Kirkpatrick TAP free energy), while we aim to +avoid such minima in favor of ones that have a pseudogap (which we argue are relevant +to out-of-equilibrium dynamics). The fact that the typical minima studied by +\citeauthor{Muller_2006_Marginal} are not marginal in this latter sense may +provide an intuitive explanation for the seeming discrepancy between the proof +that the low-energy Sherrington--Kirkpatrick model cannot be sampled +\cite{ElAlaoui_2022_Sampling} and the proof that a message passing algorithm +can find near-ground states \cite{Montanari_2021_Optimization}: the algorithm +finds the atypical low-lying states that are marginal in the sense considered +here but cannot find the typical ones that are marginal in the sense of +\citeauthor{Muller_2006_Marginal}. + \begin{acknowledgements} JK-D is supported by a \textsc{DynSysMath} Specific Initiative of the INFN. 
\end{acknowledgements} -- cgit v1.2.3-70-g09d2 From 2849f72a3988443bee53c93b959902c1079db19f Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 10:47:11 +0100 Subject: Fixed capitalization of Gibbs in bib entry --- marginal.bib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marginal.bib b/marginal.bib index 784d3f0..f6656c3 100644 --- a/marginal.bib +++ b/marginal.bib @@ -264,7 +264,7 @@ @inproceedings{ElAlaoui_2022_Sampling, author = {El Alaoui, Ahmed and Montanari, Andrea and Sellke, Mark}, - title = {Sampling from the {Sherrington}-{Kirkpatrick} Gibbs measure via algorithmic stochastic localization}, + title = {Sampling from the {Sherrington}-{Kirkpatrick} {Gibbs} measure via algorithmic stochastic localization}, publisher = {IEEE}, year = {2022}, month = {10}, -- cgit v1.2.3-70-g09d2 From 8afe75733c423d131e1cbed04e12930cfacbd256 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 10:51:10 +0100 Subject: Tweaked wording in paragraph on relationship with work of Müller et al. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- marginal.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marginal.tex b/marginal.tex index 3902aa8..faa45ba 100644 --- a/marginal.tex +++ b/marginal.tex @@ -1721,7 +1721,7 @@ that the low-energy Sherrington--Kirkpatrick model cannot be sampled \cite{ElAlaoui_2022_Sampling} and the proof that a message passing algorithm can find near-ground states \cite{Montanari_2021_Optimization}: the algorithm finds the atypical low-lying states that are marginal in the sense considered -here but cannot find the typical ones that are marginal in the sense of +here but cannot find the typical ones considered by \citeauthor{Muller_2006_Marginal}. 
\begin{acknowledgements} -- cgit v1.2.3-70-g09d2 From cae62fde30c9da6a91daf478ecd318366d2a9d1a Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 10:56:09 +0100 Subject: Changed mentions of "companion" paper to "related work" or similar --- marginal.tex | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/marginal.tex b/marginal.tex index faa45ba..32f1fd5 100644 --- a/marginal.tex +++ b/marginal.tex @@ -119,7 +119,7 @@ continuous spectrum, we enforce the condition that the spectrum has a pseudogap, and is therefore marginal. We demonstrate the method on the spherical spin glasses, where it is unnecessary but instructive, and on extensions of the spherical models where the technique is more useful. -In a companion paper, we compare the marginal complexity with the performance +In a related work, we compare the marginal complexity with the performance of gradient descent and approximate message passing algorithms \cite{Kent-Dobias_2024_Algorithm-independent}. An outline of this paper follows. In Section \ref{sec:eigenvalue} we introduce the technique for conditioning on @@ -1652,7 +1652,7 @@ to determine the marginal stability continue to hold even in non-Gaussian cases where the complexity and the condition to fix the minimum eigenvalue are tangled together. -In our companion paper, we use a sum of squared random functions model to explore the relationship between the marginal +In a related paper, we use a sum of squared random functions model to explore the relationship between the marginal complexity and the performance of two generic algorithms: gradient descent and approximate message passing \cite{Kent-Dobias_2024_Algorithm-independent}. We show that the range of @@ -1678,8 +1678,7 @@ We have introduced a method for conditioning complexity on the marginality of stationary points. This method is general, and permits conditioning without first needing to understand the statistics of the Hessian at stationary points. 
We used our approach to study marginal complexity in three different models of random landscapes, showing that the method works and can be -applied to models whose marginal complexity was not previously known. In our -companion paper, we further show that marginal complexity in the third +applied to models whose marginal complexity was not previously known. In related work, we further show that marginal complexity in the third model of sums of squared random functions can be used to effectively bound algorithmic performance \cite{Kent-Dobias_2024_Algorithm-independent}. -- cgit v1.2.3-70-g09d2 From 8af129d0bda10cb76bf4cf9e7fbcb40febe7d338 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 10:58:52 +0100 Subject: Clarified conditions for decomposition of action Added language to prevent implying that Gaussianity is sufficient to see the decomposition of the effective action into two loosely connected terms. --- marginal.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/marginal.tex b/marginal.tex index 32f1fd5..cf9db4c 100644 --- a/marginal.tex +++ b/marginal.tex @@ -956,9 +956,9 @@ the contributions from the marginal pieces of the calculation, and is given by \end{equation} \end{widetext} The fact that the complexity can be split into two relatively independent -pieces in this way is a characteristic of the Gaussian nature of the spherical +pieces in this way is a characteristic of the isotropic and Gaussian nature of the spherical spin glass. In Section \ref{sec:least.squares} we will study a model whose -energy is not Gaussian and where such a decomposition is impossible. +energy is isotropic but not Gaussian and where such a decomposition is impossible. There are some dramatic simplifications that emerge from the structure of this particular problem. First, notice that the dependence on the parameters $X$ and $\hat X$ are purely quadratic.
-- cgit v1.2.3-70-g09d2 From 8b2218dbcb8b05059312fa7741e5510c0ca077f2 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 13:46:30 +0100 Subject: Grammar tweak --- marginal.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marginal.tex b/marginal.tex index cf9db4c..3e89bd7 100644 --- a/marginal.tex +++ b/marginal.tex @@ -61,7 +61,7 @@ dynamics would get stuck at a specific energy level, called the threshold energy. The threshold energy is the energy level at which level sets of the landscape transition from containing mostly saddle points to containing mostly minima. The level set associated with this threshold energy contains mostly \emph{marginal -minima}, or minima whose Hessian matrix has a continuous spectral density over +minima}, or minima whose Hessian matrices have a continuous spectral density over all sufficiently small positive eigenvalues. In most circumstances the spectrum is \emph{pseudogapped}, which means that the spectral density smoothly approaches zero as zero eigenvalue is approached from above. -- cgit v1.2.3-70-g09d2 From 79309cd70841ecc1a0c34776ab962785c1e6ebce Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 13:59:40 +0100 Subject: Wording tweak. --- marginal.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marginal.tex b/marginal.tex index 3e89bd7..1f1143c 100644 --- a/marginal.tex +++ b/marginal.tex @@ -669,7 +669,7 @@ for typical samples $H$. In particular, here we study only cases with quadratic $g_i$, which results in a linear expression relating $\mu$ and the $\omega_i$ that is independent of $\mathbf x$. Since $H$ contains the disorder of the problem, this simplification means -that the effect of fixing the trace is independent of the disorder and only +that the effect of fixing the trace is largely independent of the disorder and mostly depends on properties of the constraint manifold.
\subsection{Superspace representation} -- cgit v1.2.3-70-g09d2 From d944977b38915e8fe51ef1cca68b95d0a8107217 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 14:00:55 +0100 Subject: Wording tweak. --- marginal.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marginal.tex b/marginal.tex index 1f1143c..30d3dce 100644 --- a/marginal.tex +++ b/marginal.tex @@ -1394,7 +1394,7 @@ Unlike in the spherical and multispherical spin glasses, the value $\mu^*$ defined in \eqref{eq:mu.star} giving the typical value of $\frac1N\operatorname{Tr}\partial\partial H$ is not always zero. Instead $\mu^*=-f'(0)$, nonzero where there is a linear term in $V$. Fixing the trace -of the Hessian is therefore equivalent to setting $\omega=\mu+f'(0)$. +of the Hessian is therefore equivalent to constraining the value of the Lagrange multiplier $\omega=\mu+f'(0)$. The derivation of the marginal complexity for this model is complicated, but -- cgit v1.2.3-70-g09d2 From 77cf86b193f24630890990e105fe40730d353fd0 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 14:49:46 +0100 Subject: Fixed spelling mistake. --- marginal.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marginal.tex b/marginal.tex index 30d3dce..7a81a74 100644 --- a/marginal.tex +++ b/marginal.tex @@ -1245,7 +1245,7 @@ asymptotic behavior of the overlaps. These take the form $q^{ij}_0=q^{ij}_d-y^{ij}_0\beta^{-1}-z^{ij}_0\beta^{-2}$. Notice that in this case, the asymptotic behavior of the off-diagonal elements is to approach the value of the diagonal rather than to approach one. We also require $\tilde q^{ij}_d=q^{ij}_d-\tilde y^{ij}_d\beta^{-1}-\tilde -z^{ij}_d\beta^{-2}$, i.e., that the tilde diagonal terms also approache the +z^{ij}_d\beta^{-2}$, i.e., that the tilde diagonal terms also approach the same diagonal value as the untilde terms, but with potentially different rates. 
As before, in order for the logarithmic term to stay finite, there are necessary -- cgit v1.2.3-70-g09d2 From 1a2018495c5ef8d2ad84a496ede7b8cbab486a15 Mon Sep 17 00:00:00 2001 From: Jaron Kent-Dobias Date: Tue, 29 Oct 2024 15:09:11 +0100 Subject: Added response to reviewers. --- response.txt | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 response.txt diff --git a/response.txt b/response.txt new file mode 100644 index 0000000..e072f5d --- /dev/null +++ b/response.txt @@ -0,0 +1,153 @@ +I thank the referees for their useful feedback, which led to positive changes +to the manuscript. All changes made since the first submission can be found +highlighted in an attached PDF generated by latexdiff. + +In addition to the changes made in response to referee comments detailed below, +there were three other changes made to the resubmitted manuscript: + + - References to a "companion paper" were changed to a "related work", since + the two papers are not being considered as companions. + + - Soon after submission I identified a mistake in Appendix A regarding the + matrix form of a super linear operator. This mistake did not affect any of + the formulae or results of the rest of the manuscript, but has nevertheless + been amended. + + - I found and repaired a spelling mistake in the paragraph after what is now + equation 67 + +Report of the First Referee: +> 1) Terms such as "marginal minima" and "pseudogap" are used without clear +> definitions. These terms refers to different concepts depending on various +> fields, which can yield meaningless confusions. Provide clear definitions for +> these technical terms when they appear at the first time. + +The text of the second paragraph of the introduction has been expanded to more +precisely define the terms "marginal minima" and "pseudogap". 
Its final +sentences now read: + + The level set associated with this threshold energy contains mostly + \emph{marginal minima}, or minima whose Hessian matrices have a continuous + spectral density over all sufficiently small positive eigenvalues. In most + circumstances the spectrum is \emph{pseudogapped}, which means that the + spectral density smoothly approaches zero as zero eigenvalue is approached + from above. + +If this level of definition is not sufficiently clear to the reviewer, or if +there are further terms I have neglected, I welcome further comment on the +matter. + +> 2) In eqs (23)-(25), I could not figure out why both notations of L(x,w) and +> H(x, w) are used. If the two notations refer to the identical quantity, it +> should be unified. Otherwise, their difference should be explained. + +This confusion stems from a notational ambiguity. The domain of H and the +domain of ∇H are not the same, and writing ∇H(x, ω) is not meant to imply the +existence of a function H(x, ω). I have expanded the text around equations (24) +and (25) in an attempt to clarify this point. + +> 3) At the first reading, I am confused with eq. (38). The author writes +> "This is because the trace of $\partial \partial H$ is typically an order of +> $N$ smaller than trace of $\partial \partial g_i". This would be true for +> Hamiltonian of eq. (45). However, does it hold for sums of squared random +> functions such as eq. (71)? Let us consider a trivial case +> $V_i(x) = r_i \cdot x/\sqrt{N}$, where $r_i$ is a random vector from +> $N(0, I_{N\times N})$. This makes eq. (71) a quadratic form of a negative +> definite matrix, for which its trace of Hessian scale as $O(N)$. This may be +> an exceptional case. However, statements such as the above before showing +> concrete target systems can confuse readers. I would like to ask the author +> to amend the writing. + +I thank the referee for catching this mistake.
Fortunately, its effect on the +manuscript was minor, because correctly accounting for cases like the referee +describes results in only a constant correction to μ. Since only the relative +value of μ is important for identifying marginal minima, the marginal +complexity calculated while neglecting it is still correct, as in the model +examined in the "related work" arXiv:2407.02092 which has such a linear term. + +I have changed the text of the manuscript and several equations to correct this +mistake. This can be seen in the vicinity of equations (using the new +manuscript's numbering) 38/39, after equation 46, after equation 59, and after +equation 75. In Sections IV.C and D this leads to changes in display math that +were not captured by the latexdiff, in equations 76, 85, 86, D2, D6, D8, and +D11, all consisting of replacing μ with μ + f'(0). + +Report of the Second Referee: +> (1) The first two examples (spherical spin glasses and multi-spherical spin +> glasses) exhibit the property that the complexity of marginal states splits into +> two contributions: the “unconstrained” complexity and a large deviation function +> associated with the smallest eigenvalue of the Hessian. In the text it is +> claimed that this behavior follows from the Gaussian nature of the Hessian. Is +> this statement general? If one constructs models whose Hessians are not +> invariant—for example, with an entry-dependent variance pattern—can one still +> expect this statement to hold? + +This question is an astute one, and I cannot speak to whether Gaussianity alone +is a sufficient condition for the separation of the action. Positing properties +of the Hessian is not enough for reasoning about this, since the key question +is how correlations between the Hessian, gradient, and energy compare in +magnitude with their self correlations. So, one would need to construct an +ensemble of random functions whose Hessian has such a property to begin +addressing this. 
+ +Rather than venture into this probably rich research line, I have simply +clarified in the text after what is now equation 53 that this is characteristic +of isotropic and Gaussian random functions. + +> (2) It appears that Eq. (66) and its zero-temperature limit, when evaluated at +> the saddle point, provide a parametrization of the large deviation functions for +> the smallest Hessian eigenvalue, analogous to Eq. (52) for the GOE case. Is +> there any way to express this large deviation function more transparently, or in +> a form that makes the limit ϵ→0 easier to read? + +Unfortunately I have not found a way to nicely express such a thing. The +zero-temperature limit of what is now equation 67 is a much more unwieldy +expression than equation 67 itself, and is not appropriate for inclusion in a +manuscript let alone a source of intuitive insight. Though the referee's +suggestion of a reduction in the ε→0 limit does exist, it involves the +nontrivial coordination of limiting saddle-point values in the variables making +up the expression. + +> (3) Maybe the author can comment on the relation between his approach and the +> methods developed in the past to track marginal minima (mostly in the sense of +> an isolated eigenvalue of the Hessian rather than pseudogapped), such as: +> +> Marginal states in mean-field glasses +> Markus Müller, Luca Leuzzi, and Andrea Crisanti +> PHYSICAL REVIEW B 74, 134431 2006 + +I have added a final paragraph to the conclusion discussing the relationship +between these two papers. It reads + + The title of our paper and that of \citeauthor{Muller_2006_Marginal} suggest + they address the same topic, but this is not the case + \cite{Muller_2006_Marginal}. That work differs in three important and + fundamental ways. First, it describes minima of the TAP free energy and + involves peculiarities specific to the TAP.
Second, it describes dominant + minima which happen to be marginal, not a condition for finding subdominant + marginal minima. Finally, it focuses on minima with a single soft direction + (which are the typical minima of the low temperature Sherrington--Kirkpatrick + TAP free energy), while we aim to avoid such minima in favor of ones that + have a pseudogap (which we argue are relevant to out-of-equilibrium + dynamics). The fact that the typical minima studied by + \citeauthor{Muller_2006_Marginal} are not marginal in this latter sense may + provide an intuitive explanation for the seeming discrepancy between the + proof that the low-energy Sherrington--Kirkpatrick model cannot be sampled + \cite{ElAlaoui_2022_Sampling} and the proof that a message passing algorithm + can find near-ground states \cite{Montanari_2021_Optimization}: the algorithm + finds the atypical low-lying states that are marginal in the sense considered + here but cannot find the typical ones considered by + \citeauthor{Muller_2006_Marginal}. + +> (4) When introducing the method around Eq. (1), it should be stated that this +> works for symmetric matrices A. + +The text now reflects this. + +> Typos: +> +> Eq (A8): integral should be over d2 +> Page 11, last line second column: “minima can dynamics” —> a verb is missing +> here + +These small mistakes have been fixed in the new manuscript. -- cgit v1.2.3-70-g09d2