summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaron Kent-Dobias <jaron@kent-dobias.com>2024-08-03 15:51:41 +0200
committerJaron Kent-Dobias <jaron@kent-dobias.com>2024-08-03 15:51:41 +0200
commit427a8fa280f32f0a04a539cbbd85471934e2f97a (patch)
tree8dd9b94299b3b7952c8a7ab58c6496d6332c613b
parentae9115876b609316ff02e0b596956d2459bc3448 (diff)
downloadSciPostPhys_18_158-427a8fa280f32f0a04a539cbbd85471934e2f97a.tar.gz
SciPostPhys_18_158-427a8fa280f32f0a04a539cbbd85471934e2f97a.tar.bz2
SciPostPhys_18_158-427a8fa280f32f0a04a539cbbd85471934e2f97a.zip
Lots of writing.
-rw-r--r--figs/quenched.pdfbin10836 -> 10849 bytes
-rw-r--r--topology.bib146
-rw-r--r--topology.tex99
3 files changed, 205 insertions, 40 deletions
diff --git a/figs/quenched.pdf b/figs/quenched.pdf
index dac0d02..f2fa3ad 100644
--- a/figs/quenched.pdf
+++ b/figs/quenched.pdf
Binary files differ
diff --git a/topology.bib b/topology.bib
index 1b19671..293c288 100644
--- a/topology.bib
+++ b/topology.bib
@@ -1,3 +1,33 @@
+@article{Altieri_2019_Constraint,
+ author = {Altieri, Ada and Franz, Silvio},
+ title = {Constraint satisfaction mechanisms for marginal stability and criticality in large ecosystems},
+ journal = {Physical Review E},
+ publisher = {American Physical Society (APS)},
+ year = {2019},
+ month = {January},
+ number = {1},
+ volume = {99},
+ pages = {010401},
+ url = {http://dx.doi.org/10.1103/PhysRevE.99.010401},
+ doi = {10.1103/physreve.99.010401},
+ issn = {2470-0053}
+}
+
+@article{Annesi_2023_Star-shaped,
+ author = {Annesi, Brandon Livio and Lauditi, Clarissa and Lucibello, Carlo and Malatesta, Enrico M. and Perugini, Gabriele and Pittorino, Fabrizio and Saglietti, Luca},
+ title = {Star-Shaped Space of Solutions of the Spherical Negative Perceptron},
+ journal = {Physical Review Letters},
+ publisher = {American Physical Society (APS)},
+ year = {2023},
+ month = {11},
+ number = {22},
+ volume = {131},
+ pages = {227301},
+ url = {http://dx.doi.org/10.1103/PhysRevLett.131.227301},
+ doi = {10.1103/physrevlett.131.227301},
+ issn = {1079-7114}
+}
+
@book{Audin_2014_Morse,
author = {Audin, Michèle and Damian, Mihai},
title = {Morse theory and {Floer} homology},
@@ -9,6 +39,107 @@
translator = {Erné, Reinie}
}
+@article{Baldassi_2016_Unreasonable,
+ author = {Baldassi, Carlo and Borgs, Christian and Chayes, Jennifer T. and Ingrosso, Alessandro and Lucibello, Carlo and Saglietti, Luca and Zecchina, Riccardo},
+ title = {Unreasonable effectiveness of learning neural networks: From accessible states and robust ensembles to basic algorithmic schemes},
+ journal = {Proceedings of the National Academy of Sciences},
+ publisher = {Proceedings of the National Academy of Sciences},
+ year = {2016},
+ month = {11},
+ number = {48},
+ volume = {113},
+ pages = {E7655--E7662},
+ url = {http://dx.doi.org/10.1073/pnas.1608103113},
+ doi = {10.1073/pnas.1608103113},
+ issn = {1091-6490}
+}
+
+@article{Baldassi_2019_Properties,
+ author = {Baldassi, Carlo and Malatesta, Enrico M. and Zecchina, Riccardo},
+ title = {Properties of the Geometry of Solutions and Capacity of Multilayer Neural Networks with Rectified Linear Unit Activations},
+ journal = {Physical Review Letters},
+ publisher = {American Physical Society (APS)},
+ year = {2019},
+ month = {10},
+ number = {17},
+ volume = {123},
+ pages = {170602},
+ url = {https://doi.org/10.1103%2Fphysrevlett.123.170602},
+ doi = {10.1103/physrevlett.123.170602}
+}
+
+@article{Baldassi_2023_Typical,
+ author = {Baldassi, Carlo and Malatesta, Enrico M. and Perugini, Gabriele and Zecchina, Riccardo},
+ title = {Typical and atypical solutions in nonconvex neural networks with discrete and continuous weights},
+ journal = {Physical Review E},
+ publisher = {American Physical Society (APS)},
+ year = {2023},
+ month = {August},
+ number = {2},
+ volume = {108},
+ pages = {024310},
+ url = {http://dx.doi.org/10.1103/PhysRevE.108.024310},
+ doi = {10.1103/physreve.108.024310},
+ issn = {2470-0053}
+}
+
+@unpublished{Beneventano_2023_On,
+ author = {Beneventano, Pierfrancesco},
+ title = {On the Trajectories of {SGD} Without Replacement},
+ year = {2023},
+ month = {dec},
+ url = {http://arxiv.org/abs/2312.16143v2},
+ archiveprefix = {arXiv},
+ eprint = {2312.16143v2},
+ eprintclass = {cs.LG},
+ eprinttype = {arxiv}
+}
+
+@article{Franz_2016_The,
+ author = {Franz, Silvio and Parisi, Giorgio},
+ title = {The simplest model of jamming},
+ journal = {Journal of Physics A: Mathematical and Theoretical},
+ publisher = {IOP Publishing},
+ year = {2016},
+ month = {February},
+ number = {14},
+ volume = {49},
+ pages = {145001},
+ url = {http://dx.doi.org/10.1088/1751-8113/49/14/145001},
+ doi = {10.1088/1751-8113/49/14/145001},
+ issn = {1751-8121}
+}
+
+@article{Franz_2017_Universality,
+ author = {Franz, Silvio and Parisi, Giorgio and Sevelev, Maxime and Urbani, Pierfrancesco and Zamponi, Francesco},
+ title = {Universality of the {SAT-UNSAT} (jamming) threshold in non-convex continuous constraint satisfaction problems},
+ journal = {SciPost Physics},
+ publisher = {Stichting SciPost},
+ year = {2017},
+ month = {June},
+ number = {3},
+ volume = {2},
+ pages = {019},
+ url = {http://dx.doi.org/10.21468/SciPostPhys.2.3.019},
+ doi = {10.21468/scipostphys.2.3.019},
+ issn = {2542-4653}
+}
+
+@article{Franz_2019_Critical,
+ author = {Franz, Silvio and Sclocchi, Antonio and Urbani, Pierfrancesco},
+ title = {Critical Jammed Phase of the Linear Perceptron},
+ journal = {Physical Review Letters},
+ publisher = {American Physical Society (APS)},
+ year = {2019},
+ month = {September},
+ number = {11},
+ volume = {123},
+ pages = {115702},
+ url = {http://dx.doi.org/10.1103/PhysRevLett.123.115702},
+ doi = {10.1103/physrevlett.123.115702},
+ issn = {1079-7114}
+}
+
@article{Fyodorov_2004_Complexity,
author = {Fyodorov, Yan V.},
title = {Complexity of Random Energy Landscapes, Glass Transition, and Absolute Value of the Spectral Determinant of Random Matrices},
@@ -138,6 +269,21 @@
primaryclass = {cond-mat.dis-nn}
}
+@article{Mezard_2009_Constraint,
+ author = {Mézard, Marc and Mora, Thierry},
+ title = {Constraint satisfaction problems and neural networks: A statistical physics perspective},
+ journal = {Journal of Physiology-Paris},
+ publisher = {Elsevier BV},
+ year = {2009},
+ month = {January},
+ number = {1–2},
+ volume = {103},
+ pages = {107--113},
+ url = {http://dx.doi.org/10.1016/j.jphysparis.2009.05.013},
+ doi = {10.1016/j.jphysparis.2009.05.013},
+ issn = {0928-4257}
+}
+
@unpublished{Montanari_2023_Solving,
author = {Montanari, Andrea and Subag, Eliran},
title = {Solving overparametrized systems of random equations: I. Model and algorithms for approximate solutions},
diff --git a/topology.tex b/topology.tex
index decfa6f..5233c54 100644
--- a/topology.tex
+++ b/topology.tex
@@ -25,20 +25,43 @@
\affiliation{Istituto Nazionale di Fisica Nucleare, Sezione di Roma I, Rome, Italy 00184}
\begin{abstract}
- We consider the set of solutions to $M$ random polynomial equations on the
- $(N-1)$-sphere. When solutions exist, they form a manifold. We compute the
- average Euler characteristic of this manifold, and find different behaviors
- depending on $\alpha=M/N$. When $\alpha<1$, the average Euler characteristic
- is subexponential in $N$ but positive, indicating the presence of few
- simply-connected components. When $1\leq\alpha<\alpha_\text{\textsc{sat}}$, it is
- exponentially large in $N$, indicating a shattering transition in the space
- of solutions. Finally, when $\alpha_\text{\textsc{sat}}\leq\alpha$, the number of
- solutions vanish. We further compute the average logarithm of the Euler
- characteristic, which is representative of typical manifolds.
+ We consider the set of solutions to $M$ random polynomial equations with
+ independent Gaussian coefficients on the $(N-1)$-sphere. When solutions
+ exist, they form a manifold. We compute the average Euler characteristic of
+ this manifold, and find different behaviors depending on the variances of the
+ coefficients and $\alpha=M/N$. When $\alpha<1$, the average Euler
+ characteristic is subexponential in $N$ but positive, indicating the presence
+ of few connected components. When $1<\alpha<\alpha_\text{\textsc{sat}}$, it
+ is exponentially large in $N$, indicating a shattering transition of the
+ manifold of solutions into many components. Finally, when
+ $\alpha_\text{\textsc{sat}}<\alpha$, the set of solutions vanishes. Some
+ choices of variances produce $\alpha_\text{\textsc{sat}}<1$, and the shattering
+ transition never takes place. We further compute the average logarithm of the
+ Euler characteristic, which is representative of typical manifolds, and find
+ that most of the quantitative predictions agree.
\end{abstract}
\maketitle
+Constraint satisfaction problems seek configurations that simultaneously
+satisfy a set of equations, and form a basis for thinking about problems as
+diverse as neural networks \cite{Mezard_2009_Constraint}, granular materials
+\cite{Franz_2017_Universality}, ecosystems \cite{Altieri_2019_Constraint}, and
+confluent tissues \cite{Urbani_2023_A}. All but the last of these examples deal
+with sets of inequalities, while the last considers a set of equality
+constraints. Inequality constraints are familiar in situations like zero-cost
+solutions in neural networks with ReLu activations and stable equilibrium in the
+forces between physical objects. Equality constraints naturally appear in the
+zero-gradient solutions to overparameterized smooth neural networks and,
+indeed, in vertex models of tissues.
+
+In such problems, there is great interest in characterizing structure in the
+set of solutions, which can be influential in how algorithms behave when trying
+to solve them \cite{Baldassi_2016_Unreasonable, Baldassi_2019_Properties,
+Beneventano_2023_On}. Here, we show how \emph{topological} information about
+the set of solutions can be calculated in a simple model of satisfying random
+nonlinear equalities. This allows us to reason about the connectivity of this
+solution set.
We consider the problem of finding configurations $\mathbf x\in\mathbb R^N$
lying on the $(N-1)$-sphere $\|\mathbf x\|^2=N$ that simultaneously satisfy $M$
@@ -47,15 +70,18 @@ constraints are taken to be centered Gaussian random functions with covariance
\begin{equation} \label{eq:covariance}
\overline{V_i(\mathbf x)V_j(\mathbf x')}=\delta_{ij}f\left(\frac{\mathbf x\cdot\mathbf x'}N\right)
\end{equation}
-for some choice of $f$.
-When the covariance function $f$ is polynomial, the $V_k$ are also polynomial, with terms of degree $p$ in $f$ corresponding to all possible terms of degree $p$ in $V_k$. In particular, taking
+for some choice of $f$. When the covariance function $f$ is polynomial, the
+$V_k$ are also polynomial, with a term of degree $p$ in $f$ corresponding to
+all possible terms of degree $p$ in $V_k$. In particular, taking
\begin{equation}
V_k(\mathbf x)
=\sum_{p=0}^\infty\frac1{p!}\sqrt{\frac{f^{(p)}(0)}{N^p}}
\sum_{i_1\cdots i_p}^NJ^{(k,p)}_{i_1\cdots i_p}x_{i_1}\cdots x_{i_p}
\end{equation}
-and each of the elements of the tensors $J^{(k,p)}$ as independently
-distributed with a unit normal distribution satisfies \eqref{eq:covariance}.
+with the elements of the tensors $J^{(k,p)}$ as independently distributed
+unit normal random variables satisfies \eqref{eq:covariance}. The size of the
+series coefficients of $f$ therefore control the variances in the coefficients
+of random polynomial constraints.
This problem or small variations thereof have attracted attention recently for
@@ -65,21 +91,19 @@ tissues \cite{Fyodorov_2019_A, Fyodorov_2020_Counting,
Kamali_2023_Stochastic, Urbani_2024_Statistical, Montanari_2023_Solving,
Montanari_2024_On, Kent-Dobias_2024_Conditioning, Kent-Dobias_2024_Algorithm-independent}. In each of these cases, the authors studied properties of
the cost function
-\begin{equation}
+\begin{equation} \label{eq:cost}
\mathcal C(\mathbf x)=\frac12\sum_{k=1}^MV_k(\mathbf x)^2
\end{equation}
-which achieves zero cost only for configurations that satisfy all the
-constraints.
-Here we dispense with defining a cost function, and instead study
-the set of solutions directly.
-
-The set of solutions to our nonlinear random constraint satisfaction problem
+which achieves zero only for configurations that satisfy all the constraints.
+Here we dispense with the cost function and study the set of solutions
+directly.
+This set
can be written as
\begin{equation}
- \Omega=\{\mathbf x\in\mathbb R^N\mid \|\mathbf x\|^2=N,0=V_k(\mathbf x)
- \;\forall\;k=1,\ldots,M\}
+ \Omega=\{\mathbf x\in\mathbb R^N\mid \|\mathbf x\|^2=N,V_k(\mathbf x)=0
+ \;\forall\;k=1,\ldots,M\}.
\end{equation}
-$\Omega$ is almost always a manifold without singular points. The conditions for a singular point are that
+Because the constraints are all smooth functions, $\Omega$ is almost always a manifold without singular points. The conditions for a singular point are that
$0=\frac\partial{\partial\mathbf x}V_k(\mathbf x)$ for all $k$. This is
equivalent to asking that the constraints $V_k$ all have a stationary point at
the same place. When the $V_k$ are independent and random, this is vanishingly
@@ -87,9 +111,10 @@ unlikely, requiring $NM$ independent equations to be simultaneously satisfied.
This means that different connected components of the set of solutions do not
intersect, nor are there self-intersections, without extraordinary fine-tuning.
-One result of this previous work is the value of $\alpha$ for the
-satisfiability transition, which is $\alpha_\text{\textsc{sat}}=f'(1)/f(1)$.
-For $\alpha$ larger than $\alpha_\text{\textsc{sat}}$ solutions do not exist.
+When $M$ is too large, no solutions exist and $\Omega$ becomes the empty set.
+Following previous work, a replica symmetric equilibrium calculation using the
+cost function \eqref{eq:cost} predicts that solutions vanish when the ratio
+$\alpha=M/N$ is larger than $\alpha_\text{\textsc{sat}}=f'(1)/f(1)$. Based on the results of this paper, and the fact that this $\alpha_\text{\textsc{sat}}$ is consistent
The Euler characteristic $\chi$ of a manifold is a topological invariant \cite{Hatcher_2002_Algebraic}. It is
perhaps most familiar in the context of connected compact orientable surfaces, where it
@@ -207,12 +232,15 @@ is positive, and $\overline\chi$ is exponentially large in $N$. Finally, when
$\alpha\geq\alpha_\text{\textsc{sat}}$ the action and $\overline\chi$ are ill-defined.
\begin{figure}
- \includegraphics{figs/characteristic.pdf}
+ \includegraphics{figs/quenched.pdf}
\caption{
The logarithm of the average Euler characteristic $\overline\chi$ as a
- function of $\alpha$. The covariance function is $f(q)=\frac12q^2$ and
- $\alpha_\text{\textsc{sat}}=2$.
+ function of $\alpha$. The covariance function is $f(q)=\frac12+\frac12q^3$ and
+ $\alpha_\text{\textsc{sat}}=\frac32$. The dashed line shows the average of
+ $\log\chi$, the so-called quenched average, whose value differs in the
+ region $1<\alpha<\alpha_\text{\textsc{sat}}$ but whose transition points
+ are the same.
} \label{fig:characteristic}
\end{figure}
@@ -293,6 +321,7 @@ these eigenvalues is difficult to understand directly, in situations where
there is a continuous \textsc{rsb} transition the sign of one of the
eigenvalues changes \cite{Kent-Dobias_2023_When}. At the $\alpha_\text{\textsc{rsb}}$ predicted in \cite{Urbani_2023_A} we see no instability of this kind, and instead only observe such an instability at $\alpha_\text{\textsc{sat}}$.
+\cite{Franz_2016_The, Franz_2017_Universality, Franz_2019_Critical, Annesi_2023_Star-shaped, Baldassi_2023_Typical}
\begin{acknowledgements}
JK-D is supported by a \textsc{DynSysMath} Specific Initiative of the INFN.
@@ -410,14 +439,4 @@ where $\Delta f=f(1)-f(0)$ and $\tilde r_d$ is given by
\end{align}
When $\alpha\to\alpha_\text{\textsc{sat}}=f'(1)/f(1)$ from below, $\tilde r_d\to -1$, which produces $N^{-1}\overline{\log\chi}\to0$.
-\begin{figure}
- \includegraphics{figs/quenched.pdf}
-
- \caption{
- Comparison of the annealed and replica symmetric quenched calculations of
- the logarithmic of the Euler characteristic. The covariance function is
- $f(q)=\frac12+\frac12q^3$ and $\alpha_\text{\textsc{sat}}=\frac32$.
- }
-\end{figure}
-
\end{document}