diff options
author | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2024-08-03 15:51:41 +0200 |
---|---|---|
committer | Jaron Kent-Dobias <jaron@kent-dobias.com> | 2024-08-03 15:51:41 +0200 |
commit | 427a8fa280f32f0a04a539cbbd85471934e2f97a (patch) | |
tree | 8dd9b94299b3b7952c8a7ab58c6496d6332c613b | |
parent | ae9115876b609316ff02e0b596956d2459bc3448 (diff) | |
download | SciPostPhys_18_158-427a8fa280f32f0a04a539cbbd85471934e2f97a.tar.gz SciPostPhys_18_158-427a8fa280f32f0a04a539cbbd85471934e2f97a.tar.bz2 SciPostPhys_18_158-427a8fa280f32f0a04a539cbbd85471934e2f97a.zip |
Lots of writing.
-rw-r--r-- | figs/quenched.pdf | bin | 10836 -> 10849 bytes | |||
-rw-r--r-- | topology.bib | 146 | ||||
-rw-r--r-- | topology.tex | 99 |
3 files changed, 205 insertions, 40 deletions
diff --git a/figs/quenched.pdf b/figs/quenched.pdf Binary files differindex dac0d02..f2fa3ad 100644 --- a/figs/quenched.pdf +++ b/figs/quenched.pdf diff --git a/topology.bib b/topology.bib index 1b19671..293c288 100644 --- a/topology.bib +++ b/topology.bib @@ -1,3 +1,33 @@ +@article{Altieri_2019_Constraint, + author = {Altieri, Ada and Franz, Silvio}, + title = {Constraint satisfaction mechanisms for marginal stability and criticality in large ecosystems}, + journal = {Physical Review E}, + publisher = {American Physical Society (APS)}, + year = {2019}, + month = {January}, + number = {1}, + volume = {99}, + pages = {010401}, + url = {http://dx.doi.org/10.1103/PhysRevE.99.010401}, + doi = {10.1103/physreve.99.010401}, + issn = {2470-0053} +} + +@article{Annesi_2023_Star-shaped, + author = {Annesi, Brandon Livio and Lauditi, Clarissa and Lucibello, Carlo and Malatesta, Enrico M. and Perugini, Gabriele and Pittorino, Fabrizio and Saglietti, Luca}, + title = {Star-Shaped Space of Solutions of the Spherical Negative Perceptron}, + journal = {Physical Review Letters}, + publisher = {American Physical Society (APS)}, + year = {2023}, + month = {11}, + number = {22}, + volume = {131}, + pages = {227301}, + url = {http://dx.doi.org/10.1103/PhysRevLett.131.227301}, + doi = {10.1103/physrevlett.131.227301}, + issn = {1079-7114} +} + @book{Audin_2014_Morse, author = {Audin, Michèle and Damian, Mihai}, title = {Morse theory and {Floer} homology}, @@ -9,6 +39,107 @@ translator = {Erné, Reinie} } +@article{Baldassi_2016_Unreasonable, + author = {Baldassi, Carlo and Borgs, Christian and Chayes, Jennifer T. and Ingrosso, Alessandro and Lucibello, Carlo and Saglietti, Luca and Zecchina, Riccardo}, + title = {Unreasonable effectiveness of learning neural networks: From accessible states and robust ensembles to basic algorithmic schemes}, + journal = {Proceedings of the National Academy of Sciences}, + publisher = {Proceedings of the National Academy of Sciences}, + year = {2016}, + month = {11}, + number = {48}, + volume = {113}, + pages = {E7655--E7662}, + url = {http://dx.doi.org/10.1073/pnas.1608103113}, + doi = {10.1073/pnas.1608103113}, + issn = {1091-6490} +} + +@article{Baldassi_2019_Properties, + author = {Baldassi, Carlo and Malatesta, Enrico M. and Zecchina, Riccardo}, + title = {Properties of the Geometry of Solutions and Capacity of Multilayer Neural Networks with Rectified Linear Unit Activations}, + journal = {Physical Review Letters}, + publisher = {American Physical Society (APS)}, + year = {2019}, + month = {10}, + number = {17}, + volume = {123}, + pages = {170602}, + url = {https://doi.org/10.1103%2Fphysrevlett.123.170602}, + doi = {10.1103/physrevlett.123.170602} +} + +@article{Baldassi_2023_Typical, + author = {Baldassi, Carlo and Malatesta, Enrico M. and Perugini, Gabriele and Zecchina, Riccardo}, + title = {Typical and atypical solutions in nonconvex neural networks with discrete and continuous weights}, + journal = {Physical Review E}, + publisher = {American Physical Society (APS)}, + year = {2023}, + month = {August}, + number = {2}, + volume = {108}, + pages = {024310}, + url = {http://dx.doi.org/10.1103/PhysRevE.108.024310}, + doi = {10.1103/physreve.108.024310}, + issn = {2470-0053} +} + +@unpublished{Beneventano_2023_On, + author = {Beneventano, Pierfrancesco}, + title = {On the Trajectories of {SGD} Without Replacement}, + year = {2023}, + month = {dec}, + url = {http://arxiv.org/abs/2312.16143v2}, + archiveprefix = {arXiv}, + eprint = {2312.16143v2}, + eprintclass = {cs.LG}, + eprinttype = {arxiv} +} + +@article{Franz_2016_The, + author = {Franz, Silvio and Parisi, Giorgio}, + title = {The simplest model of jamming}, + journal = {Journal of Physics A: Mathematical and Theoretical}, + publisher = {IOP Publishing}, + year = {2016}, + month = {February}, + number = {14}, + volume = {49}, + pages = {145001}, + url = {http://dx.doi.org/10.1088/1751-8113/49/14/145001}, + doi = {10.1088/1751-8113/49/14/145001}, + issn = {1751-8121} +} + +@article{Franz_2017_Universality, + author = {Franz, Silvio and Parisi, Giorgio and Sevelev, Maxime and Urbani, Pierfrancesco and Zamponi, Francesco}, + title = {Universality of the {SAT-UNSAT} (jamming) threshold in non-convex continuous constraint satisfaction problems}, + journal = {SciPost Physics}, + publisher = {Stichting SciPost}, + year = {2017}, + month = {June}, + number = {3}, + volume = {2}, + pages = {019}, + url = {http://dx.doi.org/10.21468/SciPostPhys.2.3.019}, + doi = {10.21468/scipostphys.2.3.019}, + issn = {2542-4653} +} + +@article{Franz_2019_Critical, + author = {Franz, Silvio and Sclocchi, Antonio and Urbani, Pierfrancesco}, + title = {Critical Jammed Phase of the Linear Perceptron}, + journal = {Physical Review Letters}, + publisher = {American Physical Society (APS)}, + year = {2019}, + month = {September}, + number = {11}, + volume = {123}, + pages = {115702}, + url = {http://dx.doi.org/10.1103/PhysRevLett.123.115702}, + doi = {10.1103/physrevlett.123.115702}, + issn = {1079-7114} +} + @article{Fyodorov_2004_Complexity, author = {Fyodorov, Yan V.}, title = {Complexity of Random Energy Landscapes, Glass Transition, and Absolute Value of the Spectral Determinant of Random Matrices}, @@ -138,6 +269,21 @@ primaryclass = {cond-mat.dis-nn} } +@article{Mezard_2009_Constraint, + author = {Mézard, Marc and Mora, Thierry}, + title = {Constraint satisfaction problems and neural networks: A statistical physics perspective}, + journal = {Journal of Physiology-Paris}, + publisher = {Elsevier BV}, + year = {2009}, + month = {January}, + number = {1–2}, + volume = {103}, + pages = {107--113}, + url = {http://dx.doi.org/10.1016/j.jphysparis.2009.05.013}, + doi = {10.1016/j.jphysparis.2009.05.013}, + issn = {0928-4257} +} + @unpublished{Montanari_2023_Solving, author = {Montanari, Andrea and Subag, Eliran}, title = {Solving overparametrized systems of random equations: I. Model and algorithms for approximate solutions}, diff --git a/topology.tex b/topology.tex index decfa6f..5233c54 100644 --- a/topology.tex +++ b/topology.tex @@ -25,20 +25,43 @@ \affiliation{Istituto Nazionale di Fisica Nucleare, Sezione di Roma I, Rome, Italy 00184} \begin{abstract} - We consider the set of solutions to $M$ random polynomial equations on the - $(N-1)$-sphere. When solutions exist, they form a manifold. We compute the - average Euler characteristic of this manifold, and find different behaviors - depending on $\alpha=M/N$. When $\alpha<1$, the average Euler characteristic - is subexponential in $N$ but positive, indicating the presence of few - simply-connected components. When $1\leq\alpha<\alpha_\text{\textsc{sat}}$, it is - exponentially large in $N$, indicating a shattering transition in the space - of solutions. Finally, when $\alpha_\text{\textsc{sat}}\leq\alpha$, the number of - solutions vanish. We further compute the average logarithm of the Euler - characteristic, which is representative of typical manifolds. + We consider the set of solutions to $M$ random polynomial equations with + independent Gaussian coefficients on the $(N-1)$-sphere. When solutions + exist, they form a manifold. We compute the average Euler characteristic of + this manifold, and find different behaviors depending on the variances of the + coefficients and $\alpha=M/N$. When $\alpha<1$, the average Euler + characteristic is subexponential in $N$ but positive, indicating the presence + of few connected components. When $1<\alpha<\alpha_\text{\textsc{sat}}$, it + is exponentially large in $N$, indicating a shattering transition of the + manifold of solutions into many components. Finally, when + $\alpha_\text{\textsc{sat}}<\alpha$, the set of solutions vanishes. Some + choices of variances produce $\alpha_\text{\textsc{sat}}<1$, and the shattering + transition never takes place. We further compute the average logarithm of the + Euler characteristic, which is representative of typical manifolds, and find + that most of the quantitative predictions agree. \end{abstract} \maketitle +Constraint satisfaction problems seek configurations that simultaneously +satisfy a set of equations, and form a basis for thinking about problems as +diverse as neural networks \cite{Mezard_2009_Constraint}, granular materials +\cite{Franz_2017_Universality}, ecosystems \cite{Altieri_2019_Constraint}, and +confluent tissues \cite{Urbani_2023_A}. All but the last of these examples deal +with sets of inequalities, while the last considers a set of equality +constraints. Inequality constraints are familiar in situations like zero-cost +solutions in neural networks with ReLu activations and stable equilibrium in the +forces between physical objects. Equality constraints naturally appear in the +zero-gradient solutions to overparameterized smooth neural networks and, +indeed, in vertex models of tissues. + +In such problems, there is great interest in characterizing structure in the +set of solutions, which can be influential in how algorithms behave when trying +to solve them \cite{Baldassi_2016_Unreasonable, Baldassi_2019_Properties, +Beneventano_2023_On}. Here, we show how \emph{topological} information about +the set of solutions can be calculated in a simple model of satisfying random +nonlinear equalities. This allows us to reason about the connectivity of this +solution set. We consider the problem of finding configurations $\mathbf x\in\mathbb R^N$ lying on the $(N-1)$-sphere $\|\mathbf x\|^2=N$ that simultaneously satisfy $M$ @@ -47,15 +70,18 @@ constraints are taken to be centered Gaussian random functions with covariance \begin{equation} \label{eq:covariance} \overline{V_i(\mathbf x)V_j(\mathbf x')}=\delta_{ij}f\left(\frac{\mathbf x\cdot\mathbf x'}N\right) \end{equation} -for some choice of $f$. -When the covariance function $f$ is polynomial, the $V_k$ are also polynomial, with terms of degree $p$ in $f$ corresponding to all possible terms of degree $p$ in $V_k$. In particular, taking +for some choice of $f$. When the covariance function $f$ is polynomial, the +$V_k$ are also polynomial, with a term of degree $p$ in $f$ corresponding to +all possible terms of degree $p$ in $V_k$. In particular, taking \begin{equation} V_k(\mathbf x) =\sum_{p=0}^\infty\frac1{p!}\sqrt{\frac{f^{(p)}(0)}{N^p}} \sum_{i_1\cdots i_p}^NJ^{(k,p)}_{i_1\cdots i_p}x_{i_1}\cdots x_{i_p} \end{equation} -and each of the elements of the tensors $J^{(k,p)}$ as independently -distributed with a unit normal distribution satisfies \eqref{eq:covariance}. +with the elements of the tensors $J^{(k,p)}$ as independently distributed +unit normal random variables satisfies \eqref{eq:covariance}. The size of the +series coefficients of $f$ therefore control the variances in the coefficients +of random polynomial constraints. This problem or small variations thereof have attracted attention recently for @@ -65,21 +91,19 @@ tissues \cite{Fyodorov_2019_A, Fyodorov_2020_Counting, Kamali_2023_Stochastic, Urbani_2024_Statistical, Montanari_2023_Solving, Montanari_2024_On, Kent-Dobias_2024_Conditioning, Kent-Dobias_2024_Algorithm-independent}. In each of these cases, the authors studied properties of the cost function -\begin{equation} +\begin{equation} \label{eq:cost} \mathcal C(\mathbf x)=\frac12\sum_{k=1}^MV_k(\mathbf x)^2 \end{equation} -which achieves zero cost only for configurations that satisfy all the -constraints. -Here we dispense with defining a cost function, and instead study -the set of solutions directly. - -The set of solutions to our nonlinear random constraint satisfaction problem +which achieves zero only for configurations that satisfy all the constraints. +Here we dispense with the cost function and study the set of solutions +directly. +This set can be written as \begin{equation} - \Omega=\{\mathbf x\in\mathbb R^N\mid \|\mathbf x\|^2=N,0=V_k(\mathbf x) - \;\forall\;k=1,\ldots,M\} + \Omega=\{\mathbf x\in\mathbb R^N\mid \|\mathbf x\|^2=N,V_k(\mathbf x)=0 + \;\forall\;k=1,\ldots,M\}. \end{equation} -$\Omega$ is almost always a manifold without singular points. The conditions for a singular point are that +Because the constraints are all smooth functions, $\Omega$ is almost always a manifold without singular points. The conditions for a singular point are that $0=\frac\partial{\partial\mathbf x}V_k(\mathbf x)$ for all $k$. This is equivalent to asking that the constraints $V_k$ all have a stationary point at the same place. When the $V_k$ are independent and random, this is vanishingly @@ -87,9 +111,10 @@ unlikely, requiring $NM$ independent equations to be simultaneously satisfied. This means that different connected components of the set of solutions do not intersect, nor are there self-intersections, without extraordinary fine-tuning. -One result of this previous work is the value of $\alpha$ for the -satisfiability transition, which is $\alpha_\text{\textsc{sat}}=f'(1)/f(1)$. -For $\alpha$ larger than $\alpha_\text{\textsc{sat}}$ solutions do not exist. +When $M$ is too large, no solutions exist and $\Omega$ becomes the empty set. +Following previous work, a replica symmetric equilibrium calculation using the +cost function \eqref{eq:cost} predicts that solutions vanish when the ratio +$\alpha=M/N$ is larger than $\alpha_\text{\textsc{sat}}=f'(1)/f(1)$. Based on the results of this paper, and the fact that this $\alpha_\text{\textsc{sat}}$ is consistent The Euler characteristic $\chi$ of a manifold is a topological invariant \cite{Hatcher_2002_Algebraic}. It is perhaps most familiar in the context of connected compact orientable surfaces, where it @@ -207,12 +232,15 @@ is positive, and $\overline\chi$ is exponentially large in $N$. Finally, when $\alpha\geq\alpha_\text{\textsc{sat}}$ the action and $\overline\chi$ are ill-defined. \begin{figure} - \includegraphics{figs/characteristic.pdf} + \includegraphics{figs/quenched.pdf} \caption{ The logarithm of the average Euler characteristic $\overline\chi$ as a - function of $\alpha$. The covariance function is $f(q)=\frac12q^2$ and - $\alpha_\text{\textsc{sat}}=2$. + function of $\alpha$. The covariance function is $f(q)=\frac12+\frac12q^3$ and + $\alpha_\text{\textsc{sat}}=\frac32$. The dashed line shows the average of + $\log\chi$, the so-called quenched average, whose value differs in the + region $1<\alpha<\alpha_\text{\textsc{sat}}$ but whose transition points + are the same. } \label{fig:characteristic} \end{figure} @@ -293,6 +321,7 @@ these eigenvalues is difficult to understand directly, in situations where there is a continuous \textsc{rsb} transition the sign of one of the eigenvalues changes \cite{Kent-Dobias_2023_When}. At the $\alpha_\text{\textsc{rsb}}$ predicted in \cite{Urbani_2023_A} we see no instability of this kind, and instead only observe such an instability at $\alpha_\text{\textsc{sat}}$. +\cite{Franz_2016_The, Franz_2017_Universality, Franz_2019_Critical, Annesi_2023_Star-shaped, Baldassi_2023_Typical} \begin{acknowledgements} JK-D is supported by a \textsc{DynSysMath} Specific Initiative of the INFN. @@ -410,14 +439,4 @@ where $\Delta f=f(1)-f(0)$ and $\tilde r_d$ is given by \end{align} When $\alpha\to\alpha_\text{\textsc{sat}}=f'(1)/f(1)$ from below, $\tilde r_d\to -1$, which produces $N^{-1}\overline{\log\chi}\to0$. -\begin{figure} - \includegraphics{figs/quenched.pdf} - - \caption{ - Comparison of the annealed and replica symmetric quenched calculations of - the logarithmic of the Euler characteristic. The covariance function is - $f(q)=\frac12+\frac12q^3$ and $\alpha_\text{\textsc{sat}}=\frac32$. - } -\end{figure} - \end{document} |