author     Jaron Kent-Dobias <jaron@kent-dobias.com>   2025-03-11 14:46:27 -0300
committer  Jaron Kent-Dobias <jaron@kent-dobias.com>   2025-03-11 14:46:27 -0300
commit     8f943c8d09c51546bd3a9d8f160310c6370646cd (patch)
tree       2947b84ccd6aa72a65aa0e29faa2dbe44bf0cb75
parent     5ff4ddf95f9f185be909aaec31b1ca4dc9d6f685 (diff)
More context for references in second paragraph. (arXiv.v3)
-rw-r--r--  topology.bib | 68
-rw-r--r--  topology.tex |  5
2 files changed, 70 insertions, 3 deletions
diff --git a/topology.bib b/topology.bib
index 70d053d..347321a 100644
--- a/topology.bib
+++ b/topology.bib
@@ -747,3 +747,71 @@
issn = {1091-6490}
}
+@inproceedings{Wang_2023_Plateau,
+ author = {Wang, Xiang and Wang, Annie N. and Zhou, Mo and Ge, Rong},
+ title = {Plateau in Monotonic Linear Interpolation --- A ``Biased'' View of Loss Landscape for Deep Networks},
+ year = {2023},
+ url = {https://openreview.net/forum?id=z289SIQOQna},
+ booktitle = {The Eleventh International Conference on Learning Representations}
+}
+
+@inproceedings{Vlaar_2022_What,
+ author = {Vlaar, Tiffany J. and Frankle, Jonathan},
+ title = {What Can Linear Interpolation of Neural Network Loss Landscapes Tell Us?},
+ publisher = {PMLR},
+ year = {2022},
+ month = {17--23 Jul},
+ volume = {162},
+ pages = {22325--22341},
+ url = {https://proceedings.mlr.press/v162/vlaar22a.html},
+ abstract = {Studying neural network loss landscapes provides insights into the nature of the underlying optimization problems. Unfortunately, loss landscapes are notoriously difficult to visualize in a human-comprehensible fashion. One common way to address this problem is to plot linear slices of the landscape, for example from the initial state of the network to the final state after optimization. On the basis of this analysis, prior work has drawn broader conclusions about the difficulty of the optimization problem. In this paper, we put inferences of this kind to the test, systematically evaluating how linear interpolation and final performance vary when altering the data, choice of initialization, and other optimizer and architecture design choices. Further, we use linear interpolation to study the role played by individual layers and substructures of the network. We find that certain layers are more sensitive to the choice of initialization, but that the shape of the linear path is not indicative of the changes in test accuracy of the model. Our results cast doubt on the broader intuition that the presence or absence of barriers when interpolating necessarily relates to the success of optimization.},
+ booktitle = {Proceedings of the 39th International Conference on Machine Learning},
+ editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
+ pdf = {https://proceedings.mlr.press/v162/vlaar22a/vlaar22a.pdf},
+ series = {Proceedings of Machine Learning Research}
+}
+
+@unpublished{Goodfellow_2014_Qualitatively,
+ author = {Goodfellow, Ian J. and Vinyals, Oriol and Saxe, Andrew M.},
+ title = {Qualitatively characterizing neural network optimization problems},
+ year = {2014},
+ month = {dec},
+ url = {http://arxiv.org/abs/1412.6544},
+ date = {2014-12-19T21:55:01Z},
+ eprint = {1412.6544},
+ note = {ArXiv preprint},
+ eprintclass = {cs.NE},
+ eprinttype = {arxiv},
+ urldate = {2025-02-13T19:29:53.705054Z}
+}
+
+@inproceedings{Draxler_2018_Essentially,
+ author = {Draxler, Felix and Veschgini, Kambis and Salmhofer, Manfred and Hamprecht, Fred},
+ title = {Essentially No Barriers in Neural Network Energy Landscape},
+ publisher = {PMLR},
+ year = {2018},
+ month = {10--15 Jul},
+ volume = {80},
+ pages = {1309--1318},
+ url = {https://proceedings.mlr.press/v80/draxler18a.html},
+ abstract = {Training neural networks involves finding minima of a high-dimensional non-convex loss function. Relaxing from linear interpolations, we construct continuous paths between minima of recent neural network architectures on CIFAR10 and CIFAR100. Surprisingly, the paths are essentially flat in both the training and test landscapes. This implies that minima are perhaps best seen as points on a single connected manifold of low loss, rather than as the bottoms of distinct valleys.},
+ booktitle = {Proceedings of the 35th International Conference on Machine Learning},
+ editor = {Dy, Jennifer and Krause, Andreas},
+ pdf = {http://proceedings.mlr.press/v80/draxler18a/draxler18a.pdf},
+ series = {Proceedings of Machine Learning Research}
+}
+
+@unpublished{Frankle_2020_Revisiting,
+ author = {Frankle, Jonathan},
+ title = {Revisiting ``Qualitatively Characterizing Neural Network Optimization Problems''},
+ year = {2020},
+ month = {dec},
+ url = {http://arxiv.org/abs/2012.06898},
+ date = {2020-12-12T20:01:33Z},
+ eprint = {2012.06898},
+ note = {ArXiv preprint},
+ eprintclass = {cs.LG},
+ eprinttype = {arxiv},
+ urldate = {2025-02-13T19:32:17.287212Z}
+}
+
diff --git a/topology.tex b/topology.tex
index 6fb6bce..8cd855d 100644
--- a/topology.tex
+++ b/topology.tex
@@ -122,10 +122,9 @@ solutions in neural networks with ReLU activations and stable equilibrium in the
forces between physical objects. Equality constraints naturally appear in the
zero-gradient solutions to overparameterized smooth neural networks and in vertex models of tissues.
-In such problems, there is great interest in characterizing structure in the
+In problems ranging from toy models \cite{Baldassi_2016_Unreasonable, Baldassi_2019_Properties} to real deep neural networks \cite{Goodfellow_2014_Qualitatively, Draxler_2018_Essentially, Frankle_2020_Revisiting, Vlaar_2022_What, Wang_2023_Plateau}, there is great interest in characterizing structure in the
set of solutions, which can influence the behavior of algorithms trying
-to find them \cite{Baldassi_2016_Unreasonable, Baldassi_2019_Properties,
-Beneventano_2023_On}. Here, we show how topological information about
+to find them \cite{Beneventano_2023_On}. Here, we show how topological information about
the set of solutions can be calculated in a simple problem of satisfying random
nonlinear equalities. This allows us to reason about the connectivity and structure of the
solution set. The topological properties revealed by this calculation yield
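
For context on the newly cited works: Goodfellow et al. (2014) and the follow-ups added above probe the loss landscape by evaluating the loss along the straight line between two parameter vectors, typically the initialization and the trained solution. A minimal sketch of that probe, with illustrative symbols not taken from topology.tex:

% Linear-interpolation probe of a loss landscape (illustrative sketch):
% \theta_0 is the network at initialization, \theta^\ast after training,
% and \ell the training loss.
\begin{equation*}
  L(\alpha) = \ell\bigl((1-\alpha)\,\theta_0 + \alpha\,\theta^\ast\bigr),
  \qquad \alpha \in [0, 1].
\end{equation*}
% A barrier is a point where L(\alpha) exceeds both endpoint values;
% the cited works ask whether the presence or absence of such barriers
% tracks the difficulty of optimization.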