Diffstat (limited to 'topology.bib')
-rw-r--r--  topology.bib  68
1 files changed, 68 insertions, 0 deletions
diff --git a/topology.bib b/topology.bib
index 70d053d..347321a 100644
--- a/topology.bib
+++ b/topology.bib
@@ -747,3 +747,71 @@
 issn = {1091-6490}
 }
+@inproceedings{Wang_2023_Plateau,
+ author = {Wang, Xiang and Wang, Annie N. and Zhou, Mo and Ge, Rong},
+ title = {Plateau in Monotonic Linear Interpolation --- A ``Biased'' View of Loss Landscape for Deep Networks},
+ year = {2023},
+ url = {https://openreview.net/forum?id=z289SIQOQna},
+ booktitle = {The Eleventh International Conference on Learning Representations}
+}
+
+@inproceedings{Vlaar_2022_What,
+ author = {Vlaar, Tiffany J and Frankle, Jonathan},
+ title = {What Can Linear Interpolation of Neural Network Loss Landscapes Tell Us?},
+ publisher = {PMLR},
+ year = {2022},
+ month = {17--23 Jul},
+ volume = {162},
+ pages = {22325--22341},
+ url = {https://proceedings.mlr.press/v162/vlaar22a.html},
+ abstract = {Studying neural network loss landscapes provides insights into the nature of the underlying optimization problems. Unfortunately, loss landscapes are notoriously difficult to visualize in a human-comprehensible fashion. One common way to address this problem is to plot linear slices of the landscape, for example from the initial state of the network to the final state after optimization. On the basis of this analysis, prior work has drawn broader conclusions about the difficulty of the optimization problem. In this paper, we put inferences of this kind to the test, systematically evaluating how linear interpolation and final performance vary when altering the data, choice of initialization, and other optimizer and architecture design choices. Further, we use linear interpolation to study the role played by individual layers and substructures of the network. We find that certain layers are more sensitive to the choice of initialization, but that the shape of the linear path is not indicative of the changes in test accuracy of the model. Our results cast doubt on the broader intuition that the presence or absence of barriers when interpolating necessarily relates to the success of optimization.},
+ booktitle = {Proceedings of the 39th International Conference on Machine Learning},
+ editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
+ pdf = {https://proceedings.mlr.press/v162/vlaar22a/vlaar22a.pdf},
+ series = {Proceedings of Machine Learning Research}
+}
+
+@unpublished{Goodfellow_2014_Qualitatively,
+ author = {Goodfellow, Ian J. and Vinyals, Oriol and Saxe, Andrew M.},
+ title = {Qualitatively characterizing neural network optimization problems},
+ year = {2014},
+ month = {dec},
+ url = {http://arxiv.org/abs/1412.6544},
+ date = {2014-12-19T21:55:01Z},
+ eprint = {1412.6544},
+ note = {ArXiv preprint},
+ eprintclass = {cs.NE},
+ eprinttype = {arxiv},
+ urldate = {2025-02-13T19:29:53.705054Z}
+}
+
+@inproceedings{Draxler_2018_Essentially,
+ author = {Draxler, Felix and Veschgini, Kambis and Salmhofer, Manfred and Hamprecht, Fred},
+ title = {Essentially No Barriers in Neural Network Energy Landscape},
+ publisher = {PMLR},
+ year = {2018},
+ month = {10--15 Jul},
+ volume = {80},
+ pages = {1309--1318},
+ url = {https://proceedings.mlr.press/v80/draxler18a.html},
+ abstract = {Training neural networks involves finding minima of a high-dimensional non-convex loss function. Relaxing from linear interpolations, we construct continuous paths between minima of recent neural network architectures on CIFAR10 and CIFAR100. Surprisingly, the paths are essentially flat in both the training and test landscapes. This implies that minima are perhaps best seen as points on a single connected manifold of low loss, rather than as the bottoms of distinct valleys.},
+ booktitle = {Proceedings of the 35th International Conference on Machine Learning},
+ editor = {Dy, Jennifer and Krause, Andreas},
+ pdf = {http://proceedings.mlr.press/v80/draxler18a/draxler18a.pdf},
+ series = {Proceedings of Machine Learning Research}
+}
+
+@unpublished{Frankle_2020_Revisiting,
+ author = {Frankle, Jonathan},
+ title = {Revisiting ``Qualitatively Characterizing Neural Network Optimization Problems''},
+ year = {2020},
+ month = {dec},
+ url = {http://arxiv.org/abs/2012.06898},
+ date = {2020-12-12T20:01:33Z},
+ eprint = {2012.06898},
+ note = {ArXiv preprint},
+ eprintclass = {cs.LG},
+ eprinttype = {arxiv},
+ urldate = {2025-02-13T19:32:17.287212Z}
+}
+
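Aside, not part of the committed change above: the Goodfellow, Vlaar, and Frankle entries all concern the same one-dimensional probe, evaluating the loss along the straight line theta(alpha) = (1 - alpha) * theta_init + alpha * theta_final between a network's initial and final weights. The following is a minimal PyTorch sketch of that probe; the function name, its arguments, and the batch-weighted averaging are illustrative assumptions, not code from the cited papers or from this repository.

    # Illustrative sketch only (not from topology.bib or the cited papers):
    # evaluate the loss along theta(alpha) = (1 - alpha)*theta_init + alpha*theta_final,
    # the linear-interpolation probe of Goodfellow_2014_Qualitatively / Vlaar_2022_What.
    # model, loss_fn, data_loader and both state dicts are assumed to be supplied by the caller.
    import copy
    import torch

    def loss_along_linear_path(model, theta_init, theta_final, loss_fn, data_loader, steps=25):
        probe = copy.deepcopy(model)          # scratch copy so the trained model stays untouched
        alphas = torch.linspace(0.0, 1.0, steps)
        losses = []
        for alpha in alphas:
            # Interpolate floating-point tensors; copy integer buffers
            # (e.g. BatchNorm's num_batches_tracked) from the final state unchanged.
            interpolated = {
                name: (1.0 - alpha) * theta_init[name] + alpha * theta_final[name]
                if theta_init[name].is_floating_point() else theta_final[name]
                for name in theta_init
            }
            probe.load_state_dict(interpolated)
            probe.eval()
            total, count = 0.0, 0
            with torch.no_grad():
                for inputs, targets in data_loader:
                    total += loss_fn(probe(inputs), targets).item() * inputs.size(0)
                    count += inputs.size(0)
            losses.append(total / count)
        return alphas.tolist(), losses

A typical use would interpolate between model.state_dict() snapshots saved at initialization and after training, sweeping alpha from 0 to 1; the presence or absence of a barrier along this line is exactly the question the entries above debate.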