diff options
Diffstat (limited to 'ictp-saifr_colloquium.bib')
-rw-r--r-- | ictp-saifr_colloquium.bib | 239 |
1 file changed, 229 insertions(+), 10 deletions(-)
% Entries added in this revision, reconstructed from the diff rendering:
% "+" markers stripped, fields rejoined where the page broke them mid-line,
% month macros unquoted, and empty fields dropped.
%
% NOTE(review): this revision also deleted the @unpublished entry
% Kent-Dobias_2024_Conditioning (arXiv:2407.02082, author given only as
% placeholder "JK-D"); it is superseded by the published @article version
% with the same citation key. The unchanged context entry
% Kent-Dobias_2024_Algorithm-independent is truncated by the hunk header
% and is therefore not reproduced here.

@inproceedings{Draxler_2018_Essentially,
  author = {Draxler, Felix and Veschgini, Kambis and Salmhofer, Manfred and Hamprecht, Fred},
  title = {Essentially No Barriers in Neural Network Energy Landscape},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning},
  editor = {Dy, Jennifer and Krause, Andreas},
  series = {Proceedings of Machine Learning Research},
  publisher = {PMLR},
  year = {2018},
  month = jul,
  volume = {80},
  pages = {1309--1318},
  url = {https://proceedings.mlr.press/v80/draxler18a.html},
  pdf = {http://proceedings.mlr.press/v80/draxler18a/draxler18a.pdf},
  abstract = {Training neural networks involves finding minima of a high-dimensional non-convex loss function. Relaxing from linear interpolations, we construct continuous paths between minima of recent neural network architectures on CIFAR10 and CIFAR100. Surprisingly, the paths are essentially flat in both the training and test landscapes. This implies that minima are perhaps best seen as points on a single connected manifold of low loss, rather than as the bottoms of distinct valleys.}
}

@article{Frankle_2020_Revisiting,
  author = {Frankle, Jonathan},
  title = {Revisiting ``Qualitatively Characterizing Neural Network Optimization Problems''},
  year = {2020},
  month = dec,
  url = {http://arxiv.org/abs/2012.06898v1},
  date = {2020-12-12T20:01:33Z},
  eprint = {2012.06898v1},
  eprintclass = {cs.LG},
  eprinttype = {arxiv},
  urldate = {2025-02-13T19:32:17.287212Z}
}

@inproceedings{Garipov_2018_Loss,
  author = {Garipov, Timur and Izmailov, Pavel and Podoprikhin, Dmitrii and Vetrov, Dmitry P and Wilson, Andrew G},
  title = {Loss Surfaces, Mode Connectivity, and Fast Ensembling of {DNNs}},
  booktitle = {Advances in Neural Information Processing Systems},
  editor = {Bengio, S. and Wallach, H. and Larochelle, H. and Grauman, K. and Cesa-Bianchi, N. and Garnett, R.},
  publisher = {Curran Associates, Inc.},
  year = {2018},
  volume = {31},
  url = {https://proceedings.neurips.cc/paper_files/paper/2018/file/be3087e74e9100d4bc4c6268cdbe8456-Paper.pdf}
}

@article{Goodfellow_2014_Qualitatively,
  author = {Goodfellow, Ian J. and Vinyals, Oriol and Saxe, Andrew M.},
  title = {Qualitatively characterizing neural network optimization problems},
  year = {2014},
  month = dec,
  url = {http://arxiv.org/abs/1412.6544v6},
  date = {2014-12-19T21:55:01Z},
  eprint = {1412.6544v6},
  eprintclass = {cs.NE},
  eprinttype = {arxiv},
  urldate = {2025-02-13T19:29:53.705054Z}
}

@inproceedings{Vlaar_2022_What,
  author = {Vlaar, Tiffany J and Frankle, Jonathan},
  title = {What Can Linear Interpolation of Neural Network Loss Landscapes Tell Us?},
  booktitle = {Proceedings of the 39th International Conference on Machine Learning},
  editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
  series = {Proceedings of Machine Learning Research},
  publisher = {PMLR},
  year = {2022},
  month = jul,
  volume = {162},
  pages = {22325--22341},
  url = {https://proceedings.mlr.press/v162/vlaar22a.html},
  pdf = {https://proceedings.mlr.press/v162/vlaar22a/vlaar22a.pdf},
  abstract = {Studying neural network loss landscapes provides insights into the nature of the underlying optimization problems. Unfortunately, loss landscapes are notoriously difficult to visualize in a human-comprehensible fashion. One common way to address this problem is to plot linear slices of the landscape, for example from the initial state of the network to the final state after optimization. On the basis of this analysis, prior work has drawn broader conclusions about the difficulty of the optimization problem. In this paper, we put inferences of this kind to the test, systematically evaluating how linear interpolation and final performance vary when altering the data, choice of initialization, and other optimizer and architecture design choices. Further, we use linear interpolation to study the role played by individual layers and substructures of the network. We find that certain layers are more sensitive to the choice of initialization, but that the shape of the linear path is not indicative of the changes in test accuracy of the model. Our results cast doubt on the broader intuition that the presence or absence of barriers when interpolating necessarily relates to the success of optimization.}
}

@inproceedings{Wang_2023_Plateau,
  author = {Wang, Xiang and Wang, Annie N. and Zhou, Mo and Ge, Rong},
  title = {Plateau in Monotonic Linear Interpolation --- A ``Biased'' View of Loss Landscape for Deep Networks},
  booktitle = {The Eleventh International Conference on Learning Representations},
  year = {2023},
  url = {https://openreview.net/forum?id=z289SIQOQna}
}

@unpublished{Beneventano_2023_On,
  author = {Beneventano, Pierfrancesco},
  title = {On the Trajectories of {SGD} Without Replacement},
  year = {2023},
  month = dec,
  note = {arXiv preprint},
  url = {http://arxiv.org/abs/2312.16143},
  archiveprefix = {arXiv},
  eprint = {2312.16143},
  eprintclass = {cs.LG},
  eprinttype = {arxiv}
}

@article{Neyshabur_2017_Implicit,
  author = {Neyshabur, Behnam},
  title = {Implicit Regularization in Deep Learning},
  year = {2017},
  month = sep,
  url = {http://arxiv.org/abs/1709.01953v2},
  date = {2017-09-06T18:12:04Z},
  eprint = {1709.01953v2},
  eprintclass = {cs.LG},
  eprinttype = {arxiv},
  urldate = {2025-02-13T20:12:50.483287Z}
}

% NOTE(review): original had month = {1}; interpreted as jan --- confirm
% against the journal issue. URL de-percent-encoded (%2F broke classic
% BibTeX, where an unescaped % is not allowed in field text).
@article{Fyodorov_2019_A,
  author = {Fyodorov, Yan V.},
  title = {A Spin Glass Model for Reconstructing Nonlinearly Encrypted Signals Corrupted by Noise},
  journal = {Journal of Statistical Physics},
  publisher = {Springer Science and Business Media LLC},
  year = {2019},
  month = jan,
  number = {5},
  volume = {175},
  pages = {789--818},
  url = {https://doi.org/10.1007/s10955-018-02217-9},
  doi = {10.1007/s10955-018-02217-9}
}
% Second group of entries added in this revision, reconstructed from the
% diff rendering: "+" markers and trailing page residue ("|") stripped,
% the split Kamali_2023_Stochastic title rejoined, and month values
% converted from quoted names to the standard unquoted macros.

@article{Fyodorov_2020_Counting,
  author = {Fyodorov, Y. V. and Tublin, R.},
  title = {Counting Stationary Points of the Loss Function in the Simplest Constrained Least-square Optimization},
  journal = {Acta Physica Polonica B},
  publisher = {Jagiellonian University},
  year = {2020},
  number = {7},
  volume = {51},
  pages = {1663},
  url = {http://dx.doi.org/10.5506/APhysPolB.51.1663},
  doi = {10.5506/aphyspolb.51.1663},
  issn = {1509-5770}
}

@article{Fyodorov_2022_Optimization,
  author = {Fyodorov, Yan V and Tublin, Rashel},
  title = {Optimization landscape in the simplest constrained random least-square problem},
  journal = {Journal of Physics A: Mathematical and Theoretical},
  publisher = {IOP Publishing},
  year = {2022},
  month = may,
  number = {24},
  volume = {55},
  pages = {244008},
  url = {http://dx.doi.org/10.1088/1751-8121/ac6d8e},
  doi = {10.1088/1751-8121/ac6d8e},
  issn = {1751-8121}
}

@article{Kent-Dobias_2024_Conditioning,
  author = {Kent-Dobias, Jaron},
  title = {Conditioning the complexity of random landscapes on marginal optima},
  journal = {Physical Review E},
  publisher = {American Physical Society (APS)},
  year = {2024},
  month = dec,
  number = {6},
  volume = {110},
  pages = {064148},
  url = {http://dx.doi.org/10.1103/PhysRevE.110.064148},
  doi = {10.1103/physreve.110.064148},
  issn = {2470-0053}
}

@article{Kamali_2023_Dynamical,
  author = {Kamali, Persia Jana and Urbani, Pierfrancesco},
  title = {Dynamical mean field theory for models of confluent tissues and beyond},
  journal = {SciPost Physics},
  publisher = {Stichting SciPost},
  year = {2023},
  month = nov,
  number = {5},
  volume = {15},
  pages = {219},
  url = {http://dx.doi.org/10.21468/SciPostPhys.15.5.219},
  doi = {10.21468/scipostphys.15.5.219},
  issn = {2542-4653}
}

@unpublished{Kamali_2023_Stochastic,
  author = {Kamali, Persia Jana and Urbani, Pierfrancesco},
  title = {Stochastic Gradient Descent outperforms Gradient Descent in recovering a high-dimensional signal in a glassy energy landscape},
  year = {2023},
  month = sep,
  note = {arXiv preprint},
  url = {http://arxiv.org/abs/2309.04788},
  archiveprefix = {arXiv},
  eprint = {2309.04788},
  eprintclass = {cs.LG},
  eprinttype = {arxiv}
}

@article{Urbani_2023_A,
  author = {Urbani, Pierfrancesco},
  title = {A continuous constraint satisfaction problem for the rigidity transition in confluent tissues},
  journal = {Journal of Physics A: Mathematical and Theoretical},
  publisher = {IOP Publishing},
  year = {2023},
  month = feb,
  number = {11},
  volume = {56},
  pages = {115003},
  url = {http://dx.doi.org/10.1088/1751-8121/acb742},
  doi = {10.1088/1751-8121/acb742},
  issn = {1751-8121}
}

@unpublished{Montanari_2023_Solving,
  author = {Montanari, Andrea and Subag, Eliran},
  title = {Solving overparametrized systems of random equations: {I}. Model and algorithms for approximate solutions},
  year = {2023},
  month = jun,
  note = {arXiv preprint},
  url = {http://arxiv.org/abs/2306.13326},
  archiveprefix = {arXiv},
  eprint = {2306.13326},
  eprintclass = {math.PR},
  eprinttype = {arxiv}
}

@unpublished{Montanari_2024_On,
  author = {Montanari, Andrea and Subag, Eliran},
  title = {On {Smale}'s 17th problem over the reals},
  year = {2024},
  month = may,
  note = {arXiv preprint},
  url = {http://arxiv.org/abs/2405.01735},
  archiveprefix = {arXiv},
  eprint = {2405.01735},
  eprintclass = {cs.DS},
  eprinttype = {arxiv}
}