---
references:
- id: Ajay2023Conditional
  accessed:
    - year: 2023
      month: 7
      day: 16
  author:
    - family: Ajay
      given: Anurag
    - family: Du
      given: Yilun
    - family: Gupta
      given: Abhi
    - family: Tenenbaum
      given: Joshua
    - family: Jaakkola
      given: Tommi
    - family: Agrawal
      given: Pulkit
  citation-key: Ajay2023Conditional
  DOI: 10.48550/arXiv.2211.15657
  event-title: ICLR
  issued:
    - year: 2023
      month: 7
      day: 10
  publisher: arXiv
  title: Is Conditional Generative Modeling all you need for Decision-Making?
  type: paper-conference
  URL: http://arxiv.org/abs/2211.15657

- id: Bensoussan2020Machine
  accessed:
    - year: 2020
      month: 11
      day: 30
  author:
    - family: Bensoussan
      given: Alain
    - family: Li
      given: Yiqun
    - family: Nguyen
      given: Dinh Phan Cao
    - family: Tran
      given: Minh-Binh
    - family: Yam
      given: Sheung Chi Phillip
    - family: Zhou
      given: Xiang
  citation-key: Bensoussan2020Machine
  container-title: arXiv:2006.05604 [cs, math, stat]
  issued:
    - year: 2020
      month: 6
      day: 9
  title: Machine Learning and Control Theory
  type: article-journal
  URL: http://arxiv.org/abs/2006.05604

- id: Berrueta2024Maximum
  accessed:
    - year: 2025
      month: 3
      day: 2
  author:
    - family: Berrueta
      given: Thomas A.
    - family: Pinosky
      given: Allison
    - family: Murphey
      given: Todd D.
  citation-key: Berrueta2024Maximum
  container-title: Nature Machine Intelligence
  container-title-short: Nat Mach Intell
  DOI: 10.1038/s42256-024-00829-3
  ISSN: 2522-5839
  issue: '5'
  issued:
    - year: 2024
      month: 5
  language: en
  page: 504-514
  publisher: Nature Publishing Group
  title: Maximum diffusion reinforcement learning
  type: article-journal
  URL: https://arxiv.org/abs/2309.15293v5
  volume: '6'

- id: Brockman2016OpenAI
  accessed:
    - year: 2022
      month: 5
      day: 10
  author:
    - family: Brockman
      given: Greg
    - family: Cheung
      given: Vicki
    - family: Pettersson
      given: Ludwig
    - family: Schneider
      given: Jonas
    - family: Schulman
      given: John
    - family: Tang
      given: Jie
    - family: Zaremba
      given: Wojciech
  citation-key: Brockman2016OpenAI
  container-title: arXiv:1606.01540 [cs]
  issued:
    - year: 2016
      month: 6
      day: 5
  language: en
  title: OpenAI Gym
  type: article-journal
  URL: http://arxiv.org/abs/1606.01540

- id: Clifton2020QLearning
  accessed:
    - year: 2020
      month: 3
      day: 11
  author:
    - family: Clifton
      given: Jesse
    - family: Laber
      given: Eric
  citation-key: Clifton2020QLearning
  container-title: Annual Review of Statistics and Its Application
  DOI: 10.1146/annurev-statistics-031219-041220
  issue: '1'
  issued:
    - year: 2020
  page: 279-301
  title: 'Q-Learning: Theory and Applications'
  type: article-journal
  volume: '7'

- id: DayanReinforcement
  author:
    - family: Dayan
      given: Peter
    - family: Watkins
      given: Christopher J. C. H.
  citation-key: DayanReinforcement
  container-title: Encyclopedia of Cognitive Science
  title: Reinforcement Learning
  type: chapter

- id: Drori2022Deep
  author:
    - family: Drori
      given: Iddo
  citation-key: Drori2022Deep
  container-author:
    - family: Drori
      given: Iddo
  container-title: The science of deep learning
  issued:
    - year: 2022
  publisher: Cambridge University Press
  title: Deep reinforcement learning
  type: chapter

- id: Drori2022Reinforcement
  author:
    - family: Drori
      given: Iddo
  citation-key: Drori2022Reinforcement
  container-author:
    - family: Drori
      given: Iddo
  container-title: The science of deep learning
  issued:
    - year: 2022
  publisher: Cambridge University Press
  title: Reinforcement learning
  type: chapter

- id: Drori2022Science
  author:
    - family: Drori
      given: Iddo
  citation-key: Drori2022Science
  issued:
    - year: 2022
  publisher: Cambridge University Press
  title: The science of deep learning
  type: book
  URL: http://www.dlbook.org

- id: Fellows2019VIREL
  accessed:
    - year: 2024
      month: 5
      day: 28
  author:
    - family: Fellows
      given: Matthew
    - family: Mahajan
      given: Anuj
    - family: Rudner
      given: Tim G. J.
    - family: Whiteson
      given: Shimon
  citation-key: Fellows2019VIREL
  container-title: Advances in Neural Information Processing Systems
  issued:
    - year: 2019
  publisher: Curran Associates, Inc.
  title: 'VIREL: A Variational Inference Framework for Reinforcement Learning'
  type: paper-conference
  URL: >-
    https://proceedings.neurips.cc/paper_files/paper/2019/hash/582967e09f1b30ca2539968da0a174fa-Abstract.html
  volume: '32'

- id: Jaakkola1995Reinforcement
  accessed:
    - year: 2017
      month: 9
      day: 13
  author:
    - family: Jaakkola
      given: Tommi
    - family: Singh
      given: Satinder P.
    - family: Jordan
      given: Michael I.
  citation-key: Jaakkola1995Reinforcement
  container-title: Advances in neural information processing systems
  issued:
    - year: 1995
  page: 345–352
  title: >-
    Reinforcement learning algorithm for partially observable Markov decision
    problems
  type: paper-conference
  URL: >-
    http://papers.nips.cc/paper/951-reinforcement-learning-algorithm-for-partially-observable-markov-decision-problems.pdf

- id: Kaelbling1996Reinforcement
  accessed:
    - year: 2014
      month: 11
      day: 27
  author:
    - family: Kaelbling
      given: L. P.
    - family: Littman
      given: M. L.
    - family: Moore
      given: A. W.
  citation-key: Kaelbling1996Reinforcement
  container-title: Journal of Artificial Intelligence Research
  issued:
    - year: 1996
      month: 4
      day: 30
  title: 'Reinforcement Learning: A Survey'
  type: article-journal
  URL: http://arxiv.org/abs/cs/9605103
  volume: '4'

- id: Kochenderfer2022Algorithms
  author:
    - family: Kochenderfer
      given: Mykel J.
    - family: Wheeler
      given: Tim Allan
    - family: Wray
      given: Kyle H.
  citation-key: Kochenderfer2022Algorithms
  event-place: Cambridge, Massachusetts London, UK
  ISBN: 978-0-262-04701-2
  issued:
    - year: 2022
  language: en
  number-of-pages: '678'
  publisher: Massachusetts Institute of Technology
  publisher-place: Cambridge, Massachusetts London, UK
  title: Algorithms for decision making
  type: book

- id: Korbak2022RL
  accessed:
    - year: 2024
      month: 5
      day: 28
  author:
    - family: Korbak
      given: Tomasz
    - family: Perez
      given: Ethan
    - family: Buckley
      given: Christopher L.
  citation-key: Korbak2022RL
  DOI: 10.48550/arXiv.2205.11275
  issued:
    - year: 2022
      month: 10
      day: 21
  number: arXiv:2205.11275
  publisher: arXiv
  title: RL with KL penalties is better viewed as Bayesian inference
  type: article
  URL: http://arxiv.org/abs/2205.11275

- id: Krakovsky2016Reinforcement
  accessed:
    - year: 2016
      month: 7
      day: 29
  author:
    - family: Krakovsky
      given: Marina
  citation-key: Krakovsky2016Reinforcement
  container-title: Communications of the ACM
  container-title-short: Commun. ACM
  DOI: 10.1145/2949662
  ISSN: 0001-0782
  issue: '8'
  issued:
    - year: 2016
      month: 7
  page: 12–14
  title: Reinforcement Renaissance
  type: article-journal
  volume: '59'

- id: Krishnamurthy2016ContextualMDPs
  accessed:
    - year: 2016
      month: 3
      day: 26
  author:
    - family: Krishnamurthy
      given: Akshay
    - family: Agarwal
      given: Alekh
    - family: Langford
      given: John
  citation-key: Krishnamurthy2016ContextualMDPs
  container-title: arXiv:1602.02722 [cs, stat]
  issued:
    - year: 2016
      month: 2
      day: 8
  title: Contextual-MDPs for PAC-Reinforcement Learning with Rich Observations
  type: article-journal
  URL: http://arxiv.org/abs/1602.02722

- id: Lehman2022Evolution
  accessed:
    - year: 2023
      month: 9
      day: 7
  author:
    - family: Lehman
      given: Joel
    - family: Gordon
      given: Jonathan
    - family: Jain
      given: Shawn
    - family: Ndousse
      given: Kamal
    - family: Yeh
      given: Cathy
    - family: Stanley
      given: Kenneth O.
  citation-key: Lehman2022Evolution
  DOI: 10.48550/arXiv.2206.08896
  issued:
    - year: 2022
      month: 6
      day: 17
  number: arXiv:2206.08896
  publisher: arXiv
  title: Evolution through Large Models
  type: article
  URL: http://arxiv.org/abs/2206.08896

- id: Levine2018Reinforcement
  accessed:
    - year: 2021
      month: 11
      day: 15
  author:
    - family: Levine
      given: Sergey
  citation-key: Levine2018Reinforcement
  container-title: arXiv:1805.00909 [cs, stat]
  issued:
    - year: 2018
      month: 5
      day: 20
  language: en
  title: >-
    Reinforcement Learning and Control as Probabilistic Inference: Tutorial and
    Review
  type: article-journal
  URL: http://arxiv.org/abs/1805.00909

- id: Mania2018Simple
  accessed:
    - year: 2021
      month: 3
      day: 31
  author:
    - family: Mania
      given: Horia
    - family: Guy
      given: Aurelia
    - family: Recht
      given: Benjamin
  citation-key: Mania2018Simple
  container-title: arXiv:1803.07055 [cs, math, stat]
  issued:
    - year: 2018
      month: 3
      day: 19
  title: >-
    Simple random search provides a competitive approach to reinforcement
    learning
  type: article-journal
  URL: http://arxiv.org/abs/1803.07055

- id: Mukherjee2023Bridging
  accessed:
    - year: 2023
      month: 7
      day: 17
  author:
    - family: Mukherjee
      given: Amartya
    - family: Liu
      given: Jun
  citation-key: Mukherjee2023Bridging
  DOI: 10.48550/arXiv.2302.00237
  issued:
    - year: 2023
      month: 1
      day: 31
  number: arXiv:2302.00237
  publisher: arXiv
  title: >-
    Bridging Physics-Informed Neural Networks with Reinforcement Learning:
    Hamilton-Jacobi-Bellman Proximal Policy Optimization (HJBPPO)
  type: article
  URL: http://arxiv.org/abs/2302.00237

- id: Novelli2024Operator
  accessed:
    - year: 2025
      month: 2
      day: 17
  author:
    - family: Novelli
      given: Pietro
    - family: Pratticò
      given: Marco
    - family: Pontil
      given: Massimiliano
    - family: Ciliberto
      given: Carlo
  citation-key: Novelli2024Operator
  DOI: 10.48550/arXiv.2406.19861
  issued:
    - year: 2024
      month: 10
      day: 30
  number: arXiv:2406.19861
  publisher: arXiv
  title: Operator World Models for Reinforcement Learning
  type: article
  URL: http://arxiv.org/abs/2406.19861

- id: Parisotto2017Neural
  author:
    - family: Parisotto
      given: Emilio
    - family: Salakhutdinov
      given: Ruslan
  citation-key: Parisotto2017Neural
  container-title: arXiv:1702.08360 [cs]
  issued:
    - year: 2017
      month: 2
      day: 27
  title: 'Neural Map: Structured Memory for Deep Reinforcement Learning'
  type: article-journal
  URL: http://arxiv.org/abs/1702.08360

- id: Pfau2016Connecting
  accessed:
    - year: 2019
      month: 5
      day: 29
  author:
    - family: Pfau
      given: David
    - family: Vinyals
      given: Oriol
  citation-key: Pfau2016Connecting
  container-title: arXiv:1610.01945 [cs, stat]
  issued:
    - year: 2016
      month: 10
      day: 6
  title: Connecting Generative Adversarial Networks and Actor-Critic Methods
  type: article-journal
  URL: http://arxiv.org/abs/1610.01945

- id: RamirezRuiz2024Complex
  accessed:
    - year: 2025
      month: 3
      day: 2
  author:
    - family: Ramírez-Ruiz
      given: Jorge
    - family: Grytskyy
      given: Dmytro
    - family: Mastrogiuseppe
      given: Chiara
    - family: Habib
      given: Yamen
    - family: Moreno-Bote
      given: Rubén
  citation-key: RamirezRuiz2024Complex
  container-title: Nature Communications
  container-title-short: Nat Commun
  DOI: 10.1038/s41467-024-49711-1
  ISSN: 2041-1723
  issue: '1'
  issued:
    - year: 2024
      month: 7
      day: 29
  language: en
  page: '6368'
  publisher: Nature Publishing Group
  title: >-
    Complex behavior from intrinsic motivation to occupy future action-state
    path space
  type: article-journal
  volume: '15'

- id: Ren2023Spectral
  accessed:
    - year: 2023
      month: 4
      day: 22
  author:
    - family: Ren
      given: Tongzheng
    - family: Zhang
      given: Tianjun
    - family: Lee
      given: Lisa
    - family: Gonzalez
      given: Joseph E.
    - family: Schuurmans
      given: Dale
    - family: Dai
      given: Bo
  citation-key: Ren2023Spectral
  DOI: 10.48550/arXiv.2208.09515
  issued:
    - year: 2023
      month: 3
      day: 7
  number: arXiv:2208.09515
  publisher: arXiv
  title: Spectral Decomposition Representation for Reinforcement Learning
  type: article
  URL: http://arxiv.org/abs/2208.09515

- id: Ringstrom2022Reward
  accessed:
    - year: 2023
      month: 1
      day: 26
  author:
    - family: Ringstrom
      given: Thomas J.
  citation-key: Ringstrom2022Reward
  DOI: 10.48550/arXiv.2211.10851
  issued:
    - year: 2022
      month: 11
      day: 22
  number: arXiv:2211.10851
  publisher: arXiv
  title: >-
    Reward is not Necessary: How to Create a Compositional Self-Preserving Agent
    for Life-Long Learning
  type: article
  URL: http://arxiv.org/abs/2211.10851

- id: Salimans2017Evolution
  author:
    - family: Salimans
      given: Tim
    - family: Ho
      given: Jonathan
    - family: Chen
      given: Xi
    - family: Sutskever
      given: Ilya
  citation-key: Salimans2017Evolution
  container-title: arXiv:1703.03864 [cs, stat]
  issued:
    - year: 2017
      month: 3
      day: 10
  title: Evolution Strategies as a Scalable Alternative to Reinforcement Learning
  type: article-journal
  URL: http://arxiv.org/abs/1703.03864

- id: Schulman2017Proximal
  accessed:
    - year: 2024
      month: 8
      day: 29
  author:
    - family: Schulman
      given: John
    - family: Wolski
      given: Filip
    - family: Dhariwal
      given: Prafulla
    - family: Radford
      given: Alec
    - family: Klimov
      given: Oleg
  citation-key: Schulman2017Proximal
  DOI: 10.48550/arXiv.1707.06347
  issued:
    - year: 2017
      month: 8
      day: 28
  number: arXiv:1707.06347
  publisher: arXiv
  title: Proximal Policy Optimization Algorithms
  type: article
  URL: http://arxiv.org/abs/1707.06347

- id: Shibata2006Probabilistic
  accessed:
    - year: 2014
      month: 9
      day: 9
  author:
    - family: Shibata
      given: Takeshi
    - family: Yoshinaka
      given: Ryo
    - family: Chikayama
      given: Takashi
  citation-key: Shibata2006Probabilistic
  collection-number: '4264'
  collection-title: Lecture Notes in Computer Science
  container-title: Algorithmic Learning Theory
  editor:
    - family: Balcázar
      given: José L.
    - family: Long
      given: Philip M.
    - family: Stephan
      given: Frank
  ISBN: 978-3-540-46649-9 978-3-540-46650-5
  issued:
    - year: 2006
      month: 1
      day: 1
  language: en
  page: 348-362
  publisher: Springer Berlin Heidelberg
  title: >-
    Probabilistic Generalization of Simple Grammars and Its Application to
    Reinforcement Learning
  type: chapter
  URL: http://link.springer.com/chapter/10.1007/11894841_28

- id: Silver2021Reward
  accessed:
    - year: 2022
      month: 5
      day: 27
  author:
    - family: Silver
      given: David
    - family: Singh
      given: Satinder
    - family: Precup
      given: Doina
    - family: Sutton
      given: Richard S.
  citation-key: Silver2021Reward
  container-title: Artificial Intelligence
  container-title-short: Artificial Intelligence
  DOI: 10.1016/j.artint.2021.103535
  ISSN: 0004-3702
  issued:
    - year: 2021
      month: 10
      day: 1
  language: en
  page: '103535'
  title: Reward is enough
  type: article-journal
  volume: '299'

- id: Sutton1998Reinforcement
  author:
    - family: Sutton
      given: Richard S.
    - family: Barto
      given: Andrew G.
  citation-key: Sutton1998Reinforcement
  event-place: Cambridge, Mass.
  ISBN: 0-262-19398-1
  issued:
    - year: 1998
  publisher: MIT Press
  publisher-place: Cambridge, Mass.
  title: Reinforcement learning
  type: book
  URL: http://lccn.loc.gov/97026416

- id: Sutton2000Policy
  accessed:
    - year: 2017
      month: 9
      day: 13
  author:
    - family: Sutton
      given: Richard S.
    - family: McAllester
      given: David A.
    - family: Singh
      given: Satinder P.
    - family: Mansour
      given: Yishay
  citation-key: Sutton2000Policy
  container-title: Advances in neural information processing systems
  issued:
    - year: 2000
  page: 1057–1063
  title: >-
    Policy gradient methods for reinforcement learning with function
    approximation
  type: paper-conference
  URL: >-
    http://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation.pdf

- id: Sutton2018Reinforcement
  author:
    - family: Sutton
      given: Richard S.
    - family: Barto
      given: Andrew G.
  citation-key: Sutton2018Reinforcement
  edition: 2nd edition
  event-place: Cambridge, Massachusetts London, England
  ISBN: 978-0-262-03924-6
  issued:
    - year: 2018
      month: 11
      day: 13
  language: en
  number-of-pages: '552'
  publisher: Bradford Books
  publisher-place: Cambridge, Massachusetts London, England
  title: 'Reinforcement Learning: An Introduction'
  type: book
  URL: http://incompleteideas.net/book/the-book.html

- id: Thrun1992Efficient
  author:
    - family: Thrun
      given: Sebastian B.
  citation-key: Thrun1992Efficient
  issued:
    - year: 1992
  title: Efficient Exploration In Reinforcement Learning
  type: report
  URL: >-
    http://www.ri.cmu.edu/pub_files/pub1/thrun_sebastian_1992_1/thrun_sebastian_1992_1.pdf
...