@inproceedings{b068674c9f28422282754737d2b7884c,
  title     = {Bandits with Switching Costs: {$T^{2/3}$} Regret},
  abstract  = {We study the adversarial multi-armed bandit problem in a setting where the player incurs a unit cost each time he switches actions. We prove that the player's $T$-round minimax regret in this setting is $\widetilde{\Theta}(T^{2/3})$, thereby closing a fundamental gap in our understanding of learning with bandit feedback. In the corresponding full-information version of the problem, the minimax regret is known to grow at a much slower rate of $\Theta(\sqrt{T})$. The difference between these two rates provides the first indication that learning with bandit feedback can be significantly harder than learning with full-information feedback (previous results only showed a different dependence on the number of actions, but not on $T$.) In addition to characterizing the inherent difficulty of the multi-armed bandit problem with switching costs, our results also resolve several other open problems in online learning. One direct implication is that learning with bandit feedback against bounded-memory adaptive adversaries has a minimax regret of $\widetilde{\Theta}(T^{2/3})$. Another implication is that the minimax regret of online learning in adversarial Markov decision processes (MDPs) is $\widetilde{\Theta}(T^{2/3})$. The key to all of our results is a new randomized construction of a multi-scale random walk, which is of independent interest and likely to prove useful in additional settings.},
  keywords  = {Lower bounds, Multi-armed Bandit, Online learning, Switching costs},
  author    = {Dekel, Ofer and Ding, Jian and Koren, Tomer and Peres, Yuval},
  year      = {2014},
  doi       = {10.1145/2591796.2591868},
  language  = {English},
  isbn      = {9781450327107},
  series    = {Proceedings of the Annual ACM Symposium on Theory of Computing},
  publisher = {Association for Computing Machinery},
  pages     = {459--467},
  booktitle = {STOC 2014 - Proceedings of the 2014 ACM Symposium on Theory of Computing},
  note      = {46th Annual ACM Symposium on Theory of Computing, STOC 2014 ; Conference date: 31-05-2014 Through 03-06-2014},
}