% Xu et al., SIGIR '20 conference paper (pp. 509--518).
% Kept ASCII-only (page range as "--", plain apostrophe in series) so the
% entry is safe under classic 8-bit BibTeX as well as biblatex/Biber.
% "collection" is a non-standard field carried over from the export; it
% duplicates "series" and is ignored by standard styles.
@inproceedings{Xu_2020,
  author     = {Xu, Jun and Wei, Zeng and Xia, Long and Lan, Yanyan and Yin, Dawei and Cheng, Xueqi and Wen, Ji-Rong},
  title      = {Reinforcement Learning to Rank with Pairwise Policy Gradient},
  booktitle  = {Proceedings of the 43rd International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
  series     = {SIGIR '20},
  collection = {SIGIR '20},
  publisher  = {ACM},
  year       = {2020},
  month      = jul,
  pages      = {509--518},
  doi        = {10.1145/3397271.3401148},
  url        = {https://doi.org/10.1145/3397271.3401148},
}