Bibtex
|
ACM
|
MLA
|
APA
|
Harvard
|
Vancouver
|
Chicago
@comment{arXiv preprint. The eprint identifier is the suffix of the DOI below. Only the camelCase name PagedAttention is brace-protected so that bibliography styles remain free to apply their own title casing.}
@misc{Kwon2023Efficient,
  author        = {Kwon, Woosuk and Li, Zhuohan and Zhuang, Siyuan and Sheng, Ying and Zheng, Lianmin and Yu, Cody Hao and Gonzalez, Joseph E. and Zhang, Hao and Stoica, Ion},
  title         = {Efficient Memory Management for Large Language Model Serving with {PagedAttention}},
  year          = {2023},
  publisher     = {arXiv},
  eprint        = {2309.06180},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2309.06180},
}
Woosuk Kwon, Zhuohan Li, Siyuan Zhuang, Ying Sheng, Lianmin Zheng, Cody Hao Yu, Joseph E. Gonzalez, Hao Zhang, and Ion Stoica. 2023. Efficient Memory Management for Large Language Model Serving with PagedAttention. arXiv.2309.06180, pp. . DOI: https://doi.org/10.48550/arXiv.2309.06180
Kwon, Woosuk, Li, Zhuohan, Zhuang, Siyuan, Sheng, Ying, Zheng, Lianmin, Yu, Cody Hao, Gonzalez, Joseph E., Zhang, Hao, and Stoica, Ion. "Efficient Memory Management for Large Language Model Serving with PagedAttention". arXiv.2309.06180, pp. . 2023.
Kwon, W., Li, Z., Zhuang, S., Sheng, Y., Zheng, L., Yu, C., Gonzalez, J., Zhang, H., & Stoica, I. (2023). Efficient Memory Management for Large Language Model Serving with PagedAttention. arXiv.2309.06180, pp. .
Kwon, W., Li, Z., Zhuang, S., Sheng, Y., Zheng, L., Yu, C., Gonzalez, J., Zhang, H., Stoica, I., 2023. Efficient Memory Management for Large Language Model Serving with PagedAttention.arXiv.2309.06180, pp.
Kwon W, Li Z, Zhuang S, Sheng Y, Zheng L, Yu C, Gonzalez J, Zhang H, Stoica I. Efficient Memory Management for Large Language Model Serving with PagedAttention.arXiv.2309.061802023; pp. .
Kwon, Woosuk, Li, Zhuohan, Zhuang, Siyuan, Sheng, Ying, Zheng, Lianmin, Yu, Cody Hao, Gonzalez, Joseph E., Zhang, Hao, and Stoica, Ion "Efficient Memory Management for Large Language Model Serving with PagedAttention". arXiv.2309.06180, pp. . 2023.