% Conference paper: the entry carries `booktitle` (proceedings) and no
% `journal`, so it is typed @inproceedings rather than @article.
% The citation key is kept unchanged so existing \cite commands still resolve
% (note: it does not follow the first listed author's surname, Chen).
% Braces in `title`/`booktitle` protect product names and acronyms from
% style-applied sentence casing.
@inproceedings{yuan2025ktransformers,
  author    = {Chen, Hongtao and Xie, Weiyu and Zhang, Boxin and Tang, Jingqi and Wang, Jiahao and Dong, Jianwei and Chen, Shaoyuan and Yuan, Ziwei and Lin, Chen and Qiu, Chengyu and others},
  title     = {{KTransformers}: Unleashing the Full Potential of {CPU/GPU} Hybrid Inference for {MoE} Models},
  booktitle = {Proceedings of the {ACM SIGOPS} 31st Symposium on Operating Systems Principles ({SOSP} '25)},
  pages     = {1014--1029},
  year      = {2025},
  month     = oct,
  publisher = {Association for Computing Machinery},
  doi       = {10.1145/3731569.3764843},
  url       = {https://doi.org/10.1145/3731569.3764843},
}