Update README.md
Browse files
README.md
CHANGED
|
@@ -16,4 +16,26 @@ pipeline_tag: image-text-to-text
|
|
| 16 |
|
| 17 |
# Model Card for SpaceQwen3-VL-2B-Thinking
|
| 18 |
|
| 19 |
-
Finetuned [Qwen3-VL-2B-Thinking](https://huggingface.co/Qwen/Qwen3-VL-2B-Thinking) by Low-Rank Adapters using the [SpaceOm dataset](https://huggingface.co/datasets/remyxai/SpaceOm) created with [VQASynth](https://github.com/remyxai/VQASynth), an open-source multimodal data synthesis pipeline inspired by [SpatialVLM](https://spatial-vlm.github.io/#community-implementation)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Model Card for SpaceQwen3-VL-2B-Thinking
|
| 18 |
|
| 19 |
+
Fine-tuned from [Qwen3-VL-2B-Thinking](https://huggingface.co/Qwen/Qwen3-VL-2B-Thinking) with Low-Rank Adapters (LoRA) on the [SpaceOm dataset](https://huggingface.co/datasets/remyxai/SpaceOm), which was created with [VQASynth](https://github.com/remyxai/VQASynth), an open-source multimodal data synthesis pipeline inspired by [SpatialVLM](https://spatial-vlm.github.io/#community-implementation).
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# Citation
|
| 23 |
+
```bibtex
|
| 24 |
+
@misc{qwen3technicalreport,
|
| 25 |
+
title={Qwen3 Technical Report},
|
| 26 |
+
author={Qwen Team},
|
| 27 |
+
year={2025},
|
| 28 |
+
eprint={2505.09388},
|
| 29 |
+
archivePrefix={arXiv},
|
| 30 |
+
primaryClass={cs.CL},
|
| 31 |
+
url={https://arxiv.org/abs/2505.09388},
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
@article{chen2024spatialvlm,
|
| 35 |
+
title = {SpatialVLM: Endowing Vision-Language Models with Spatial Reasoning Capabilities},
|
| 36 |
+
author = {Chen, Boyuan and Xu, Zhuo and Kirmani, Sean and Ichter, Brian and Driess, Danny and Florence, Pete and Sadigh, Dorsa and Guibas, Leonidas and Xia, Fei},
|
| 37 |
+
journal = {arXiv preprint arXiv:2401.12168},
|
| 38 |
+
year = {2024},
|
| 39 |
+
url = {https://arxiv.org/abs/2401.12168},
|
| 40 |
+
}
|
| 41 |
+
```
|