Update README.md
README.md CHANGED
@@ -99,6 +99,60 @@ res = model.infer(tokenizer, prompt=prompt, image_file=image_file, output_path =
Refer to [🌟GitHub](https://github.com/deepseek-ai/DeepSeek-OCR/) for guidance on model inference acceleration, PDF processing, etc.

[2025/10/23] 🚀🚀🚀 DeepSeek-OCR is now officially supported in upstream [vLLM](https://docs.vllm.ai/projects/recipes/en/latest/DeepSeek/DeepSeek-OCR.html#installing-vllm).
```shell
uv venv
source .venv/bin/activate
# Until the v0.11.1 release, you need to install vLLM from the nightly build
uv pip install -U vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
```
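A quick, informal way to sanity-check the environment before loading the model is to print the version of the wheel that was actually installed:

```python
import vllm

# Prints the installed vLLM version so you can confirm the nightly wheel was picked up
print(vllm.__version__)
```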

```python
from vllm import LLM, SamplingParams
from vllm.model_executor.models.deepseek_ocr import NGramPerReqLogitsProcessor
from PIL import Image

# Create the model instance.
# NGramPerReqLogitsProcessor helps suppress repetitive n-gram loops in long OCR outputs.
llm = LLM(
    model="deepseek-ai/DeepSeek-OCR",
    enable_prefix_caching=False,
    mm_processor_cache_gb=0,
    logits_processors=[NGramPerReqLogitsProcessor]
)

# Prepare batched input with your image files
image_1 = Image.open("path/to/your/image_1.png").convert("RGB")
image_2 = Image.open("path/to/your/image_2.png").convert("RGB")
prompt = "<image>\nFree OCR."

model_input = [
    {
        "prompt": prompt,
        "multi_modal_data": {"image": image_1}
    },
    {
        "prompt": prompt,
        "multi_modal_data": {"image": image_2}
    }
]

sampling_param = SamplingParams(
    temperature=0.0,
    max_tokens=8192,
    # ngram logits processor args
    extra_args=dict(
        ngram_size=30,
        window_size=90,
        whitelist_token_ids={128821, 128822},  # whitelist: <td>, </td>
    ),
    skip_special_tokens=False,
)

# Generate output
model_outputs = llm.generate(model_input, sampling_param)

# Print output
for output in model_outputs:
    print(output.outputs[0].text)
```
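For multi-page jobs you will usually want to persist the recognized text rather than only print it. A minimal sketch follows; the `ocr_outputs` directory name and `.md` extension are arbitrary choices for illustration, not part of the DeepSeek-OCR or vLLM APIs, and it relies on `llm.generate` returning results in the same order as the inputs:

```python
from pathlib import Path

# Write one file per input image, preserving input order
out_dir = Path("ocr_outputs")
out_dir.mkdir(exist_ok=True)
for idx, output in enumerate(model_outputs):
    (out_dir / f"page_{idx:03d}.md").write_text(output.outputs[0].text, encoding="utf-8")
```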

## Visualizations

