ZhenweiWang committed on
Commit 0ca05b5 · verified · 1 Parent(s): f90c47b

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes.

Files changed (50)
  1. .gitattributes +14 -0
  2. .gitignore +6 -0
  3. License.txt +82 -0
  4. Notice.txt +76 -0
  5. README.md +6 -7
  6. app.py +1817 -0
  7. examples/realistic/Archway_Tunnel/image_0001.jpg +0 -0
  8. examples/realistic/Archway_Tunnel/image_0030.jpg +0 -0
  9. examples/realistic/Bright_Room/image_0001.jpg +0 -0
  10. examples/realistic/Bright_Room/image_0035.jpg +0 -0
  11. examples/realistic/Desk/530554609_3367433673396747_2161028887770608277_n.jpg +0 -0
  12. examples/realistic/Desk/532328457_1311198870420578_2167456836351167380_n.jpg +3 -0
  13. examples/realistic/Dining_Table/image_0001.jpg +0 -0
  14. examples/realistic/Dining_Table/image_0008.jpg +0 -0
  15. examples/realistic/Dining_Table/image_0012.jpg +0 -0
  16. examples/realistic/Dining_Table/image_0016.jpg +0 -0
  17. examples/realistic/Dino/528883410_1456464302336597_4114529568612559572_n.jpg +0 -0
  18. examples/realistic/Dino/530182709_1122456693282934_3373468492106282632_n.jpg +0 -0
  19. examples/realistic/Dino/532847807_1055021109949229_8315548832183031452_n.jpg +0 -0
  20. examples/realistic/Festival/image_0001.jpg +0 -0
  21. examples/realistic/Festival/image_0023.jpg +0 -0
  22. examples/realistic/Festival/image_0046.jpg +0 -0
  23. examples/realistic/Flower/image_0001.jpg +0 -0
  24. examples/realistic/Great_Wall/great_wall_000000.jpg +0 -0
  25. examples/realistic/Great_Wall/great_wall_000001.jpg +0 -0
  26. examples/realistic/Great_Wall/great_wall_000002.jpg +0 -0
  27. examples/realistic/Great_Wall/great_wall_000003.jpg +0 -0
  28. examples/realistic/Great_Wall/great_wall_000004.jpg +0 -0
  29. examples/realistic/Great_Wall/great_wall_000005.jpg +0 -0
  30. examples/realistic/Great_Wall/great_wall_000006.jpg +0 -0
  31. examples/realistic/Great_Wall/great_wall_000007.jpg +0 -0
  32. examples/realistic/Great_Wall/great_wall_000008.jpg +0 -0
  33. examples/realistic/Great_Wall/great_wall_000009.jpg +0 -0
  34. examples/realistic/Great_Wall/great_wall_000010.jpg +0 -0
  35. examples/realistic/Great_Wall/great_wall_000011.jpg +0 -0
  36. examples/realistic/Hall/image_0001.jpg +0 -0
  37. examples/realistic/Hall/image_0027.jpg +0 -0
  38. examples/realistic/Ireland_Landscape/image_0001.jpg +0 -0
  39. examples/realistic/Ireland_Landscape/image_0007.jpg +0 -0
  40. examples/realistic/Ireland_Landscape/image_0010.jpg +0 -0
  41. examples/realistic/Ireland_Landscape/image_0017.jpg +0 -0
  42. examples/realistic/Ireland_Landscape/image_0022.jpg +0 -0
  43. examples/realistic/Ireland_Landscape/image_0026.jpg +0 -0
  44. examples/realistic/Lego_Kitchen/00.jpg +0 -0
  45. examples/realistic/Lego_Kitchen/01.jpg +0 -0
  46. examples/realistic/Lego_Kitchen/02.jpg +0 -0
  47. examples/realistic/Lego_Kitchen/03.jpg +0 -0
  48. examples/realistic/Lego_Kitchen/04.jpg +0 -0
  49. examples/realistic/Lego_Kitchen/05.jpg +0 -0
  50. examples/realistic/Lego_Kitchen/06.jpg +0 -0
.gitattributes CHANGED
@@ -33,3 +33,17 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ examples/realistic/Desk/532328457_1311198870420578_2167456836351167380_n.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/realistic/Office/Office.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/realistic/Room_Cat/no_overlap_2.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/realistic/Room_Cat/no_overlap_3.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/realistic/Room_Cat/no_overlap_4.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/realistic/Room_Cat/no_overlap_5.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/realistic/Room_Cat/no_overlap_6.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/realistic/Room_Cat/no_overlap_7.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/realistic/Room_Cat/no_overlap_8.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/realistic/Sisters_Statue/481869432_646849634388788_2162202232236218000_n.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/realistic/Sisters_Statue/481943293_641636221777392_2955401254290735956_n.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/stylistic/Cat_Girl/Cat_Girl.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/stylistic/Oil_Painting/oil.jpg filter=lfs diff=lfs merge=lfs -text
+ examples/stylistic/Panda_Wild_West/panda_orange_cat_wildwest.jpeg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,6 @@
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ .gradio/
+ inference_output/
+ submodules/gsplat/examples/pycolmap/
License.txt ADDED
@@ -0,0 +1,82 @@
+ TENCENT HUNYUANWORLD-MIRROR COMMUNITY LICENSE AGREEMENT
+ Tencent HunyuanWorld-Mirror Release Date: October 22, 2025
+ THIS LICENSE AGREEMENT DOES NOT APPLY IN THE EUROPEAN UNION, UNITED KINGDOM AND SOUTH KOREA AND IS EXPRESSLY LIMITED TO THE TERRITORY, AS DEFINED BELOW.
+ By clicking to agree or by using, reproducing, modifying, distributing, performing or displaying any portion or element of the Tencent HunyuanWorld-Mirror Works, including via any Hosted Service, You will be deemed to have recognized and accepted the content of this Agreement, which is effective immediately.
+ 1. DEFINITIONS.
+ a. “Acceptable Use Policy” shall mean the policy made available by Tencent as set forth in the Exhibit A.
+ b. “Agreement” shall mean the terms and conditions for use, reproduction, distribution, modification, performance and displaying of Tencent HunyuanWorld-Mirror Works or any portion or element thereof set forth herein.
+ c. “Documentation” shall mean the specifications, manuals and documentation for Tencent HunyuanWorld-Mirror made publicly available by Tencent.
+ d. “Hosted Service” shall mean a hosted service offered via an application programming interface (API), web access, or any other electronic or remote means.
+ e. “Licensee,” “You” or “Your” shall mean a natural person or legal entity exercising the rights granted by this Agreement and/or using the Tencent HunyuanWorld-Mirror Works for any purpose and in any field of use.
+ f. “Materials” shall mean, collectively, Tencent’s proprietary Tencent HunyuanWorld-Mirror and Documentation (and any portion thereof) as made available by Tencent under this Agreement.
+ g. “Model Derivatives” shall mean all: (i) modifications to Tencent HunyuanWorld-Mirror or any Model Derivative of Tencent HunyuanWorld-Mirror; (ii) works based on Tencent HunyuanWorld-Mirror or any Model Derivative of Tencent HunyuanWorld-Mirror; or (iii) any other machine learning model which is created by transfer of patterns of the weights, parameters, operations, or Output of Tencent HunyuanWorld-Mirror or any Model Derivative of Tencent HunyuanWorld-Mirror, to that model in order to cause that model to perform similarly to Tencent HunyuanWorld-Mirror or a Model Derivative of Tencent HunyuanWorld-Mirror, including distillation methods, methods that use intermediate data representations, or methods based on the generation of synthetic data Outputs by Tencent HunyuanWorld-Mirror or a Model Derivative of Tencent HunyuanWorld-Mirror for training that model. For clarity, Outputs by themselves are not deemed Model Derivatives.
+ h. “Output” shall mean the information and/or content output of Tencent HunyuanWorld-Mirror or a Model Derivative that results from operating or otherwise using Tencent HunyuanWorld-Mirror or a Model Derivative, including via a Hosted Service.
+ i. “Tencent,” “We” or “Us” shall mean the applicable entity or entities in the Tencent corporate family that own(s) intellectual property or other rights embodied in or utilized by the Materials.
+ j. “Tencent HunyuanWorld-Mirror” shall mean the 3D generation models and their software and algorithms, including trained model weights, parameters (including optimizer states), machine-learning model code, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing made publicly available by Us at [https://github.com/Tencent-Hunyuan/HunyuanWorld-Mirror].
+ k. “Tencent HunyuanWorld-Mirror Works” shall mean: (i) the Materials; (ii) Model Derivatives; and (iii) all derivative works thereof.
+ l. “Territory” shall mean the worldwide territory, excluding the territory of the European Union, United Kingdom and South Korea.
+ m. “Third Party” or “Third Parties” shall mean individuals or legal entities that are not under common control with Us or You.
+ n. “including” shall mean including but not limited to.
+ 2. GRANT OF RIGHTS.
+ We grant You, for the Territory only, a non-exclusive, non-transferable and royalty-free limited license under Tencent’s intellectual property or other rights owned by Us embodied in or utilized by the Materials to use, reproduce, distribute, create derivative works of (including Model Derivatives), and make modifications to the Materials, only in accordance with the terms of this Agreement and the Acceptable Use Policy, and You must not violate (or encourage or permit anyone else to violate) any term of this Agreement or the Acceptable Use Policy.
+ 3. DISTRIBUTION.
+ You may, subject to Your compliance with this Agreement, distribute or make available to Third Parties the Tencent HunyuanWorld-Mirror Works, exclusively in the Territory, provided that You meet all of the following conditions:
+ a. You must provide all such Third Party recipients of the Tencent HunyuanWorld-Mirror Works or products or services using them a copy of this Agreement;
+ b. You must cause any modified files to carry prominent notices stating that You changed the files;
+ c. You are encouraged to: (i) publish at least one technology introduction blogpost or one public statement expressing Your experience of using the Tencent HunyuanWorld-Mirror Works; and (ii) mark the products or services developed by using the Tencent HunyuanWorld-Mirror Works to indicate that the product/service is “Powered by Tencent Hunyuan”;
+ d. All distributions to Third Parties (other than through a Hosted Service) must be accompanied by a “Notice” text file that contains the following notice: “Tencent HunyuanWorld-Mirror is licensed under the Tencent HunyuanWorld-Mirror Community License Agreement, Copyright © 2025 Tencent. All Rights Reserved. The trademark rights of “Tencent Hunyuan” are owned by Tencent or its affiliate”;
+ e. In the event that You use, integrate, implement, or otherwise deploy the Tencent Hunyuan Works, in whole or in part, to provide, enable, or support any service, product, or functionality to third parties, You shall clearly, accurately, and prominently disclose to all end users the full legal name and entity of the actual provider of such service, product, or functionality. You shall expressly and conspicuously state that Tencent is not affiliated with, associated with, sponsoring, or endorsing any such service, product, or functionality. You shall not use or display any name, logo, trademark, trade name, or other indicia of Tencent in any manner that could be construed as, or be likely to create, confusion, deception, or a false impression regarding any relationship, affiliation, sponsorship, or endorsement by Tencent.
+ You may add Your own copyright statement to Your modifications and, except as set forth in this Section and in Section 5, may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Model Derivatives as a whole, provided Your use, reproduction, modification, distribution, performance and display of the work otherwise complies with the terms and conditions of this Agreement (including as regards the Territory). If You receive Tencent HunyuanWorld-Mirror Works from a Licensee as part of an integrated end user product, then this Section 3 of this Agreement will not apply to You.
+ 4. ADDITIONAL COMMERCIAL TERMS.
+ If, on the Tencent HunyuanWorld-Mirror version release date, the monthly active users of all products or services made available by or for Licensee is greater than 1 million monthly active users in the preceding calendar month, You must request a license from Tencent, which Tencent may grant to You in its sole discretion, and You are not authorized to exercise any of the rights under this Agreement unless or until Tencent otherwise expressly grants You such rights.
+ Subject to Tencent's written approval, you may request a license for the use of Tencent HunyuanWorld-Mirror by submitting the following information to [email protected]:
+ a. Your company’s name and associated business sector that plans to use Tencent HunyuanWorld-Mirror.
+ b. Your intended use case and the purpose of using Tencent HunyuanWorld-Mirror.
+ c. Your plans to modify Tencent HunyuanWorld-Mirror or create Model Derivatives.
+ 5. RULES OF USE.
+ a. Your use of the Tencent HunyuanWorld-Mirror Works must comply with applicable laws and regulations (including trade compliance laws and regulations) and adhere to the Acceptable Use Policy for the Tencent HunyuanWorld-Mirror Works, which is hereby incorporated by reference into this Agreement. You must include the use restrictions referenced in these Sections 5(a) and 5(b) as an enforceable provision in any agreement (e.g., license agreement, terms of use, etc.) governing the use and/or distribution of Tencent HunyuanWorld-Mirror Works and You must provide notice to subsequent users to whom You distribute that Tencent HunyuanWorld-Mirror Works are subject to the use restrictions in these Sections 5(a) and 5(b).
+ b. You must not use the Tencent HunyuanWorld-Mirror Works or any Output or results of the Tencent HunyuanWorld-Mirror Works to improve any other AI model (other than Tencent HunyuanWorld-Mirror or Model Derivatives thereof).
+ c. You must not use, reproduce, modify, distribute, or display the Tencent HunyuanWorld-Mirror Works, Output or results of the Tencent HunyuanWorld-Mirror Works outside the Territory. Any such use outside the Territory is unlicensed and unauthorized under this Agreement.
+ 6. INTELLECTUAL PROPERTY.
+ a. Subject to Tencent’s ownership of Tencent HunyuanWorld-Mirror Works made by or for Tencent and intellectual property rights therein, conditioned upon Your compliance with the terms and conditions of this Agreement, as between You and Tencent, You will be the owner of any derivative works and modifications of the Materials and any Model Derivatives that are made by or for You.
+ b. No trademark licenses are granted under this Agreement, and in connection with the Tencent HunyuanWorld-Mirror Works, Licensee may not use any name or mark owned by or associated with Tencent or any of its affiliates, except as required for reasonable and customary use in describing and distributing the Tencent HunyuanWorld-Mirror Works. Tencent hereby grants You a license to use “Tencent Hunyuan” (the “Mark”) in the Territory solely as required to comply with the provisions of Section 3(c), provided that You comply with any applicable laws related to trademark protection. All goodwill arising out of Your use of the Mark will inure to the benefit of Tencent.
+ c. If You commence a lawsuit or other proceedings (including a cross-claim or counterclaim in a lawsuit) against Us or any person or entity alleging that the Materials or any Output, or any portion of any of the foregoing, infringe any intellectual property or other right owned or licensable by You, then all licenses granted to You under this Agreement shall terminate as of the date such lawsuit or other proceeding is filed. You will defend, indemnify and hold harmless Us from and against any claim by any Third Party arising out of or related to Your or the Third Party’s use or distribution of the Tencent HunyuanWorld-Mirror Works.
+ d. Tencent claims no rights in Outputs You generate. You and Your users are solely responsible for Outputs and their subsequent uses.
+ 7. DISCLAIMERS OF WARRANTY AND LIMITATIONS OF LIABILITY.
+ a. We are not obligated to support, update, provide training for, or develop any further version of the Tencent HunyuanWorld-Mirror Works or to grant any license thereto.
+ b. UNLESS AND ONLY TO THE EXTENT REQUIRED BY APPLICABLE LAW, THE TENCENT HUNYUANWORLD-MIRROR WORKS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED “AS IS” WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES OF ANY KIND INCLUDING ANY WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, COURSE OF DEALING, USAGE OF TRADE, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING, REPRODUCING, MODIFYING, PERFORMING, DISPLAYING OR DISTRIBUTING ANY OF THE TENCENT HUNYUANWORLD-MIRROR WORKS OR OUTPUTS AND ASSUME ANY AND ALL RISKS ASSOCIATED WITH YOUR OR A THIRD PARTY’S USE OR DISTRIBUTION OF ANY OF THE TENCENT HUNYUANWORLD-MIRROR WORKS OR OUTPUTS AND YOUR EXERCISE OF RIGHTS AND PERMISSIONS UNDER THIS AGREEMENT.
+ c. TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL TENCENT OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, FOR ANY DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, CONSEQUENTIAL OR PUNITIVE DAMAGES, OR LOST PROFITS OF ANY KIND ARISING FROM THIS AGREEMENT OR RELATED TO ANY OF THE TENCENT HUNYUANWORLD-MIRROR WORKS OR OUTPUTS, EVEN IF TENCENT OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.
+ 8. SURVIVAL AND TERMINATION.
+ a. The term of this Agreement shall commence upon Your acceptance of this Agreement or access to the Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein.
+ b. We may terminate this Agreement if You breach any of the terms or conditions of this Agreement. Upon termination of this Agreement, You must promptly delete and cease use of the Tencent HunyuanWorld-Mirror Works. Sections 6(a), 6(c), 7 and 9 shall survive the termination of this Agreement.
+ 9. GOVERNING LAW AND JURISDICTION.
+ a. This Agreement and any dispute arising out of or relating to it will be governed by the laws of the Hong Kong Special Administrative Region of the People’s Republic of China, without regard to conflict of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement.
+ b. Exclusive jurisdiction and venue for any dispute arising out of or relating to this Agreement will be a court of competent jurisdiction in the Hong Kong Special Administrative Region of the People’s Republic of China, and Tencent and Licensee consent to the exclusive jurisdiction of such court with respect to any such dispute.
+
+ EXHIBIT A
+ ACCEPTABLE USE POLICY
+
+ Tencent reserves the right to update this Acceptable Use Policy from time to time.
+ Last modified: November 5, 2024
+
+ Tencent endeavors to promote safe and fair use of its tools and features, including Tencent HunyuanWorld-Mirror. You agree not to use Tencent HunyuanWorld-Mirror or Model Derivatives:
+ 1. Outside the Territory;
+ 2. In any way that violates any applicable national, federal, state, local, international or any other law or regulation;
+ 3. To harm Yourself or others;
+ 4. To repurpose or distribute output from Tencent HunyuanWorld-Mirror or any Model Derivatives to harm Yourself or others;
+ 5. To override or circumvent the safety guardrails and safeguards We have put in place;
+ 6. For the purpose of exploiting, harming or attempting to exploit or harm minors in any way;
+ 7. To generate or disseminate verifiably false information and/or content with the purpose of harming others or influencing elections;
+ 8. To generate or facilitate false online engagement, including fake reviews and other means of fake online engagement;
+ 9. To intentionally defame, disparage or otherwise harass others;
+ 10. To generate and/or disseminate malware (including ransomware) or any other content to be used for the purpose of harming electronic systems;
+ 11. To generate or disseminate personal identifiable information with the purpose of harming others;
+ 12. To generate or disseminate information (including images, code, posts, articles), and place the information in any public context (including –through the use of bot generated tweets), without expressly and conspicuously identifying that the information and/or content is machine generated;
+ 13. To impersonate another individual without consent, authorization, or legal right;
+ 14. To make high-stakes automated decisions in domains that affect an individual’s safety, rights or wellbeing (e.g., law enforcement, migration, medicine/health, management of critical infrastructure, safety components of products, essential services, credit, employment, housing, education, social scoring, or insurance);
+ 15. In a manner that violates or disrespects the social ethics and moral standards of other countries or regions;
+ 16. To perform, facilitate, threaten, incite, plan, promote or encourage violent extremism or terrorism;
+ 17. For any use intended to discriminate against or harm individuals or groups based on protected characteristics or categories, online or offline social behavior or known or predicted personal or personality characteristics;
+ 18. To intentionally exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm;
+ 19. For military purposes;
+ 20. To engage in the unauthorized or unlicensed practice of any profession including, but not limited to, financial, legal, medical/health, or other professional practices.
Notice.txt ADDED
@@ -0,0 +1,76 @@
+ Usage and Legal Notices:
+
+ Tencent is pleased to support the open source community by making Tencent HunyuanWorld-Mirror available.
+
+ Copyright (C) 2025 Tencent. All rights reserved. The below software and/or models in this distribution may have been modified by Tencent ("Tencent Modifications"). All Tencent Modifications are Copyright (C) Tencent.
+
+ Tencent HunyuanWorld-Mirror is licensed under the TENCENT HUNYUANWORLD-MIRROR COMMUNITY LICENSE AGREEMENT except for the third-party components listed below, which are licensed under different terms. Tencent HunyuanWorld-Mirror does not impose any additional limitations beyond what is outlined in the respective licenses of these third-party components. Users must comply with all terms and conditions of the original licenses of these third-party components and must ensure that the usage of the third-party components adheres to all relevant laws and regulations.
+
+ For avoidance of doubts, Tencent HunyuanWorld-Mirror means inference-enabling code, parameters, and/or weights of this Model, which are made publicly available by Tencent in accordance with TENCENT HUNYUANWORLD-MIRROR COMMUNITY LICENSE AGREEMENT.
+
+
+ Other dependencies and licenses:
+
+
+
+ Open Source Software Licensed under the Apache-2.0:
+ --------------------------------------------------------------------
+ 1. gsplat
+
+ Copyright 2025 Nerfstudio Team.
+
+ You can access this component through: https://github.com/nerfstudio-project/gsplat
+
+ Terms of the Apache-2.0:
+ --------------------------------------------------------------------
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
+
+ You must give any other recipients of the Work or Derivative Works a copy of this License; and
+ You must cause any modified files to carry prominent notices stating that You changed the files; and
+ You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
+ If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
+ You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
README.md CHANGED
@@ -1,12 +1,11 @@
  ---
- title: HunyuanWorld Mirror
- emoji: 🐠
- colorFrom: indigo
- colorTo: pink
+ title: HunyuanWorld-Mirror
+ emoji: 🌍
+ colorFrom: purple
+ colorTo: red
  sdk: gradio
  sdk_version: 5.49.1
  app_file: app.py
  pinned: false
+ short_description: Universal 3D World Reconstruction with Any Prior Prompting
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
app.py ADDED
@@ -0,0 +1,1817 @@
+ import gc
+ import os
+ import shutil
+ import time
+ from datetime import datetime
+ import io
+ import sys
+ 
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+ 
+ import cv2
+ import gradio as gr
+ import numpy as np
+ import spaces
+ import torch
+ from PIL import Image
+ from pillow_heif import register_heif_opener
+ register_heif_opener()
+ 
+ from src.utils.inference_utils import load_and_preprocess_images
+ from src.utils.geometry import (
+     depth_edge,
+     normals_edge
+ )
+ from src.utils.visual_util import (
+     convert_predictions_to_glb_scene,
+     segment_sky,
+     download_file_from_url
+ )
+ from src.utils.save_utils import save_camera_params, save_gs_ply, process_ply_to_splat, convert_gs_to_ply
+ from src.utils.render_utils import render_interpolated_video
+ import onnxruntime
+ 
+ 
+ # Initialize model - this will be done on GPU when needed
+ model = None
+ 
+ # Global variable to store current terminal output
+ current_terminal_output = ""
+ 
+ # Helper class to capture terminal output
+ class TeeOutput:
+     """Capture output while still printing to console"""
+     def __init__(self, max_chars=10000):
+         self.terminal = sys.stdout
+         self.log = io.StringIO()
+         self.max_chars = max_chars  # cap on the number of retained characters
+ 
+     def write(self, message):
+         global current_terminal_output
+         self.terminal.write(message)
+         self.log.write(message)
+ 
+         # Fetch the current contents and cap their length
+         content = self.log.getvalue()
+         if len(content) > self.max_chars:
+             # Keep only the last max_chars characters
+             content = "...(earlier output truncated)...\n" + content[-self.max_chars:]
+             self.log = io.StringIO()
+             self.log.write(content)
+ 
+         current_terminal_output = self.log.getvalue()
+ 
+     def flush(self):
+         self.terminal.flush()
+ 
+     def getvalue(self):
+         return self.log.getvalue()
+ 
+     def clear(self):
+         global current_terminal_output
+         self.log = io.StringIO()
+         current_terminal_output = ""
+ 
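+ # Usage sketch: TeeOutput is swapped in for sys.stdout so that print() output
+ # reaches both the console and the in-memory log (this mirrors how gradio_demo
+ # below uses it):
+ #
+ #     tee = TeeOutput(max_chars=10000)
+ #     old_stdout = sys.stdout
+ #     sys.stdout = tee
+ #     try:
+ #         print("captured and echoed")
+ #     finally:
+ #         sys.stdout = old_stdout  # always restore the real stdout
+ 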
+ # -------------------------------------------------------------------------
+ # Model inference
+ # -------------------------------------------------------------------------
+ @spaces.GPU()
+ def run_model(
+     target_dir,
+     confidence_percentile: float = 10,
+     edge_normal_threshold: float = 5.0,
+     edge_depth_threshold: float = 0.03,
+     apply_confidence_mask: bool = True,
+     apply_edge_mask: bool = True,
+ ):
+     """
+     Run the WorldMirror model on images in the 'target_dir/images' folder and return predictions.
+     """
+     global model
+     import torch  # Ensure torch is available in function scope
+ 
+     from src.models.models.worldmirror import WorldMirror
+     from src.models.utils.geometry import depth_to_world_coords_points
+ 
+     print(f"Processing images from {target_dir}")
+ 
+     # Device check
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     device = torch.device(device)
+ 
+     # Initialize model if not already done
+     if model is None:
+         model = WorldMirror.from_pretrained("tencent/HunyuanWorld-Mirror").to(device)
+     else:
+         model.to(device)
+ 
+     model.eval()
+ 
+     # Load images using WorldMirror's load_and_preprocess_images function
+     print("Loading images...")
+     image_folder_path = os.path.join(target_dir, "images")
+     # Sort for a deterministic view order (os.listdir order is arbitrary)
+     image_file_paths = [os.path.join(image_folder_path, path) for path in sorted(os.listdir(image_folder_path))]
+     img = load_and_preprocess_images(image_file_paths).to(device)
+ 
+     print(f"Loaded {img.shape[1]} images")
+     if img.shape[1] == 0:
+         raise ValueError("No images found. Check your upload.")
+ 
+     # Run model inference
+     print("Running inference...")
+     inputs = {}
+     inputs['img'] = img
+     use_amp = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
+     if use_amp:
+         amp_dtype = torch.bfloat16
+     else:
+         amp_dtype = torch.float32
+     with torch.amp.autocast('cuda', enabled=bool(use_amp), dtype=amp_dtype):
+         predictions = model(inputs)
+ 
+     # img
+     imgs = inputs["img"].permute(0, 1, 3, 4, 2)
+     imgs = imgs[0].detach().cpu().numpy()  # S H W 3
+ 
+     # depth output
+     depth_preds = predictions["depth"]
+     depth_conf = predictions["depth_conf"]
+     depth_preds = depth_preds[0].detach().cpu().numpy()  # S H W 1
+     depth_conf = depth_conf[0].detach().cpu().numpy()  # S H W
+ 
+     # normal output
+     normal_preds = predictions["normals"]  # S H W 3
+     normal_preds = normal_preds[0].detach().cpu().numpy()  # S H W 3
+ 
+     # camera parameters
+     camera_poses = predictions["camera_poses"][0].detach().cpu().numpy()  # [S,4,4]
+     camera_intrs = predictions["camera_intrs"][0].detach().cpu().numpy()  # [S,3,3]
+ 
+     # points output
+     pts3d_preds = depth_to_world_coords_points(predictions["depth"][0, ..., 0], predictions["camera_poses"][0], predictions["camera_intrs"][0])[0]
+     pts3d_preds = pts3d_preds.detach().cpu().numpy()  # S H W 3
+     pts3d_conf = depth_conf  # S H W
+ 
+     # sky mask segmentation
+     if not os.path.exists("skyseg.onnx"):
+         print("Downloading skyseg.onnx...")
+         download_file_from_url(
+             "https://huggingface.co/JianyuanWang/skyseg/resolve/main/skyseg.onnx", "skyseg.onnx"
+         )
+     skyseg_session = onnxruntime.InferenceSession("skyseg.onnx")
+     sky_mask_list = []
+     # Reuse the sorted path list so the masks line up with the loaded views
+     for i, img_path in enumerate(image_file_paths):
+         sky_mask = segment_sky(img_path, skyseg_session)
+         # Resize mask to match H×W if needed
+         if sky_mask.shape[0] != imgs.shape[1] or sky_mask.shape[1] != imgs.shape[2]:
+             sky_mask = cv2.resize(sky_mask, (imgs.shape[2], imgs.shape[1]))
+         sky_mask_list.append(sky_mask)
+     sky_mask = np.stack(sky_mask_list, axis=0)  # [S, H, W]
+     sky_mask = sky_mask > 0
+ 
+     # mask computation
+     final_mask_list = []
+     for i in range(inputs["img"].shape[1]):
+         final_mask = None
+         if apply_confidence_mask:
+             # compute confidence mask based on the pointmap confidence
+             confidences = pts3d_conf[i, :, :]  # [H, W]
+             percentile_threshold = np.quantile(confidences, confidence_percentile / 100.0)
+             conf_mask = confidences >= percentile_threshold
+             if final_mask is None:
+                 final_mask = conf_mask
+             else:
+                 final_mask = final_mask & conf_mask
+         if apply_edge_mask:
+             # compute edge mask based on the normalmap
+             normal_pred = normal_preds[i]  # [H, W, 3]
+             normal_edges = normals_edge(
+                 normal_pred, tol=edge_normal_threshold, mask=final_mask
+             )
+             # compute depth mask based on the depthmap
+             depth_pred = depth_preds[i, :, :, 0]  # [H, W]
+             depth_edges = depth_edge(
+                 depth_pred, rtol=edge_depth_threshold, mask=final_mask
+             )
+             edge_mask = ~(depth_edges & normal_edges)
+             if final_mask is None:
+                 final_mask = edge_mask
+             else:
+                 final_mask = final_mask & edge_mask
+         final_mask_list.append(final_mask)
+ 
+     if final_mask_list[0] is not None:
+         final_mask = np.stack(final_mask_list, axis=0)  # [S, H, W]
+     else:
+         final_mask = np.ones(pts3d_conf.shape[:3], dtype=bool)  # [S, H, W]
+ 
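+     # Worked example for the percentile mask above: with confidence_percentile=10,
+     # np.quantile(confidences, 0.10) is the value below which 10% of the per-pixel
+     # confidences fall, so conf_mask keeps the most confident ~90% of each view:
+     #
+     #     confidences = np.array([0.1, 0.5, 0.9, 1.3])
+     #     np.quantile(confidences, 0.10)  # -> 0.22, so only the 0.1 pixel is masked
+ 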
+     # gaussian splatting output
+     if "splats" in predictions:
+         splats_dict = {}
+         splats_dict['means'] = predictions["splats"]["means"]
+         splats_dict['scales'] = predictions["splats"]["scales"]
+         splats_dict['quats'] = predictions["splats"]["quats"]
+         splats_dict['opacities'] = predictions["splats"]["opacities"]
+         if "sh" in predictions["splats"]:
+             splats_dict['sh'] = predictions["splats"]["sh"]
+         if "colors" in predictions["splats"]:
+             splats_dict['colors'] = predictions["splats"]["colors"]
+ 
+     # output lists
+     outputs = {}
+     outputs['images'] = imgs
+     outputs['world_points'] = pts3d_preds
+     outputs['depth'] = depth_preds
+     outputs['normal'] = normal_preds
+     outputs['final_mask'] = final_mask
+     outputs['sky_mask'] = sky_mask
+     outputs['camera_poses'] = camera_poses
+     outputs['camera_intrs'] = camera_intrs
+     if "splats" in predictions:
+         outputs['splats'] = splats_dict
+ 
+     # Process data for visualization tabs (depth, normal)
+     processed_data = prepare_visualization_data(
+         outputs, inputs
+     )
+ 
+     # Clean up
+     torch.cuda.empty_cache()
+ 
+     return outputs, processed_data
+ 
+ 
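+ # Minimal sketch of calling run_model directly (assumes a folder prepared in
+ # the layout this app builds via process_uploaded_files below):
+ #
+ #     # target_dir/
+ #     #   images/
+ #     #     frame_000000.png
+ #     #     frame_000001.png
+ #     outputs, processed_data = run_model("target_dir", confidence_percentile=10)
+ #     print(outputs["depth"].shape)  # (S, H, W, 1), S = number of views
+ 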
+ # -------------------------------------------------------------------------
+ # Update and navigation function
+ # -------------------------------------------------------------------------
+ def update_view_info(current_view, total_views, view_type="Depth"):
+     """Update view information display"""
+     return f"""
+     <div style='text-align: center; padding: 10px; background: #f8f8f8; color: #999; border-radius: 8px; margin-bottom: 10px;'>
+         <strong>{view_type} View Navigation</strong> |
+         Current: View {current_view} / {total_views} views
+     </div>
+     """
+ 
+ def update_view_selectors(processed_data):
+     """Update view selector sliders and info displays based on available views"""
+     if processed_data is None or len(processed_data) == 0:
+         num_views = 1
+     else:
+         num_views = len(processed_data)
+ 
+     # Make sure num_views is at least 1
+     num_views = max(1, num_views)
+ 
+     # Update the sliders' range and view info via gr.update() instead of creating new components
+     depth_slider_update = gr.update(minimum=1, maximum=num_views, value=1, step=1)
+     normal_slider_update = gr.update(minimum=1, maximum=num_views, value=1, step=1)
+ 
+     # Update the view info displays
+     depth_info_update = update_view_info(1, num_views, "Depth")
+     normal_info_update = update_view_info(1, num_views, "Normal")
+ 
+     return (
+         depth_slider_update,   # depth_view_slider
+         normal_slider_update,  # normal_view_slider
+         depth_info_update,     # depth_view_info
+         normal_info_update,    # normal_view_info
+     )
+ 
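+ # Note on gr.update(): returning gr.update(minimum=1, maximum=num_views, value=1)
+ # from an event handler reconfigures the already-rendered Slider in place, which
+ # is why update_view_selectors returns updates rather than new gr.Slider objects.
+ 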
+ def get_view_data_by_index(processed_data, view_index):
+     """Get view data by index, handling bounds"""
+     if processed_data is None or len(processed_data) == 0:
+         return None
+ 
+     view_keys = list(processed_data.keys())
+     if view_index < 0 or view_index >= len(view_keys):
+         view_index = 0
+ 
+     return processed_data[view_keys[view_index]]
+ 
+ def update_depth_view(processed_data, view_index):
+     """Update depth view for a specific view index"""
+     view_data = get_view_data_by_index(processed_data, view_index)
+     if view_data is None or view_data["depth"] is None:
+         return None
+ 
+     return render_depth_visualization(view_data["depth"], mask=view_data.get("mask"))
+ 
+ def update_normal_view(processed_data, view_index):
+     """Update normal view for a specific view index"""
+     view_data = get_view_data_by_index(processed_data, view_index)
+     if view_data is None or view_data["normal"] is None:
+         return None
+ 
+     return render_normal_visualization(view_data["normal"], mask=view_data.get("mask"))
+ 
+ def initialize_depth_normal_views(processed_data):
+     """Initialize the depth and normal view displays with the first view data"""
+     if processed_data is None or len(processed_data) == 0:
+         return None, None
+ 
+     # Use update functions to ensure confidence filtering is applied from the start
+     depth_vis = update_depth_view(processed_data, 0)
+     normal_vis = update_normal_view(processed_data, 0)
+ 
+     return depth_vis, normal_vis
+ 
+ 
+ # -------------------------------------------------------------------------
+ # File upload and update preview gallery
+ # -------------------------------------------------------------------------
+ def process_uploaded_files(files, time_interval=1.0):
+     """
+     Process uploaded files by extracting video frames or copying images.
+ 
+     Args:
+         files: List of uploaded file objects (videos or images)
+         time_interval: Interval in seconds for video frame extraction
+ 
+     Returns:
+         tuple: (target_dir, image_paths) where target_dir is the output directory
+                and image_paths is a list of processed image file paths
+     """
+     gc.collect()
+     torch.cuda.empty_cache()
+ 
+     # Create unique output directory
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+     target_dir = f"input_images_{timestamp}"
+     images_dir = os.path.join(target_dir, "images")
+ 
+     if os.path.exists(target_dir):
+         shutil.rmtree(target_dir)
+     os.makedirs(images_dir)
+ 
+     image_paths = []
+ 
+     if files is None:
+         return target_dir, image_paths
+ 
+     video_exts = [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm", ".m4v", ".3gp"]
+ 
+     for file_data in files:
+         # Get file path
+         if isinstance(file_data, dict) and "name" in file_data:
+             src_path = file_data["name"]
+         else:
+             src_path = str(file_data)
+ 
+         ext = os.path.splitext(src_path)[1].lower()
+         base_name = os.path.splitext(os.path.basename(src_path))[0]
+ 
+         # Process video: extract frames
+         if ext in video_exts:
+             cap = cv2.VideoCapture(src_path)
+             fps = cap.get(cv2.CAP_PROP_FPS)
+             # Guard against zero/unknown FPS so the modulo below never divides by zero
+             interval = max(1, int(fps * time_interval))
+ 
+             frame_count = 0
+             saved_count = 0
+             while True:
+                 ret, frame = cap.read()
+                 if not ret:
+                     break
+                 frame_count += 1
+                 if frame_count % interval == 0:
+                     dst_path = os.path.join(images_dir, f"{base_name}_{saved_count:06}.png")
+                     cv2.imwrite(dst_path, frame)
+                     image_paths.append(dst_path)
+                     saved_count += 1
+             cap.release()
+             print(f"Extracted {saved_count} frames from: {os.path.basename(src_path)}")
+ 
+         # Process HEIC/HEIF: convert to JPEG
+         elif ext in [".heic", ".heif"]:
+             try:
+                 with Image.open(src_path) as img:
+                     if img.mode not in ("RGB", "L"):
+                         img = img.convert("RGB")
+                     dst_path = os.path.join(images_dir, f"{base_name}.jpg")
+                     img.save(dst_path, "JPEG", quality=95)
+                 image_paths.append(dst_path)
+                 print(f"Converted HEIC: {os.path.basename(src_path)} -> {os.path.basename(dst_path)}")
+             except Exception as e:
+                 print(f"HEIC conversion failed for {src_path}: {e}")
+                 dst_path = os.path.join(images_dir, os.path.basename(src_path))
+                 shutil.copy(src_path, dst_path)
+                 image_paths.append(dst_path)
+ 
+         # Process regular images: copy directly
+         else:
+             dst_path = os.path.join(images_dir, os.path.basename(src_path))
+             shutil.copy(src_path, dst_path)
+             image_paths.append(dst_path)
+ 
+     image_paths = sorted(image_paths)
+ 
+     print(f"Processed files to {images_dir}")
+     return target_dir, image_paths
+ 
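+ # Worked example for the frame sampling above: a 30 fps video with
+ # time_interval=1.0 gives interval = max(1, int(30 * 1.0)) = 30, so every
+ # 30th decoded frame (i.e. one frame per second) is written out as a PNG.
+ 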
+ # Handle file upload and update preview gallery
+ def update_gallery_on_upload(input_video, input_images, time_interval=1.0):
+     """
+     Process uploaded files immediately when the user uploads or changes files,
+     and display them in the gallery. Returns a 4-tuple
+     (reconstruction, target_dir, image_paths, status_message); the
+     reconstruction slot stays None until 'Reconstruct' is clicked.
+     If nothing is uploaded, all four outputs are None.
+     """
+     if not input_video and not input_images:
+         return None, None, None, None
+     # Merge both upload inputs into a single list; process_uploaded_files
+     # handles videos and images alike
+     files = []
+     for group in (input_video, input_images):
+         if not group:
+             continue
+         files.extend(group if isinstance(group, (list, tuple)) else [group])
+     target_dir, image_paths = process_uploaded_files(files, time_interval)
+     return (
+         None,
+         target_dir,
+         image_paths,
+         "Upload complete. Click 'Reconstruct' to begin 3D processing.",
+     )
+ 
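+ # Sketch of how this handler could be wired into the Blocks UI defined further
+ # below (output component names here are illustrative, not the app's actual ones):
+ #
+ #     input_images.change(
+ #         update_gallery_on_upload,
+ #         inputs=[input_video, input_images, time_interval],
+ #         outputs=[reconstruction_view, target_dir_state, preview_gallery, status_text],
+ #     )
+ 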
+ # -------------------------------------------------------------------------
+ # Init function
+ # -------------------------------------------------------------------------
+ def prepare_visualization_data(
+     model_outputs, input_views
+ ):
+     """Transform model predictions into structured format for display components"""
+     visualization_dict = {}
+ 
+     # Iterate through each input view
+     nviews = input_views["img"].shape[1]
+     for idx in range(nviews):
+         # Extract RGB image data
+         rgb_image = input_views["img"][0, idx].detach().cpu().numpy()
+ 
+         # Retrieve 3D coordinate predictions
+         world_coordinates = model_outputs["world_points"][idx]
+ 
+         # Build view-specific data structure
+         current_view_info = {
+             "image": rgb_image,
+             "points3d": world_coordinates,
+             "depth": None,
+             "normal": None,
+             "mask": None,
+         }
+ 
+         # Apply final segmentation mask from model
+         segmentation_mask = model_outputs["final_mask"][idx].copy()
+ 
+         current_view_info["mask"] = segmentation_mask
+         current_view_info["depth"] = model_outputs["depth"][idx].squeeze()
+ 
+         surface_normals = model_outputs["normal"][idx]
+         current_view_info["normal"] = surface_normals
+ 
+         visualization_dict[idx] = current_view_info
+ 
+     return visualization_dict
+ 
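+ # Shape of the returned structure, for each view index idx:
+ #
+ #     visualization_dict[idx] = {
+ #         "image":    (3, H, W) float array (RGB as fed to the model),
+ #         "points3d": (H, W, 3) world-space coordinates,
+ #         "depth":    (H, W) depth map,
+ #         "normal":   (H, W, 3) surface normals,
+ #         "mask":     (H, W) boolean validity mask,
+ #     }
+ 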
469
+ @spaces.GPU()
470
+ def gradio_demo(
471
+ target_dir,
472
+ frame_selector="All",
473
+ show_camera=False,
474
+ filter_sky_bg=False,
475
+ show_mesh=False,
476
+ filter_ambiguous=False,
477
+ ):
478
+ """
479
+ Perform reconstruction using the already-created target_dir/images.
480
+ """
481
+ # Capture terminal output
482
+ tee = TeeOutput()
483
+ old_stdout = sys.stdout
484
+ sys.stdout = tee
485
+
486
+ try:
487
+ if not os.path.isdir(target_dir) or target_dir == "None":
488
+ terminal_log = tee.getvalue()
489
+ sys.stdout = old_stdout
490
+ return None, "No valid target directory found. Please upload first.", None, None, None, None, None, None, None, None, None, None, None, None, terminal_log
491
+
492
+ start_time = time.time()
493
+ gc.collect()
494
+ torch.cuda.empty_cache()
495
+
496
+ # Prepare frame_selector dropdown
497
+ target_dir_images = os.path.join(target_dir, "images")
498
+ all_files = (
499
+ sorted(os.listdir(target_dir_images))
500
+ if os.path.isdir(target_dir_images)
501
+ else []
502
+ )
503
+ all_files = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
504
+ frame_selector_choices = ["All"] + all_files
505
+
506
+ print("Running WorldMirror model...")
507
+ with torch.no_grad():
508
+ predictions, processed_data = run_model(target_dir)
509
+
510
+ # Save predictions
511
+ prediction_save_path = os.path.join(target_dir, "predictions.npz")
512
+ np.savez(prediction_save_path, **predictions)
513
+
514
+ # Save camera parameters as JSON
515
+ camera_params_file = save_camera_params(
516
+ predictions['camera_poses'],
517
+ predictions['camera_intrs'],
518
+ target_dir
519
+ )
520
+
521
+ # Handle None frame_selector
522
+ if frame_selector is None:
523
+ frame_selector = "All"
524
+
525
+ # Build a GLB file name
526
+ glbfile = os.path.join(
527
+ target_dir,
528
+ f"glbscene_{frame_selector.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_camera}_mesh{show_mesh}.glb",
529
+ )
530
+
531
+ # Convert predictions to GLB
532
+ glbscene = convert_predictions_to_glb_scene(
533
+ predictions,
534
+ filter_by_frames=frame_selector,
535
+ show_camera=show_camera,
536
+ mask_sky_bg=filter_sky_bg,
537
+ as_mesh=show_mesh,
538
+ mask_ambiguous=filter_ambiguous
539
+ )
540
+ glbscene.export(file_obj=glbfile)
541
+
542
+ end_time = time.time()
543
+ print(f"Total time: {end_time - start_time:.2f} seconds")
544
+ log_msg = (
545
+ f"Reconstruction Success ({len(all_files)} frames). Waiting for visualization."
546
+ )
547
+ # Convert predictions to 3dgs ply
548
+ gs_file = None
549
+ splat_mode = 'ply'
550
+ if "splats" in predictions:
551
+ # Get Gaussian parameters (already filtered by GaussianSplatRenderer)
552
+ means = predictions["splats"]["means"][0].reshape(-1, 3)
553
+ scales = predictions["splats"]["scales"][0].reshape(-1, 3)
554
+ quats = predictions["splats"]["quats"][0].reshape(-1, 4)
555
+ colors = (predictions["splats"]["sh"][0] if "sh" in predictions["splats"] else predictions["splats"]["colors"][0]).reshape(-1, 3)
556
+ opacities = predictions["splats"]["opacities"][0].reshape(-1)
557
+
558
+ # Convert any numpy arrays to torch tensors
+ means, scales, quats, colors, opacities = (
+     t if isinstance(t, torch.Tensor) else torch.from_numpy(t)
+     for t in (means, scales, quats, colors, opacities)
+ )
569
+
570
+ if splat_mode == 'ply':
571
+ gs_file = os.path.join(target_dir, "gaussians.ply")
572
+ save_gs_ply(
573
+ gs_file,
574
+ means,
575
+ scales,
576
+ quats,
577
+ colors,
578
+ opacities
579
+ )
580
+ print(f"Saved Gaussian Splatting PLY to: {gs_file}")
581
+ print(f"File exists: {os.path.exists(gs_file)}")
582
+ if os.path.exists(gs_file):
583
+ print(f"File size: {os.path.getsize(gs_file)} bytes")
584
+ elif splat_mode == 'splat':
585
+ # Save Gaussian splat
586
+ plydata = convert_gs_to_ply(
587
+ means,
588
+ scales,
589
+ quats,
590
+ colors,
591
+ opacities
592
+ )
593
+ gs_file = os.path.join(target_dir, "gaussians.splat")
594
+ gs_file = process_ply_to_splat(plydata, gs_file)
595
+
596
+ # Initialize depth and normal view displays with processed data
597
+ depth_vis, normal_vis = initialize_depth_normal_views(
598
+ processed_data
599
+ )
600
+
601
+ # Update view selectors and info displays based on available views
602
+ depth_slider, normal_slider, depth_info, normal_info = update_view_selectors(
603
+ processed_data
604
+ )
605
+
606
+ # Automatically generate a rendered video when Gaussian splats are available
608
+ rgb_video_path = None
609
+ depth_video_path = None
610
+
611
+ if "splats" in predictions:
612
613
+ from pathlib import Path
614
+
615
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
616
+ # Get camera parameters and image dimensions
617
+ camera_poses = torch.tensor(predictions['camera_poses']).unsqueeze(0).to(device)
618
+ camera_intrs = torch.tensor(predictions['camera_intrs']).unsqueeze(0).to(device)
619
+ H, W = predictions['images'].shape[1], predictions['images'].shape[2]
620
+
621
+ # Render video
622
+ out_path = Path(target_dir) / "rendered_video"
623
+ render_interpolated_video(
624
+ model.gs_renderer,
625
+ predictions["splats"],
626
+ camera_poses,
627
+ camera_intrs,
628
+ (H, W),
629
+ out_path,
630
+ interp_per_pair=15,
631
+ loop_reverse=True,
632
+ save_mode="split"
633
+ )
634
+
635
+ # Check output files
636
+ rgb_video_path = str(out_path) + "_rgb.mp4"
637
+ depth_video_path = str(out_path) + "_depth.mp4"
638
+
639
+ # Drop each path individually so one missing render does not hide the other
+ if not os.path.exists(rgb_video_path):
+     rgb_video_path = None
+ if not os.path.exists(depth_video_path):
+     depth_video_path = None
642
+
643
+ # Cleanup
644
+ del predictions
645
+ gc.collect()
646
+ torch.cuda.empty_cache()
647
+
648
+ # Get terminal output and restore stdout
649
+ terminal_log = tee.getvalue()
650
+ sys.stdout = old_stdout
651
+
652
+ return (
653
+ glbfile,
654
+ log_msg,
655
+ gr.Dropdown(choices=frame_selector_choices, value=frame_selector, interactive=True),
656
+ processed_data,
657
+ depth_vis,
658
+ normal_vis,
659
+ depth_slider,
660
+ normal_slider,
661
+ depth_info,
662
+ normal_info,
663
+ camera_params_file,
664
+ gs_file,
665
+ rgb_video_path,
666
+ depth_video_path,
667
+ terminal_log,
668
+ )
669
+
670
+ except Exception as e:
671
+ # In case of error, still restore stdout
672
+ terminal_log = tee.getvalue()
673
+ sys.stdout = old_stdout
674
+ print(f"Error occurred: {e}")
675
+ raise
676
+
677
+
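+ # The capture/restore stdout dance above recurs in several handlers. A small
+ # context-manager sketch that could centralize it, assuming TeeOutput exposes
+ # getvalue() like io.StringIO; `_captured_stdout` is hypothetical, not part
+ # of the app's API.
+ from contextlib import contextmanager
+
+ @contextmanager
+ def _captured_stdout():
+     tee, previous = TeeOutput(), sys.stdout
+     sys.stdout = tee  # route prints into the tee buffer
+     try:
+         yield tee
+     finally:
+         sys.stdout = previous  # always restore, even when the handler raises
+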
678
+ # -------------------------------------------------------------------------
679
+ # Helper functions for visualization
680
+ # -------------------------------------------------------------------------
681
+ def render_depth_visualization(depth_map, mask=None):
682
+ """Generate a color-coded depth visualization image with masking capabilities"""
683
+ if depth_map is None:
684
+ return None
685
+
686
+ # Create working copy and identify positive depth values
687
+ depth_copy = depth_map.copy()
688
+ positive_depth_mask = depth_copy > 0
689
+
690
+ # Combine with user-provided mask for filtering
691
+ if mask is not None:
692
+ positive_depth_mask = positive_depth_mask & mask
693
+
694
+ # Perform percentile-based normalization on valid regions
695
+ if positive_depth_mask.sum() > 0:
696
+ valid_depth_values = depth_copy[positive_depth_mask]
697
+ lower_bound = np.percentile(valid_depth_values, 5)
698
+ upper_bound = np.percentile(valid_depth_values, 95)
699
+
700
+ value_range = max(upper_bound - lower_bound, 1e-8)  # guard against constant depth
+ depth_copy[positive_depth_mask] = (depth_copy[positive_depth_mask] - lower_bound) / value_range
701
+
702
+ # Convert to RGB using matplotlib colormap
703
+ import matplotlib.pyplot as plt
704
+
705
+ color_mapper = plt.cm.turbo_r
706
+ rgb_result = color_mapper(depth_copy)
707
+ rgb_result = (rgb_result[:, :, :3] * 255).astype(np.uint8)
708
+
709
+ # Mark invalid regions with white color
710
+ rgb_result[~positive_depth_mask] = [255, 255, 255]
711
+
712
+ return rgb_result
713
+
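+ # Illustrative use of the percentile normalization above on synthetic data;
+ # `_demo_depth_visualization` is hypothetical and never called by the app.
+ def _demo_depth_visualization():
+     demo_depth = np.tile(np.linspace(0.5, 5.0, 64, dtype=np.float32), (48, 1))
+     demo_mask = demo_depth > 1.0  # pretend the nearest pixels are invalid
+     return render_depth_visualization(demo_depth, demo_mask)  # (48, 64, 3) uint8
+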
714
+ def render_normal_visualization(normal_map, mask=None):
715
+ """Convert surface normal vectors to RGB color representation for display"""
716
+ if normal_map is None:
717
+ return None
718
+
719
+ # Make a working copy to avoid modifying original data
720
+ normal_display = normal_map.copy()
721
+
722
+ # Handle masking by zeroing out invalid regions
723
+ if mask is not None:
724
+ masked_regions = ~mask
725
+ normal_display[masked_regions] = [0, 0, 0] # Zero out masked pixels
726
+
727
+ # Transform from [-1, 1] to [0, 1] range for RGB display
728
+ normal_display = (normal_display + 1.0) / 2.0
729
+ normal_display = (normal_display * 255).astype(np.uint8)
730
+
731
+ return normal_display
732
+
733
+
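+ # Illustrative check of the (n + 1) / 2 mapping above: a constant +Y normal
+ # renders as (127, 255, 127) after uint8 truncation. Hypothetical helper only.
+ def _demo_normal_visualization():
+     up = np.zeros((4, 4, 3), dtype=np.float32)
+     up[..., 1] = 1.0  # unit normals pointing along +Y
+     return render_normal_visualization(up)
+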
734
+ def clear_fields():
735
+ """
736
+ Hook run when Reconstruct is clicked; wired with no outputs, it simply returns None.
737
+ """
738
+ return None
739
+
740
+
741
+ def update_log():
742
+ """
743
+ Display a quick log message while waiting.
744
+ """
745
+ return "Loading and Reconstructing..."
746
+
747
+
748
+ def get_terminal_output():
749
+ """
750
+ Get current terminal output for real-time display
751
+ """
752
+ global current_terminal_output
753
+ return current_terminal_output
754
+
755
+ # -------------------------------------------------------------------------
756
+ # Example scene metadata extraction
757
+ # -------------------------------------------------------------------------
758
+ def extract_example_scenes_metadata(base_directory):
759
+ """
760
+ Extract comprehensive metadata for all scene directories containing valid images.
761
+
762
+ Args:
763
+ base_directory: Root path where example scene directories are located
764
+
765
+ Returns:
766
+ Collection of dictionaries with scene details (title, location, preview, etc.)
767
+ """
768
+ from glob import glob
769
+
770
+ # Return empty list if base directory is missing
771
+ if not os.path.exists(base_directory):
772
+ return []
773
+
774
+ # Define supported image format extensions
775
+ VALID_IMAGE_FORMATS = ['jpg', 'jpeg', 'png', 'bmp', 'tiff', 'tif']
776
+
777
+ scenes_data = []
778
+
779
+ # Process each subdirectory in the base directory
780
+ for directory_name in sorted(os.listdir(base_directory)):
781
+ current_directory = os.path.join(base_directory, directory_name)
782
+
783
+ # Filter out non-directory items
784
+ if not os.path.isdir(current_directory):
785
+ continue
786
+
787
+ # Gather all valid image files within the current directory
788
+ discovered_images = []
789
+ for file_format in VALID_IMAGE_FORMATS:
790
+ # Include both lowercase and uppercase format variations
791
+ discovered_images.extend(glob(os.path.join(current_directory, f'*.{file_format}')))
792
+ discovered_images.extend(glob(os.path.join(current_directory, f'*.{file_format.upper()}')))
793
+
794
+ # Skip directories without any valid images
795
+ if not discovered_images:
796
+ continue
797
+
798
+ # Ensure consistent image ordering
799
+ discovered_images.sort()
800
+
801
+ # Construct scene metadata record
802
+ scene_record = {
803
+ 'name': directory_name,
804
+ 'path': current_directory,
805
+ 'thumbnail': discovered_images[0],
806
+ 'num_images': len(discovered_images),
807
+ 'image_files': discovered_images,
808
+ }
809
+
810
+ scenes_data.append(scene_record)
811
+
812
+ return scenes_data
813
+
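+ # Shape of each record returned above (values illustrative):
+ # {"name": "Great_Wall", "path": "examples/realistic/Great_Wall",
+ #  "thumbnail": "examples/realistic/Great_Wall/great_wall_000000.jpg",
+ #  "num_images": 12, "image_files": [...sorted paths...]}
+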
814
+ def load_example_scenes(scene_name, scenes):
815
+ """
816
+ Initialize and prepare an example scene for 3D reconstruction processing.
817
+
818
+ Args:
819
+ scene_name: Identifier of the target scene to load
820
+ scenes: List containing all available scene configurations
821
+
822
+ Returns:
823
+ Tuple containing processed scene data and status information
824
+ """
825
+ # Locate the target scene configuration by matching names
826
+ target_scene_config = None
827
+ for scene_config in scenes:
828
+ if scene_config["name"] == scene_name:
829
+ target_scene_config = scene_config
830
+ break
831
+
832
+ # Handle case where requested scene doesn't exist
833
+ if target_scene_config is None:
834
+ return None, None, None, None, "Scene not found"
835
+
836
+ # Extract all image file paths from the selected scene
+ image_file_paths = list(target_scene_config["image_files"])
841
+
842
+ # Process the scene images through the standard upload pipeline
843
+ processed_target_dir, processed_image_list = process_uploaded_files(image_file_paths, 1.0)
844
+
845
+ # Return structured response with scene data and user feedback
846
+ status_message = f"Successfully loaded scene '{scene_name}' containing {target_scene_config['num_images']} images. Click 'Reconstruct' to begin 3D processing."
847
+
848
+ return (
849
+ None, # Reset reconstruction visualization
850
+ None, # Reset gaussian splatting output
851
+ processed_target_dir, # Provide working directory path
852
+ processed_image_list, # Update image gallery display
853
+ status_message,
854
+ )
855
+
856
+
857
+ # -------------------------------------------------------------------------
858
+ # UI and event handling
859
+ # -------------------------------------------------------------------------
860
+ theme = gr.themes.Base()
861
+
862
+ with gr.Blocks(
863
+ theme=theme,
864
+ css="""
865
+ .custom-log * {
866
+ font-style: italic;
867
+ font-size: 22px !important;
868
+ background-image: linear-gradient(120deg, #a9b8f8 0%, #7081e8 60%, #4254c5 100%);
869
+ -webkit-background-clip: text;
870
+ background-clip: text;
871
+ font-weight: bold !important;
872
+ color: transparent !important;
873
+ text-align: center !important;
874
+ }
875
+ .normal-weight-btn button,
876
+ .normal-weight-btn button span,
877
+ .normal-weight-btn button *,
878
+ .normal-weight-btn * {
879
+ font-weight: 400 !important;
880
+ }
881
+ .terminal-output {
882
+ max-height: 400px !important;
883
+ overflow-y: auto !important;
884
+ }
885
+ .terminal-output textarea {
886
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace !important;
887
+ font-size: 13px !important;
888
+ line-height: 1.5 !important;
889
+ color: #333 !important;
890
+ background-color: #f8f9fa !important;
891
+ max-height: 400px !important;
892
+ }
893
+ .example-gallery {
894
+ width: 100% !important;
895
+ }
896
+ .example-gallery img {
897
+ width: 100% !important;
898
+ height: 280px !important;
899
+ object-fit: contain !important;
900
+ aspect-ratio: 16 / 9 !important;
901
+ }
902
+ .example-gallery .grid-wrap {
903
+ width: 100% !important;
904
+ }
905
+
906
+ /* Slider navigation styles */
907
+ .depth-tab-improved .gradio-slider input[type="range"] {
908
+ height: 8px !important;
909
+ border-radius: 4px !important;
910
+ background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
911
+ }
912
+
913
+ .depth-tab-improved .gradio-slider input[type="range"]::-webkit-slider-thumb {
914
+ height: 20px !important;
915
+ width: 20px !important;
916
+ border-radius: 50% !important;
917
+ background: #fff !important;
918
+ box-shadow: 0 2px 6px rgba(0,0,0,0.3) !important;
919
+ }
920
+
921
+ .depth-tab-improved button {
922
+ transition: all 0.3s ease !important;
923
+ border-radius: 6px !important;
924
+ font-weight: 500 !important;
925
+ }
926
+
927
+ .depth-tab-improved button:hover {
928
+ transform: translateY(-1px) !important;
929
+ box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
930
+ }
931
+
932
+ .normal-tab-improved .gradio-slider input[type="range"] {
933
+ height: 8px !important;
934
+ border-radius: 4px !important;
935
+ background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
936
+ }
937
+
938
+ .normal-tab-improved .gradio-slider input[type="range"]::-webkit-slider-thumb {
939
+ height: 20px !important;
940
+ width: 20px !important;
941
+ border-radius: 50% !important;
942
+ background: #fff !important;
943
+ box-shadow: 0 2px 6px rgba(0,0,0,0.3) !important;
944
+ }
945
+
946
+ .normal-tab-improved button {
947
+ transition: all 0.3s ease !important;
948
+ border-radius: 6px !important;
949
+ font-weight: 500 !important;
950
+ }
951
+
952
+ .normal-tab-improved button:hover {
953
+ transform: translateY(-1px) !important;
954
+ box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
955
+ }
956
+
957
+ #depth-view-info, #normal-view-info {
958
+ animation: fadeIn 0.5s ease-in-out;
959
+ }
960
+
961
+ @keyframes fadeIn {
962
+ from { opacity: 0; transform: translateY(-10px); }
963
+ to { opacity: 1; transform: translateY(0); }
964
+ }
965
+ """
966
+ ) as demo:
967
+ # State variables for the tabbed interface
968
+ is_example = gr.Textbox(label="is_example", visible=False, value="None")
969
+ num_images = gr.Textbox(label="num_images", visible=False, value="None")
970
+ processed_data_state = gr.State(value=None)
971
+ current_view_index = gr.State(value=0) # Track current view index for navigation
972
+
973
+ # Header and description
974
+ gr.HTML(
975
+ """
976
+ <div style="text-align: center;">
977
+ <h1>
978
+ <span style="background: linear-gradient(90deg, #3b82f6, #1e40af); -webkit-background-clip: text; background-clip: text; color: transparent; font-weight: bold;">WorldMirror:</span>
979
+ <span style="color: #555555;">Universal 3D World Reconstruction with Any Prior Prompting</span>
980
+ </h1>
981
+ <p>
982
+ <a href="https://arxiv.org/abs/2510.10726">📄 ArXiv Paper</a> |
983
+ <a href="https://3d-models.hunyuan.tencent.com/world/">🌐 Project Page</a> |
984
+ <a href="https://github.com/Tencent-Hunyuan/HunyuanWorld-Mirror">💻 GitHub Repository</a> |
985
+ <a href="https://huggingface.co/tencent/HunyuanWorld-Mirror">🤗 Hugging Face Model</a>
986
+ </p>
987
+ </div>
988
+ <div style="font-size: 16px; line-height: 1.5;">
989
+ <p>WorldMirror supports any combination of inputs (images, intrinsics, poses, and depth) and multiple outputs including point clouds, camera parameters, depth maps, normal maps, and 3D Gaussian Splatting (3DGS). </p>
990
+ <h3>How to Use:</h3>
991
+ <ol>
992
+ <li><strong>Upload Your Data:</strong> Click the "Upload Video or Images" button to add your files. Videos are automatically sampled into frames (one frame per second by default; adjustable via the sampling-interval slider).</li>
993
+ <li><strong>Reconstruct:</strong> Click the "Reconstruct" button to start the 3D reconstruction.</li>
994
+ <li><strong>Visualize:</strong> Explore multiple reconstruction results across different tabs:
995
+ <ul>
996
+ <li><strong>3D View:</strong> Interactive point cloud/mesh visualization with camera poses (downloadable as GLB)</li>
997
+ <li><strong>3D Gaussian Splatting:</strong> Interactive 3D Gaussian Splatting visualization with RGB and depth videos (downloadable as PLY)</li>
998
+ <li><strong>Depth Maps:</strong> Per-view depth estimation results (downloadable as PNG)</li>
999
+ <li><strong>Normal Maps:</strong> Per-view surface orientation visualization (downloadable as PNG)</li>
1000
+ <li><strong>Camera Parameters:</strong> Estimated camera poses and intrinsics (downloadable as JSON)</li>
1001
+ </ul>
1002
+ </li>
1003
+ </ol>
1004
+ <p><strong style="color: #3b82f6;">Please note: Loading data and displaying 3D effects may take a moment. For faster performance, we recommend downloading the code from our GitHub and running it locally.</strong></p>
1005
+ </div>
1006
+ """)
1007
+
1008
+ output_path_state = gr.Textbox(label="Output Path", visible=False, value="None")
1009
+
1010
+ # Main UI components
1011
+ with gr.Row(equal_height=False):
1012
+ with gr.Column(scale=1):
1013
+ file_upload = gr.File(
1014
+ file_count="multiple",
1015
+ label="Upload Video or Images",
1016
+ interactive=True,
1017
+ file_types=["image", "video"],
1018
+ height="200px",
1019
+ )
1020
+ time_interval = gr.Slider(
1021
+ minimum=0.1,
1022
+ maximum=10.0,
1023
+ value=1.0,
1024
+ step=0.1,
1025
+ label="Video Sample interval",
1026
+ interactive=True,
1027
+ visible=True,
1028
+ scale=4,
1029
+ )
1030
+ resample_btn = gr.Button(
1031
+ "Resample",
1032
+ visible=True,
1033
+ scale=1,
1034
+ elem_classes=["normal-weight-btn"],
1035
+ )
1036
+ image_gallery = gr.Gallery(
1037
+ label="Image Preview",
1038
+ columns=4,
1039
+ height="200px",
1040
+ show_download_button=True,
1041
+ object_fit="contain",
1042
+ preview=True
1043
+ )
1044
+
1045
+ terminal_output = gr.Textbox(
1046
+ label="Terminal Output",
1047
+ lines=6,
1048
+ max_lines=6,
1049
+ interactive=False,
1050
+ show_copy_button=True,
1051
+ container=True,
1052
+ elem_classes=["terminal-output"],
1053
+ autoscroll=True
1054
+ )
1055
+
1056
+ with gr.Column(scale=3):
1057
+ log_output = gr.Markdown(
1058
+ "Upload video or images first, then click Reconstruct to start processing",
1059
+ elem_classes=["custom-log"],
1060
+ )
1061
+
1062
+ with gr.Tabs() as tabs:
1063
+ with gr.Tab("3D Gaussian Splatting", id=1) as gs_tab:
1064
+ with gr.Row():
1065
+ with gr.Column(scale=3):
1066
+ gs_output = gr.Model3D(
1067
+ label="Gaussian Splatting",
1068
+ height=500,
1069
+ )
1070
+ with gr.Column(scale=1):
1071
+ gs_rgb_video = gr.Video(
1072
+ label="Rendered RGB Video",
1073
+ height=250,
1074
+ autoplay=False,
1075
+ loop=False,
1076
+ interactive=False,
1077
+ )
1078
+ gs_depth_video = gr.Video(
1079
+ label="Rendered Depth Video",
1080
+ height=250,
1081
+ autoplay=False,
1082
+ loop=False,
1083
+ interactive=False,
1084
+ )
1085
+ with gr.Tab("Point Cloud/Mesh", id=0):
1086
+ reconstruction_output = gr.Model3D(
1087
+ label="3D Pointmap/Mesh",
1088
+ height=500,
1089
+ zoom_speed=0.4,
1090
+ pan_speed=0.4,
1091
+ )
1092
+ with gr.Tab("Depth", elem_classes=["depth-tab-improved"]):
1093
+ depth_view_info = gr.HTML(
1094
+ value="<div style='text-align: center; padding: 10px; background: #f8f8f8; color: #999; border-radius: 8px; margin-bottom: 10px;'>"
1095
+ "<strong>Depth View Navigation</strong> | Current: View 1 / 1 views</div>",
1096
+ elem_id="depth-view-info"
1097
+ )
1098
+ depth_view_slider = gr.Slider(
1099
+ minimum=1,
1100
+ maximum=1,
1101
+ step=1,
1102
+ value=1,
1103
+ label="View Selection Slider",
1104
+ interactive=True,
1105
+ elem_id="depth-view-slider"
1106
+ )
1107
+ depth_map = gr.Image(
1108
+ type="numpy",
1109
+ label="Depth Map",
1110
+ format="png",
1111
+ interactive=False,
1112
+ height=340
1113
+ )
1114
+ with gr.Tab("Normal", elem_classes=["normal-tab-improved"]):
1115
+ normal_view_info = gr.HTML(
1116
+ value="<div style='text-align: center; padding: 10px; background: #f8f8f8; color: #999; border-radius: 8px; margin-bottom: 10px;'>"
1117
+ "<strong>Normal View Navigation</strong> | Current: View 1 / 1 views</div>",
1118
+ elem_id="normal-view-info"
1119
+ )
1120
+ normal_view_slider = gr.Slider(
1121
+ minimum=1,
1122
+ maximum=1,
1123
+ step=1,
1124
+ value=1,
1125
+ label="View Selection Slider",
1126
+ interactive=True,
1127
+ elem_id="normal-view-slider"
1128
+ )
1129
+ normal_map = gr.Image(
1130
+ type="numpy",
1131
+ label="Normal Map",
1132
+ format="png",
1133
+ interactive=False,
1134
+ height=340
1135
+ )
1136
+ with gr.Tab("Camera Parameters", elem_classes=["camera-tab"]):
1137
+ with gr.Row():
1138
+ gr.HTML("")
1139
+ camera_params = gr.DownloadButton(
1140
+ label="Download Camera Parameters",
1141
+ scale=1,
1142
+ variant="primary",
1143
+ )
1144
+ gr.HTML("")
1145
+
1146
+ with gr.Row():
1147
+ reconstruct_btn = gr.Button(
1148
+ "Reconstruct",
1149
+ scale=1,
1150
+ variant="primary"
1151
+ )
1152
+ clear_btn = gr.ClearButton(
1153
+ [
1154
+ file_upload,
1155
+ reconstruction_output,
1156
+ log_output,
1157
+ output_path_state,
1158
+ image_gallery,
1159
+ depth_map,
1160
+ normal_map,
1161
+ depth_view_slider,
1162
+ normal_view_slider,
1163
+ depth_view_info,
1164
+ normal_view_info,
1165
+ camera_params,
1166
+ gs_output,
1167
+ gs_rgb_video,
1168
+ gs_depth_video,
1169
+ ],
1170
+ scale=1,
1171
+ )
1172
+
1173
+ with gr.Row():
1174
+ frame_selector = gr.Dropdown(
1175
+ choices=["All"], value="All", label="Show Points of a Specific Frame"
1176
+ )
1177
+
1178
+ gr.Markdown("### Reconstruction Options: (not applied to 3DGS)")
1179
+ with gr.Row():
1180
+ show_camera = gr.Checkbox(label="Show Camera", value=True)
1181
+ show_mesh = gr.Checkbox(label="Show Mesh", value=True)
1182
+ filter_ambiguous = gr.Checkbox(label="Filter low confidence & depth/normal edges", value=True)
1183
+ filter_sky_bg = gr.Checkbox(label="Filter Sky Background", value=False)
1184
+
1185
+ with gr.Column(scale=1):
1186
+ gr.Markdown("### Click to load example scenes")
1187
+ realworld_scenes = extract_example_scenes_metadata("examples/realistic") if os.path.exists("examples/realistic") else extract_example_scenes_metadata("examples")
1188
+ generated_scenes = extract_example_scenes_metadata("examples/stylistic") if os.path.exists("examples/stylistic") else []
1189
+
1190
+ # If no subdirectories exist, fall back to single gallery
1191
+ if not os.path.exists("examples/realistic") and not os.path.exists("examples/stylistic"):
1192
+ # Fallback: use all scenes from examples directory
1193
+ all_scenes = extract_example_scenes_metadata("examples")
1194
+ if all_scenes:
1195
+ gallery_items = [
1196
+ (scene["thumbnail"], f"{scene['name']}\n📷 {scene['num_images']} images")
1197
+ for scene in all_scenes
1198
+ ]
1199
+
1200
+ example_gallery = gr.Gallery(
1201
+ value=gallery_items,
1202
+ label="Example Scenes",
1203
+ columns=1,
1204
+ rows=None,
1205
+ height=800,
1206
+ object_fit="contain",
1207
+ show_label=False,
1208
+ interactive=True,
1209
+ preview=False,
1210
+ allow_preview=False,
1211
+ elem_classes=["example-gallery"]
1212
+ )
1213
+
1214
+ def handle_example_selection(evt: gr.SelectData):
1215
+ if evt:
1216
+ result = load_example_scenes(all_scenes[evt.index]["name"], all_scenes)
1217
+ return result
1218
+ return (None, None, None, None, "No scene selected")
1219
+
1220
+ example_gallery.select(
1221
+ fn=handle_example_selection,
1222
+ outputs=[
1223
+ reconstruction_output,
1224
+ gs_output,
1225
+ output_path_state,
1226
+ image_gallery,
1227
+ log_output,
1228
+ ],
1229
+ )
1230
+ else:
1231
+ # Tabbed interface for categorized examples
1232
+ with gr.Tabs():
1233
+ with gr.Tab("🌍 Realistic Cases"):
1234
+ if realworld_scenes:
1235
+ realworld_items = [
1236
+ (scene["thumbnail"], f"{scene['name']}\n📷 {scene['num_images']} images")
1237
+ for scene in realworld_scenes
1238
+ ]
1239
+
1240
+ realworld_gallery = gr.Gallery(
1241
+ value=realworld_items,
1242
+ label="Real-world Examples",
1243
+ columns=1,
1244
+ rows=None,
1245
+ height=750,
1246
+ object_fit="contain",
1247
+ show_label=False,
1248
+ interactive=True,
1249
+ preview=False,
1250
+ allow_preview=False,
1251
+ elem_classes=["example-gallery"]
1252
+ )
1253
+
1254
+ def handle_realworld_selection(evt: gr.SelectData):
1255
+ if evt:
1256
+ result = load_example_scenes(realworld_scenes[evt.index]["name"], realworld_scenes)
1257
+ return result
1258
+ return (None, None, None, None, "No scene selected")
1259
+
1260
+ realworld_gallery.select(
1261
+ fn=handle_realworld_selection,
1262
+ outputs=[
1263
+ reconstruction_output,
1264
+ gs_output,
1265
+ output_path_state,
1266
+ image_gallery,
1267
+ log_output,
1268
+ ],
1269
+ )
1270
+ else:
1271
+ gr.Markdown("No real-world examples available")
1272
+
1273
+ with gr.Tab("🎨 Stylistic Cases"):
1274
+ if generated_scenes:
1275
+ generated_items = [
1276
+ (scene["thumbnail"], f"{scene['name']}\n📷 {scene['num_images']} images")
1277
+ for scene in generated_scenes
1278
+ ]
1279
+
1280
+ generated_gallery = gr.Gallery(
1281
+ value=generated_items,
1282
+ label="Generated Examples",
1283
+ columns=1,
1284
+ rows=None,
1285
+ height=750,
1286
+ object_fit="contain",
1287
+ show_label=False,
1288
+ interactive=True,
1289
+ preview=False,
1290
+ allow_preview=False,
1291
+ elem_classes=["example-gallery"]
1292
+ )
1293
+
1294
+ def handle_generated_selection(evt: gr.SelectData):
1295
+ if evt:
1296
+ result = load_example_scenes(generated_scenes[evt.index]["name"], generated_scenes)
1297
+ return result
1298
+ return (None, None, None, None, "No scene selected")
1299
+
1300
+ generated_gallery.select(
1301
+ fn=handle_generated_selection,
1302
+ outputs=[
1303
+ reconstruction_output,
1304
+ gs_output,
1305
+ output_path_state,
1306
+ image_gallery,
1307
+ log_output,
1308
+ ],
1309
+ )
1310
+ else:
1311
+ gr.Markdown("No generated examples available")
1312
+
1313
+ # -------------------------------------------------------------------------
1314
+ # Click logic
1315
+ # -------------------------------------------------------------------------
1316
+ reconstruct_btn.click(fn=clear_fields, inputs=[], outputs=[]).then(
1317
+ fn=update_log, inputs=[], outputs=[log_output]
1318
+ ).then(
1319
+ fn=gradio_demo,
1320
+ inputs=[
1321
+ output_path_state,
1322
+ frame_selector,
1323
+ show_camera,
1324
+ filter_sky_bg,
1325
+ show_mesh,
1326
+ filter_ambiguous
1327
+ ],
1328
+ outputs=[
1329
+ reconstruction_output,
1330
+ log_output,
1331
+ frame_selector,
1332
+ processed_data_state,
1333
+ depth_map,
1334
+ normal_map,
1335
+ depth_view_slider,
1336
+ normal_view_slider,
1337
+ depth_view_info,
1338
+ normal_view_info,
1339
+ camera_params,
1340
+ gs_output,
1341
+ gs_rgb_video,
1342
+ gs_depth_video,
1343
+ terminal_output,
1344
+ ],
1345
+ ).then(
1346
+ fn=lambda: "False",
1347
+ inputs=[],
1348
+ outputs=[is_example], # set is_example to "False"
1349
+ )
1350
+
1351
+ # -------------------------------------------------------------------------
1352
+ # Live update logic
1353
+ # -------------------------------------------------------------------------
1354
+ def refresh_3d_scene(
1355
+ workspace_path,
1356
+ frame_selector,
1357
+ show_camera,
1358
+ is_example,
1359
+ filter_sky_bg=False,
1360
+ show_mesh=False,
1361
+ filter_ambiguous=False
1362
+ ):
1363
+ """
1364
+ Refresh 3D scene visualization
1365
+
1366
+ Load prediction data from workspace, generate or reuse GLB scene files based on current parameters,
1367
+ and return file paths needed for the 3D viewer.
1368
+
1369
+ Args:
1370
+ workspace_path: Workspace directory path for reconstruction results
1371
+ frame_selector: Frame selector value for filtering points from specific frames
1372
+ show_camera: Whether to display camera positions
1373
+ is_example: Whether this is an example scene
1374
+ filter_sky_bg: Whether to filter sky background
1375
+ show_mesh: Whether to display as mesh mode
1376
+ filter_ambiguous: Whether to filter low-confidence ambiguous areas
1377
+
1378
+ Returns:
1379
+ tuple: (GLB scene file path, Gaussian point cloud file path, status message)
1380
+ """
1381
+
1382
+ # If example scene is clicked, skip processing directly
1383
+ if is_example == "True":
1384
+ return (
1385
+ gr.update(),
1386
+ gr.update(),
1387
+ "No reconstruction results available. Please click the Reconstruct button first.",
1388
+ )
1389
+
1390
+ # Validate workspace directory path
1391
+ if not workspace_path or workspace_path == "None" or not os.path.isdir(workspace_path):
1392
+ return (
1393
+ gr.update(),
1394
+ gr.update(),
1395
+ "No reconstruction results available. Please click the Reconstruct button first.",
1396
+ )
1397
+
1398
+ # Check if prediction data file exists
1399
+ prediction_file_path = os.path.join(workspace_path, "predictions.npz")
1400
+ if not os.path.exists(prediction_file_path):
1401
+ return (
1402
+ gr.update(),
1403
+ gr.update(),
1404
+ f"Prediction file does not exist: {prediction_file_path}. Please run reconstruction first.",
1405
+ )
1406
+
1407
+ # Load prediction data
1408
+ prediction_data = np.load(prediction_file_path, allow_pickle=True)
1409
+ predictions = {key: prediction_data[key] for key in prediction_data.keys()}
1410
+
1411
+ # Generate GLB scene file path (named based on parameter combination)
1412
+ safe_frame_name = frame_selector.replace('.', '_').replace(':', '').replace(' ', '_')
1413
+ scene_filename = f"scene_{safe_frame_name}_cam{show_camera}_mesh{show_mesh}_edges{filter_ambiguous}_sky{filter_sky_bg}.glb"
1414
+ scene_glb_path = os.path.join(workspace_path, scene_filename)
1415
+
1416
+ # If GLB file doesn't exist, generate new scene file
1417
+ if not os.path.exists(scene_glb_path):
1418
+ scene_model = convert_predictions_to_glb_scene(
1419
+ predictions,
1420
+ filter_by_frames=frame_selector,
1421
+ show_camera=show_camera,
1422
+ mask_sky_bg=filter_sky_bg,
1423
+ as_mesh=show_mesh,
1424
+ mask_ambiguous=filter_ambiguous
1425
+ )
1426
+ scene_model.export(file_obj=scene_glb_path)
1427
+
1428
+ # Find Gaussian point cloud file
1429
+ gaussian_file_path = os.path.join(workspace_path, "gaussians.ply")
1430
+ if not os.path.exists(gaussian_file_path):
1431
+ gaussian_file_path = None
1432
+
1433
+ return (
1434
+ scene_glb_path,
1435
+ gaussian_file_path,
1436
+ "3D scene updated.",
1437
+ )
1438
+
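+ # Sketch of the GLB cache-key scheme used above: every toggle is encoded in
+ # the filename, so each parameter combination is exported at most once.
+ # `_scene_cache_name` is illustrative, not the code the app calls.
+ def _scene_cache_name(frame, cam, mesh, edges, sky):
+     safe = frame.replace('.', '_').replace(':', '').replace(' ', '_')
+     return f"scene_{safe}_cam{cam}_mesh{mesh}_edges{edges}_sky{sky}.glb"
+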
1439
+ def refresh_view_displays_on_filter_update(
1440
+ workspace_dir,
1441
+ sky_background_filter,
1442
+ current_processed_data,
1443
+ depth_slider_position,
1444
+ normal_slider_position,
1445
+ ):
1446
+ """
1447
+ Refresh depth and normal view displays when filter settings change
1448
+
1449
+ When the background filter checkbox state changes, regenerate processed data and update all view displays.
1450
+ This ensures that filter effects are reflected in real-time in the depth map and normal map visualizations.
1451
+
1452
+ Args:
1453
+ workspace_dir: Workspace directory path containing prediction data and images
1454
+ sky_background_filter: Sky background filter enable status
1455
+ current_processed_data: Currently processed visualization data
1456
+ depth_slider_position: Current position of the depth view slider
1457
+ normal_slider_position: Current position of the normal view slider
1458
+
1459
+ Returns:
1460
+ tuple: (updated processed data, depth visualization result, normal visualization result)
1461
+ """
1462
+
1463
+ # Validate workspace directory validity
1464
+ if not workspace_dir or workspace_dir == "None" or not os.path.isdir(workspace_dir):
1465
+ return current_processed_data, None, None
1466
+
1467
+ # Build and check prediction data file path
1468
+ prediction_data_path = os.path.join(workspace_dir, "predictions.npz")
1469
+ if not os.path.exists(prediction_data_path):
1470
+ return current_processed_data, None, None
1471
+
1472
+ try:
1473
+ # Load raw prediction data
1474
+ raw_prediction_data = np.load(prediction_data_path, allow_pickle=True)
1475
+ predictions_dict = {key: raw_prediction_data[key] for key in raw_prediction_data.keys()}
1476
+
1477
+ # Load image data using WorldMirror's load_images function
1478
+ images_directory = os.path.join(workspace_dir, "images")
1479
+ image_file_paths = [os.path.join(images_directory, path) for path in sorted(os.listdir(images_directory))]
1480
+ img = load_and_preprocess_images(image_file_paths)
1481
+
1482
+ # Regenerate processed data with new filter settings
1483
+ refreshed_data = {}
1484
+ for view_idx in range(img.shape[1]):
1485
+ view_data = {
1486
+ "image": img[0, view_idx],
1487
+ "points3d": predictions_dict["world_points"][view_idx],
1488
+ "depth": None,
1489
+ "normal": None,
1490
+ "mask": None,
1491
+ }
1492
+ mask = predictions_dict["final_mask"][view_idx].copy()
1493
+ if sky_background_filter:
1494
+ sky_mask = predictions_dict["sky_mask"][view_idx]
1495
+ mask = mask & sky_mask
1496
+ view_data["mask"] = mask
1497
+ view_data["depth"] = predictions_dict["depth"][view_idx].squeeze()
1498
+ view_data["normal"] = predictions_dict["normal"][view_idx]
1499
+ refreshed_data[view_idx] = view_data
1500
+
1501
+ # Get current view indices from slider positions (convert to 0-based indices)
1502
+ current_depth_index = int(depth_slider_position) - 1 if depth_slider_position else 0
1503
+ current_normal_index = int(normal_slider_position) - 1 if normal_slider_position else 0
1504
+
1505
+ # Update depth and normal views with new filter data
1506
+ updated_depth_visualization = update_depth_view(refreshed_data, current_depth_index)
1507
+ updated_normal_visualization = update_normal_view(refreshed_data, current_normal_index)
1508
+
1509
+ return refreshed_data, updated_depth_visualization, updated_normal_visualization
1510
+
1511
+ except Exception as error:
1512
+ print(f"Error occurred while refreshing view displays: {error}")
1513
+ return current_processed_data, None, None
1514
+
1515
+ frame_selector.change(
1516
+ refresh_3d_scene,
1517
+ [
1518
+ output_path_state,
1519
+ frame_selector,
1520
+ show_camera,
1521
+ is_example,
1522
+ filter_sky_bg,
1523
+ show_mesh,
1524
+ filter_ambiguous
1525
+ ],
1526
+ [reconstruction_output, gs_output, log_output],
1527
+ )
1528
+ show_camera.change(
1529
+ refresh_3d_scene,
1530
+ [
1531
+ output_path_state,
1532
+ frame_selector,
1533
+ show_camera,
1534
+ is_example,
1535
+ filter_sky_bg,
1536
+ show_mesh,
1537
+ filter_ambiguous
1538
+ ],
1539
+ [reconstruction_output, gs_output, log_output],
1540
+ )
1541
+ show_mesh.change(
1542
+ refresh_3d_scene,
1543
+ [
1544
+ output_path_state,
1545
+ frame_selector,
1546
+ show_camera,
1547
+ is_example,
1548
+ filter_sky_bg,
1549
+ show_mesh,
1550
+ filter_ambiguous
1551
+ ],
1552
+ [reconstruction_output, gs_output, log_output],
1553
+ )
1554
+
1555
+ filter_sky_bg.change(
1556
+ refresh_3d_scene,
1557
+ [
1558
+ output_path_state,
1559
+ frame_selector,
1560
+ show_camera,
1561
+ is_example,
1562
+ filter_sky_bg,
1563
+ show_mesh,
1564
+ filter_ambiguous
1565
+ ],
1566
+ [reconstruction_output, gs_output, log_output],
1567
+ ).then(
1568
+ fn=refresh_view_displays_on_filter_update,
1569
+ inputs=[
1570
+ output_path_state,
1571
+ filter_sky_bg,
1572
+ processed_data_state,
1573
+ depth_view_slider,
1574
+ normal_view_slider,
1575
+ ],
1576
+ outputs=[
1577
+ processed_data_state,
1578
+ depth_map,
1579
+ normal_map,
1580
+ ],
1581
+ )
1582
+ filter_ambiguous.change(
1583
+ refresh_3d_scene,
1584
+ [
1585
+ output_path_state,
1586
+ frame_selector,
1587
+ show_camera,
1588
+ is_example,
1589
+ filter_sky_bg,
1590
+ show_mesh,
1591
+ filter_ambiguous
1592
+ ],
1593
+ [reconstruction_output, gs_output, log_output],
1594
+ ).then(
1595
+ fn=refresh_view_displays_on_filter_update,
1596
+ inputs=[
1597
+ output_path_state,
1598
+ filter_sky_bg,
1599
+ processed_data_state,
1600
+ depth_view_slider,
1601
+ normal_view_slider,
1602
+ ],
1603
+ outputs=[
1604
+ processed_data_state,
1605
+ depth_map,
1606
+ normal_map,
1607
+ ],
1608
+ )
1609
+
1610
+ # -------------------------------------------------------------------------
1611
+ # Auto update gallery when user uploads or changes files
1612
+ # -------------------------------------------------------------------------
1613
+ def update_gallery_on_file_upload(files, interval):
1614
+ if not files:
1615
+ return None, None, None, ""
1616
+
1617
+ # Capture terminal output
1618
+ tee = TeeOutput()
1619
+ old_stdout = sys.stdout
1620
+ sys.stdout = tee
1621
+
1622
+ try:
1623
+ target_dir, image_paths = process_uploaded_files(files, interval)
1624
+ terminal_log = tee.getvalue()
1625
+ sys.stdout = old_stdout
1626
+
1627
+ return (
1628
+ target_dir,
1629
+ image_paths,
1630
+ "Upload complete. Click 'Reconstruct' to begin 3D processing.",
1631
+ terminal_log,
1632
+ )
1633
+ except Exception as e:
1634
+ terminal_log = tee.getvalue()
1635
+ sys.stdout = old_stdout
1636
+ print(f"Error occurred: {e}")
1637
+ raise
1638
+
1639
+ def resample_video_with_new_interval(files, new_interval, current_target_dir):
1640
+ """Resample video with new slider value"""
1641
+ if not files:
1642
+ return (
1643
+ current_target_dir,
1644
+ None,
1645
+ "No files to resample.",
1646
+ "",
1647
+ )
1648
+
1649
+ # Check if we have videos to resample
1650
+ video_extensions = [
1651
+ ".mp4",
1652
+ ".avi",
1653
+ ".mov",
1654
+ ".mkv",
1655
+ ".wmv",
1656
+ ".flv",
1657
+ ".webm",
1658
+ ".m4v",
1659
+ ".3gp",
1660
+ ]
1661
+ has_video = any(
1662
+ os.path.splitext(
1663
+ str(file_data["name"] if isinstance(file_data, dict) else file_data)
1664
+ )[1].lower()
1665
+ in video_extensions
1666
+ for file_data in files
1667
+ )
1668
+
1669
+ if not has_video:
1670
+ return (
1671
+ current_target_dir,
1672
+ None,
1673
+ "No videos found to resample.",
1674
+ "",
1675
+ )
1676
+
1677
+ # Capture terminal output
1678
+ tee = TeeOutput()
1679
+ old_stdout = sys.stdout
1680
+ sys.stdout = tee
1681
+
1682
+ try:
1683
+ # Clean up old target directory if it exists
1684
+ if (
1685
+ current_target_dir
1686
+ and current_target_dir != "None"
1687
+ and os.path.exists(current_target_dir)
1688
+ ):
1689
+ shutil.rmtree(current_target_dir)
1690
+
1691
+ # Process files with new interval
1692
+ target_dir, image_paths = process_uploaded_files(files, new_interval)
1693
+
1694
+ terminal_log = tee.getvalue()
1695
+ sys.stdout = old_stdout
1696
+
1697
+ return (
1698
+ target_dir,
1699
+ image_paths,
1700
+ f"Video resampled with {new_interval}s interval. Click 'Reconstruct' to begin 3D processing.",
1701
+ terminal_log,
1702
+ )
1703
+ except Exception as e:
1704
+ terminal_log = tee.getvalue()
1705
+ sys.stdout = old_stdout
1706
+ print(f"Error occurred: {e}")
1707
+ raise
1708
+
1709
+ file_upload.change(
1710
+ fn=update_gallery_on_file_upload,
1711
+ inputs=[file_upload, time_interval],
1712
+ outputs=[output_path_state, image_gallery, log_output, terminal_output],
1713
+ )
1714
+
1715
+ resample_btn.click(
1716
+ fn=resample_video_with_new_interval,
1717
+ inputs=[file_upload, time_interval, output_path_state],
1718
+ outputs=[output_path_state, image_gallery, log_output, terminal_output],
1719
+ )
1720
+
1721
+ # -------------------------------------------------------------------------
1722
+ # Navigation for Depth, Normal tabs
1723
+ # -------------------------------------------------------------------------
1724
+ def navigate_with_slider(processed_data, target_view):
1725
+ """Navigate to specified view using slider"""
1726
+ if processed_data is None or len(processed_data) == 0:
1727
+ return None, update_view_info(1, 1)
1728
+
1729
+ # Check if target_view is None or invalid value, and safely convert to int
1730
+ try:
1731
+ if target_view is None:
1732
+ target_view = 1
1733
+ else:
1734
+ target_view = int(float(target_view))  # go via float so decimal input is tolerated
1735
+ except (ValueError, TypeError):
1736
+ target_view = 1
1737
+
1738
+ total_views = len(processed_data)
1739
+ # Ensure view index is within valid range
1740
+ view_index = max(1, min(target_view, total_views)) - 1
1741
+
1742
+ # Update depth map
1743
+ depth_vis = update_depth_view(processed_data, view_index)
1744
+
1745
+ # Update view information
1746
+ info_html = update_view_info(view_index + 1, total_views)
1747
+
1748
+ return depth_vis, info_html
1749
+
1750
+ def navigate_with_slider_normal(processed_data, target_view):
1751
+ """Navigate to specified normal view using slider"""
1752
+ if processed_data is None or len(processed_data) == 0:
1753
+ return None, update_view_info(1, 1, "Normal")
1754
+
1755
+ # Check if target_view is None or invalid value, and safely convert to int
1756
+ try:
1757
+ if target_view is None:
1758
+ target_view = 1
1759
+ else:
1760
+ target_view = int(float(target_view))  # go via float so decimal input is tolerated
1761
+ except (ValueError, TypeError):
1762
+ target_view = 1
1763
+
1764
+ total_views = len(processed_data)
1765
+ # Ensure view index is within valid range
1766
+ view_index = max(1, min(target_view, total_views)) - 1
1767
+
1768
+ # Update normal map
1769
+ normal_vis = update_normal_view(processed_data, view_index)
1770
+
1771
+ # Update view information
1772
+ info_html = update_view_info(view_index + 1, total_views, "Normal")
1773
+
1774
+ return normal_vis, info_html
1775
+
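+ # Mapping sketch for the clamping in the two navigate functions above
+ # (illustrative values): with 5 views, slider value 0 -> index 0,
+ # 3 -> index 2, 99 -> index 4, i.e. max(1, min(v, total_views)) - 1.
+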
1776
+ def handle_depth_slider_change(processed_data, target_view):
1777
+ return navigate_with_slider(processed_data, target_view)
1778
+
1779
+ def handle_normal_slider_change(processed_data, target_view):
1780
+ return navigate_with_slider_normal(processed_data, target_view)
1781
+
1782
+ depth_view_slider.change(
1783
+ fn=handle_depth_slider_change,
1784
+ inputs=[processed_data_state, depth_view_slider],
1785
+ outputs=[depth_map, depth_view_info]
1786
+ )
1787
+
1788
+ normal_view_slider.change(
1789
+ fn=handle_normal_slider_change,
1790
+ inputs=[processed_data_state, normal_view_slider],
1791
+ outputs=[normal_map, normal_view_info]
1792
+ )
1793
+
1794
+ # -------------------------------------------------------------------------
1795
+ # Real-time terminal output update
1796
+ # -------------------------------------------------------------------------
1797
+ # Use a timer to periodically update terminal output
1798
+ timer = gr.Timer(value=0.5) # Update every 0.5 seconds
1799
+ timer.tick(
1800
+ fn=get_terminal_output,
1801
+ inputs=[],
1802
+ outputs=[terminal_output]
1803
+ )
1804
+
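+ # Illustrative note: the 0.5 s Timer polls the shared current_terminal_output
+ # buffer, so prints from long-running handlers surface in the UI while they
+ # are still executing.
+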
1805
+ gr.HTML("""
1806
+ <hr style="margin-top: 40px; margin-bottom: 20px;">
1807
+ <div style="text-align: center; font-size: 14px; color: #666; margin-bottom: 20px;">
1808
+ <h3>Acknowledgements</h3>
1809
+ <p>🔗 <a href="https://github.com/microsoft/MoGe">MoGe on GitHub</a> | 🔗 <a href="https://github.com/facebookresearch/vggt">VGGT on GitHub</a></p>
1810
+ </div>
1811
+ """)
1812
+
1813
+ demo.queue().launch(
1814
+ show_error=True,
1815
+ share=True,
1816
+ ssr_mode=False,
1817
+ )
examples/realistic/Archway_Tunnel/image_0001.jpg ADDED
examples/realistic/Archway_Tunnel/image_0030.jpg ADDED
examples/realistic/Bright_Room/image_0001.jpg ADDED
examples/realistic/Bright_Room/image_0035.jpg ADDED
examples/realistic/Desk/530554609_3367433673396747_2161028887770608277_n.jpg ADDED
examples/realistic/Desk/532328457_1311198870420578_2167456836351167380_n.jpg ADDED

Git LFS Details

  • SHA256: dc595d2d32d5a87edb75f3e76d08e2ce81a0a13deb4921736263fd96f6d08288
  • Pointer size: 131 Bytes
  • Size of remote file: 108 kB
examples/realistic/Dining_Table/image_0001.jpg ADDED
examples/realistic/Dining_Table/image_0008.jpg ADDED
examples/realistic/Dining_Table/image_0012.jpg ADDED
examples/realistic/Dining_Table/image_0016.jpg ADDED
examples/realistic/Dino/528883410_1456464302336597_4114529568612559572_n.jpg ADDED
examples/realistic/Dino/530182709_1122456693282934_3373468492106282632_n.jpg ADDED
examples/realistic/Dino/532847807_1055021109949229_8315548832183031452_n.jpg ADDED
examples/realistic/Festival/image_0001.jpg ADDED
examples/realistic/Festival/image_0023.jpg ADDED
examples/realistic/Festival/image_0046.jpg ADDED
examples/realistic/Flower/image_0001.jpg ADDED
examples/realistic/Great_Wall/great_wall_000000.jpg ADDED
examples/realistic/Great_Wall/great_wall_000001.jpg ADDED
examples/realistic/Great_Wall/great_wall_000002.jpg ADDED
examples/realistic/Great_Wall/great_wall_000003.jpg ADDED
examples/realistic/Great_Wall/great_wall_000004.jpg ADDED
examples/realistic/Great_Wall/great_wall_000005.jpg ADDED
examples/realistic/Great_Wall/great_wall_000006.jpg ADDED
examples/realistic/Great_Wall/great_wall_000007.jpg ADDED
examples/realistic/Great_Wall/great_wall_000008.jpg ADDED
examples/realistic/Great_Wall/great_wall_000009.jpg ADDED
examples/realistic/Great_Wall/great_wall_000010.jpg ADDED
examples/realistic/Great_Wall/great_wall_000011.jpg ADDED
examples/realistic/Hall/image_0001.jpg ADDED
examples/realistic/Hall/image_0027.jpg ADDED
examples/realistic/Ireland_Landscape/image_0001.jpg ADDED
examples/realistic/Ireland_Landscape/image_0007.jpg ADDED
examples/realistic/Ireland_Landscape/image_0010.jpg ADDED
examples/realistic/Ireland_Landscape/image_0017.jpg ADDED
examples/realistic/Ireland_Landscape/image_0022.jpg ADDED
examples/realistic/Ireland_Landscape/image_0026.jpg ADDED
examples/realistic/Lego_Kitchen/00.jpg ADDED
examples/realistic/Lego_Kitchen/01.jpg ADDED
examples/realistic/Lego_Kitchen/02.jpg ADDED
examples/realistic/Lego_Kitchen/03.jpg ADDED
examples/realistic/Lego_Kitchen/04.jpg ADDED
examples/realistic/Lego_Kitchen/05.jpg ADDED
examples/realistic/Lego_Kitchen/06.jpg ADDED