Upload folder using huggingface_hub
Browse files- .gitignore +4 -0
- LICENSE +201 -0
- Stacking_Ensemble/catboost_info/learn/events.out.tfevents +3 -0
- Stacking_Ensemble/catboost_info/learn_error.tsv +101 -0
- Stacking_Ensemble/catboost_info/time_left.tsv +101 -0
- Stacking_Ensemble/super_robust.py +345 -0
- visualization/final_result_visualization.R +73 -0
- visualization/fold_visualization.R +73 -0
- visualization/label_tables.R +59 -0
.gitignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
*.pkl
|
| 3 |
+
*.json
|
| 4 |
+
__pycache__/
|
LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work
|
| 38 |
+
(an example is provided in the Appendix below).
|
| 39 |
+
|
| 40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
+
form, that is based on (or derived from) the Work and for which the
|
| 42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
+
of this License, Derivative Works shall not include works that remain
|
| 45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
+
the Work and Derivative Works thereof.
|
| 47 |
+
|
| 48 |
+
"Contribution" shall mean any work of authorship, including
|
| 49 |
+
the original version of the Work and any modifications or additions
|
| 50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
+
means any form of electronic, verbal, or written communication sent
|
| 55 |
+
to the Licensor or its representatives, including but not limited to
|
| 56 |
+
communication on electronic mailing lists, source code control systems,
|
| 57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
+
excluding communication that is conspicuously marked or otherwise
|
| 60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
+
|
| 62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 64 |
+
subsequently incorporated within the Work.
|
| 65 |
+
|
| 66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
+
Work and such Derivative Works in Source or Object form.
|
| 72 |
+
|
| 73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
+
(except as stated in this section) patent license to make, have made,
|
| 77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
+
where such license applies only to those patent claims licensable
|
| 79 |
+
by such Contributor that are necessarily infringed by their
|
| 80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
+
institute patent litigation against any entity (including a
|
| 83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
+
or contributory patent infringement, then any patent licenses
|
| 86 |
+
granted to You under this License for that Work shall terminate
|
| 87 |
+
as of the date such litigation is filed.
|
| 88 |
+
|
| 89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
+
modifications, and in Source or Object form, provided that You
|
| 92 |
+
meet the following conditions:
|
| 93 |
+
|
| 94 |
+
(a) You must give any other recipients of the Work or
|
| 95 |
+
Derivative Works a copy of this License; and
|
| 96 |
+
|
| 97 |
+
(b) You must cause any modified files to carry prominent notices
|
| 98 |
+
stating that You changed the files; and
|
| 99 |
+
|
| 100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
+
that You distribute, all copyright, patent, trademark, and
|
| 102 |
+
attribution notices from the Source form of the Work,
|
| 103 |
+
excluding those notices that do not pertain to any part of
|
| 104 |
+
the Derivative Works; and
|
| 105 |
+
|
| 106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
+
distribution, then any Derivative Works that You distribute must
|
| 108 |
+
include a readable copy of the attribution notices contained
|
| 109 |
+
within such NOTICE file, excluding those notices that do not
|
| 110 |
+
pertain to any part of the Derivative Works, in at least one
|
| 111 |
+
of the following places: within a NOTICE text file distributed
|
| 112 |
+
as part of the Derivative Works; within the Source form or
|
| 113 |
+
documentation, if provided along with the Derivative Works; or,
|
| 114 |
+
within a display generated by the Derivative Works, if and
|
| 115 |
+
wherever such third-party notices normally appear. The contents
|
| 116 |
+
of the NOTICE file are for informational purposes only and
|
| 117 |
+
do not modify the License. You may add Your own attribution
|
| 118 |
+
notices within Derivative Works that You distribute, alongside
|
| 119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
+
that such additional attribution notices cannot be construed
|
| 121 |
+
as modifying the License.
|
| 122 |
+
|
| 123 |
+
You may add Your own copyright statement to Your modifications and
|
| 124 |
+
may provide additional or different license terms and conditions
|
| 125 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
+
the conditions stated in this License.
|
| 129 |
+
|
| 130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
+
this License, without any additional terms or conditions.
|
| 134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
+
the terms of any separate license agreement you may have executed
|
| 136 |
+
with Licensor regarding such Contributions.
|
| 137 |
+
|
| 138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
+
except as required for reasonable and customary use in describing the
|
| 141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
+
|
| 143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
+
implied, including, without limitation, any warranties or conditions
|
| 148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
+
appropriateness of using or redistributing the Work and assume any
|
| 151 |
+
risks associated with Your exercise of permissions under this License.
|
| 152 |
+
|
| 153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
+
unless required by applicable law (such as deliberate and grossly
|
| 156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
+
liable to You for damages, including any direct, indirect, special,
|
| 158 |
+
incidental, or consequential damages of any character arising as a
|
| 159 |
+
result of this License or out of the use or inability to use the
|
| 160 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
+
other commercial damages or losses), even if such Contributor
|
| 163 |
+
has been advised of the possibility of such damages.
|
| 164 |
+
|
| 165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
+
or other liability obligations and/or rights consistent with this
|
| 169 |
+
License. However, in accepting such obligations, You may act only
|
| 170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
+
defend, and hold each Contributor harmless for any liability
|
| 173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
+
of your accepting any such warranty or additional liability.
|
| 175 |
+
|
| 176 |
+
END OF TERMS AND CONDITIONS
|
| 177 |
+
|
| 178 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 179 |
+
|
| 180 |
+
To apply the Apache License to your work, attach the following
|
| 181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 182 |
+
replaced with your own identifying information. (Don't include
|
| 183 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 184 |
+
comment syntax for the file format. We also recommend that a
|
| 185 |
+
file or class name and description of purpose be included on the
|
| 186 |
+
same "printed page" as the copyright notice for easier
|
| 187 |
+
identification within third-party archives.
|
| 188 |
+
|
| 189 |
+
Copyright [yyyy] [name of copyright owner]
|
| 190 |
+
|
| 191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 192 |
+
you may not use this file except in compliance with the License.
|
| 193 |
+
You may obtain a copy of the License at
|
| 194 |
+
|
| 195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 196 |
+
|
| 197 |
+
Unless required by applicable law or agreed to in writing, software
|
| 198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 200 |
+
See the License for the specific language governing permissions and
|
| 201 |
+
limitations under the License.
|
Stacking_Ensemble/catboost_info/learn/events.out.tfevents
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:805c85d02f04e1b8da81c15ab711f90119c9726938d2aac1546f0ae88cfee78d
|
| 3 |
+
size 5998
|
Stacking_Ensemble/catboost_info/learn_error.tsv
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
iter MultiClass
|
| 2 |
+
0 1.488181985
|
| 3 |
+
1 1.175472709
|
| 4 |
+
2 0.9714606556
|
| 5 |
+
3 0.822204168
|
| 6 |
+
4 0.7066984775
|
| 7 |
+
5 0.6135547206
|
| 8 |
+
6 0.5355362088
|
| 9 |
+
7 0.4698605988
|
| 10 |
+
8 0.4146191713
|
| 11 |
+
9 0.3667849828
|
| 12 |
+
10 0.3261929774
|
| 13 |
+
11 0.2908549268
|
| 14 |
+
12 0.260042417
|
| 15 |
+
13 0.2327451858
|
| 16 |
+
14 0.2087759212
|
| 17 |
+
15 0.1877009929
|
| 18 |
+
16 0.1689697491
|
| 19 |
+
17 0.1522890739
|
| 20 |
+
18 0.1372164668
|
| 21 |
+
19 0.1238374094
|
| 22 |
+
20 0.1120119492
|
| 23 |
+
21 0.1013892871
|
| 24 |
+
22 0.09182722355
|
| 25 |
+
23 0.08338165797
|
| 26 |
+
24 0.07576687175
|
| 27 |
+
25 0.06886970876
|
| 28 |
+
26 0.06268082522
|
| 29 |
+
27 0.05716733145
|
| 30 |
+
28 0.05217468551
|
| 31 |
+
29 0.0476568518
|
| 32 |
+
30 0.04346803928
|
| 33 |
+
31 0.03974352467
|
| 34 |
+
32 0.0363756203
|
| 35 |
+
33 0.03339336174
|
| 36 |
+
34 0.03065356329
|
| 37 |
+
35 0.02817926567
|
| 38 |
+
36 0.02587410778
|
| 39 |
+
37 0.02389808264
|
| 40 |
+
38 0.02206780387
|
| 41 |
+
39 0.02037104102
|
| 42 |
+
40 0.01889452817
|
| 43 |
+
41 0.01760071126
|
| 44 |
+
42 0.01633396123
|
| 45 |
+
43 0.0152895552
|
| 46 |
+
44 0.01424559459
|
| 47 |
+
45 0.01327303972
|
| 48 |
+
46 0.0124083342
|
| 49 |
+
47 0.01171990456
|
| 50 |
+
48 0.01108572132
|
| 51 |
+
49 0.01055402737
|
| 52 |
+
50 0.009960500889
|
| 53 |
+
51 0.0093607482
|
| 54 |
+
52 0.00882983384
|
| 55 |
+
53 0.008336367655
|
| 56 |
+
54 0.008013916605
|
| 57 |
+
55 0.007621786211
|
| 58 |
+
56 0.007266206996
|
| 59 |
+
57 0.006893280758
|
| 60 |
+
58 0.006504591733
|
| 61 |
+
59 0.00618290818
|
| 62 |
+
60 0.005889986567
|
| 63 |
+
61 0.005563291532
|
| 64 |
+
62 0.005298557961
|
| 65 |
+
63 0.005087143587
|
| 66 |
+
64 0.004938276785
|
| 67 |
+
65 0.004628865543
|
| 68 |
+
66 0.004477967255
|
| 69 |
+
67 0.004348256068
|
| 70 |
+
68 0.004121486135
|
| 71 |
+
69 0.003937261659
|
| 72 |
+
70 0.003791577086
|
| 73 |
+
71 0.003666238063
|
| 74 |
+
72 0.003529822238
|
| 75 |
+
73 0.003393230571
|
| 76 |
+
74 0.003212297574
|
| 77 |
+
75 0.003042950395
|
| 78 |
+
76 0.002914422938
|
| 79 |
+
77 0.002762845206
|
| 80 |
+
78 0.002650832149
|
| 81 |
+
79 0.002554477695
|
| 82 |
+
80 0.002518738147
|
| 83 |
+
81 0.002437060784
|
| 84 |
+
82 0.002398997496
|
| 85 |
+
83 0.002315308546
|
| 86 |
+
84 0.002259881865
|
| 87 |
+
85 0.00218425362
|
| 88 |
+
86 0.00209584122
|
| 89 |
+
87 0.002055078497
|
| 90 |
+
88 0.001999753615
|
| 91 |
+
89 0.001930903634
|
| 92 |
+
90 0.001895621403
|
| 93 |
+
91 0.001834844434
|
| 94 |
+
92 0.001794619572
|
| 95 |
+
93 0.001753839332
|
| 96 |
+
94 0.001741076217
|
| 97 |
+
95 0.00170799159
|
| 98 |
+
96 0.001674756988
|
| 99 |
+
97 0.0016233309
|
| 100 |
+
98 0.0016049431
|
| 101 |
+
99 0.001573250279
|
Stacking_Ensemble/catboost_info/time_left.tsv
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
iter Passed Remaining
|
| 2 |
+
0 3189 315790
|
| 3 |
+
1 6366 311964
|
| 4 |
+
2 9516 307695
|
| 5 |
+
3 12758 306199
|
| 6 |
+
4 15910 302298
|
| 7 |
+
5 18993 297572
|
| 8 |
+
6 22008 292400
|
| 9 |
+
7 25044 288007
|
| 10 |
+
8 28146 284587
|
| 11 |
+
9 31222 281004
|
| 12 |
+
10 34412 278431
|
| 13 |
+
11 37494 274962
|
| 14 |
+
12 40623 271866
|
| 15 |
+
13 43784 268959
|
| 16 |
+
14 46955 266083
|
| 17 |
+
15 50039 262709
|
| 18 |
+
16 53083 259170
|
| 19 |
+
17 56137 255737
|
| 20 |
+
18 59190 252337
|
| 21 |
+
19 62309 249237
|
| 22 |
+
20 65330 245766
|
| 23 |
+
21 68397 242501
|
| 24 |
+
22 71456 239225
|
| 25 |
+
23 74525 235996
|
| 26 |
+
24 77557 232672
|
| 27 |
+
25 80597 229391
|
| 28 |
+
26 83622 226090
|
| 29 |
+
27 86695 222931
|
| 30 |
+
28 89811 219884
|
| 31 |
+
29 92830 216605
|
| 32 |
+
30 95940 213546
|
| 33 |
+
31 98947 210263
|
| 34 |
+
32 102003 207098
|
| 35 |
+
33 105030 203881
|
| 36 |
+
34 108046 200657
|
| 37 |
+
35 111164 197626
|
| 38 |
+
36 114308 194633
|
| 39 |
+
37 117508 191724
|
| 40 |
+
38 120567 188580
|
| 41 |
+
39 123595 185393
|
| 42 |
+
40 126649 182251
|
| 43 |
+
41 129663 179059
|
| 44 |
+
42 132711 175920
|
| 45 |
+
43 135737 172756
|
| 46 |
+
44 138752 169586
|
| 47 |
+
45 141788 166447
|
| 48 |
+
46 144842 163332
|
| 49 |
+
47 147950 160279
|
| 50 |
+
48 151031 157196
|
| 51 |
+
49 154130 154130
|
| 52 |
+
50 157266 151098
|
| 53 |
+
51 160354 148019
|
| 54 |
+
52 163469 144963
|
| 55 |
+
53 166478 141814
|
| 56 |
+
54 170027 139113
|
| 57 |
+
55 173303 136166
|
| 58 |
+
56 176950 133489
|
| 59 |
+
57 180097 130415
|
| 60 |
+
58 183294 127374
|
| 61 |
+
59 186443 124295
|
| 62 |
+
60 189525 121171
|
| 63 |
+
61 192592 118040
|
| 64 |
+
62 195659 114910
|
| 65 |
+
63 198735 111788
|
| 66 |
+
64 201729 108623
|
| 67 |
+
65 204776 105490
|
| 68 |
+
66 207807 102353
|
| 69 |
+
67 210916 99254
|
| 70 |
+
68 214012 96150
|
| 71 |
+
69 217023 93010
|
| 72 |
+
70 220109 89904
|
| 73 |
+
71 223145 86778
|
| 74 |
+
72 226232 83675
|
| 75 |
+
73 229335 80577
|
| 76 |
+
74 232447 77482
|
| 77 |
+
75 235576 74392
|
| 78 |
+
76 238931 71369
|
| 79 |
+
77 242198 68312
|
| 80 |
+
78 245371 65225
|
| 81 |
+
79 248630 62157
|
| 82 |
+
80 251637 59026
|
| 83 |
+
81 254642 55897
|
| 84 |
+
82 257755 52793
|
| 85 |
+
83 261438 49797
|
| 86 |
+
84 265004 46765
|
| 87 |
+
85 268424 43696
|
| 88 |
+
86 271569 40579
|
| 89 |
+
87 274816 37474
|
| 90 |
+
88 278062 34367
|
| 91 |
+
89 281225 31247
|
| 92 |
+
90 284462 28133
|
| 93 |
+
91 287787 25024
|
| 94 |
+
92 291430 21935
|
| 95 |
+
93 294880 18822
|
| 96 |
+
94 297937 15680
|
| 97 |
+
95 301147 12547
|
| 98 |
+
96 304389 9414
|
| 99 |
+
97 307712 6279
|
| 100 |
+
98 311401 3145
|
| 101 |
+
99 314589 0
|
Stacking_Ensemble/super_robust.py
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
super_robust.py - safe stacking ensemble (FINAL EXTENDED + FULL METRICS)
|
| 4 |
+
Stacking Ensemble: XGBoost, CatBoost, LightGBM, AdaBoost + RandomForest Meta Model
|
| 5 |
+
|
| 6 |
+
Features:
|
| 7 |
+
- Safe GPU fallback
|
| 8 |
+
- Full metrics logging (accuracy, precision, recall, f1, percentage, etc.)
|
| 9 |
+
- JSON-compatible for R Spider Chart
|
| 10 |
+
- Auto robustness_score & fold_variance
|
| 11 |
+
- Handles NaN, inf, weird column names, and file I/O issues
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import os, json, time, warnings, argparse, gc
|
| 15 |
+
from huggingface_hub import HfApi, upload_file, create_repo
|
| 16 |
+
import shutil
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
import pandas as pd
|
| 19 |
+
import numpy as np
|
| 20 |
+
from sklearn.model_selection import train_test_split, StratifiedKFold
|
| 21 |
+
from sklearn.preprocessing import LabelEncoder
|
| 22 |
+
from sklearn.impute import SimpleImputer
|
| 23 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
| 24 |
+
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
|
| 25 |
+
from sklearn.tree import DecisionTreeClassifier
|
| 26 |
+
from xgboost import XGBClassifier
|
| 27 |
+
from catboost import CatBoostClassifier
|
| 28 |
+
import lightgbm as lgb
|
| 29 |
+
import joblib
|
| 30 |
+
|
| 31 |
+
warnings.filterwarnings("ignore")
|
| 32 |
+
|
| 33 |
+
# ==============================================================
|
| 34 |
+
# SAFE LOADING
|
| 35 |
+
# ==============================================================
|
| 36 |
+
def load_dataset(path, max_rows=500000):
    """Load a CSV or Parquet file into a pandas DataFrame.

    The format is chosen from the file extension. Compressed CSVs such as
    ``data.csv.gz`` are accepted as CSV; pandas infers the compression
    from the final extension.

    Args:
        path: Path to the dataset file (anything ``pathlib.Path`` accepts).
        max_rows: Row limit used only for the retry after a ``MemoryError``
            while reading the full CSV.

    Returns:
        The loaded ``pandas.DataFrame``.

    Raises:
        RuntimeError: On any failure, including an unsupported extension.
            The underlying exception is attached as ``__cause__``.
    """
    compressed = (".gz", ".bz2", ".zip", ".xz")
    suffixes = [s.lower() for s in Path(path).suffixes]
    ext = suffixes[-1] if suffixes else ""
    # "name.csv.gz" and friends are still CSV files as far as read_csv cares.
    if ext in compressed and len(suffixes) >= 2 and suffixes[-2] == ".csv":
        ext = ".csv"

    print(f"[load_dataset] Loading: {path}")
    try:
        if ext == ".csv":
            try:
                df = pd.read_csv(path)
            except MemoryError:
                # Best effort: fall back to a truncated read instead of failing.
                print(f"[load_dataset] MemoryError — loading first {max_rows} rows.")
                df = pd.read_csv(path, nrows=max_rows)
        elif ext in [".parquet", ".pq", ".parq"]:
            df = pd.read_parquet(path)
        else:
            raise ValueError("Unsupported file format")
    except Exception as e:
        raise RuntimeError(f"[load_dataset] Failed to load dataset: {e}") from e
    print(f"[load_dataset] Loaded {len(df)} rows × {len(df.columns)} columns.")
    return df
|
| 54 |
+
|
| 55 |
+
# ==============================================================
|
| 56 |
+
# SANITIZE FEATURE NAMES
|
| 57 |
+
# ==============================================================
|
| 58 |
+
def sanitize_feature_names(df):
    """Rewrite column names so they contain only ``[A-Za-z0-9_]`` and are unique.

    Each run of other characters is replaced by a single underscore and
    leading/trailing underscores are stripped. Two further repairs keep
    the result usable as feature names:

    * a name that becomes empty (e.g. ``"??"``) is replaced by
      ``feature_<position>``;
    * a name that collides with an earlier one (e.g. ``"a b"`` and ``"a-b"``)
      gets a numeric suffix ``_1``, ``_2``, ...

    The frame is modified in place and also returned.

    Args:
        df: DataFrame whose columns should be sanitised.

    Returns:
        The same DataFrame object with its ``columns`` replaced.
    """
    original = df.columns.tolist()
    cleaned = (
        df.columns.astype(str)
        .str.replace(r'[^A-Za-z0-9_]+', '_', regex=True)
        .str.strip('_')
    )

    taken = set()
    final_names = []
    for position, name in enumerate(cleaned):
        base = name if name else f"feature_{position}"
        candidate = base
        bump = 1
        # Keep bumping the suffix until the name is not already in use.
        while candidate in taken:
            candidate = f"{base}_{bump}"
            bump += 1
        taken.add(candidate)
        final_names.append(candidate)

    df.columns = final_names

    n_renamed = sum(1 for o, n in zip(original, final_names) if o != n)
    if n_renamed:
        print(f"[sanitize_feature_names] Renamed {n_renamed} columns for LightGBM safety.")
    return df
|
| 69 |
+
|
| 70 |
+
# ==============================================================
|
| 71 |
+
# TARGET DETECTION
|
| 72 |
+
# ==============================================================
|
| 73 |
+
def detect_target_column(df):
    """Guess which column of ``df`` holds the class label.

    Heuristics, applied in order:

    1. The first column (in column order) whose name matches one of the
       well-known label names. Matching ignores case and surrounding
       whitespace, and works for non-string column labels.
    2. Otherwise, the first column with at most 50 distinct values.
    3. Otherwise, the last column.

    Args:
        df: DataFrame to inspect. With no columns at all, step 3 raises
            ``IndexError`` from ``df.columns[-1]``.

    Returns:
        The original column label (not the normalised form).
    """
    candidates = {"label", "target", "class", "category", "attack", "output", "y"}

    for c in df.columns:
        # str() guards against integer labels; strip() handles headers such
        # as " Label" that carry padding from the source file.
        if str(c).strip().lower() in candidates:
            return c

    # NOTE(review): this picks the first low-cardinality column, which may be
    # a categorical feature rather than the label; pass `target` explicitly to
    # prep_data when that is a risk.
    for c in df.columns:
        if df[c].nunique() <= 50:
            return c

    return df.columns[-1]
|
| 82 |
+
|
| 83 |
+
# ==============================================================
|
| 84 |
+
# DATA PREP
|
| 85 |
+
# ==============================================================
|
| 86 |
+
def prep_data(df, target=None):
    """Split ``df`` into a numeric feature matrix and an encoded label vector.

    Steps:
      1. Pick the target column (auto-detected when ``target`` is None).
      2. Label-encode the target after casting it to ``str``.
      3. Label-encode every object/bool/category feature column (also via
         ``str``, so missing values become their own ``"nan"`` class).
      4. Replace +/-inf with NaN.
      5. Drop feature columns that are entirely NaN. ``SimpleImputer`` would
         silently discard them, which made the rebuilt frame's column count
         disagree with ``X.columns``.
      6. Mean-impute the remaining NaNs and sanitise the column names.

    Args:
        df: Raw input DataFrame including the target column.
        target: Name of the target column, or None to auto-detect.

    Returns:
        Tuple ``(X, y, target, le)``: the imputed feature DataFrame (rebuilt
        from an array, so it carries a fresh default index), the encoded
        label array, the target column name that was used, and the fitted
        ``LabelEncoder`` for the target.
    """
    if target is None:
        target = detect_target_column(df)
    y = df[target]
    X = df.drop(columns=[target])

    le = LabelEncoder()
    y = le.fit_transform(y.astype(str))

    # "category" is included so pandas categoricals do not reach the imputer
    # as non-numeric data.
    for col in X.select_dtypes(include=["object", "bool", "category"]).columns:
        X[col] = LabelEncoder().fit_transform(X[col].astype(str))

    X = X.replace([np.inf, -np.inf], np.nan)

    # Columns with no observed value carry no signal and break the
    # column/array alignment after imputation, so remove them explicitly.
    if len(X) > 0:
        all_missing = X.columns[X.isna().all()].tolist()
        if all_missing:
            print(f"[prep_data] Dropping {len(all_missing)} all-NaN columns.")
            X = X.drop(columns=all_missing)

    X = pd.DataFrame(SimpleImputer(strategy="mean").fit_transform(X), columns=X.columns)
    X = sanitize_feature_names(X)
    return X, y, target, le
|
| 102 |
+
|
| 103 |
+
# ==============================================================
|
| 104 |
+
# TRAIN BASE MODELS
|
| 105 |
+
# ==============================================================
|
| 106 |
+
def train_base_models(X_train, y_train, X_val):
    """Fit the four base learners and predict on the validation split.

    Learners: XGBoost, CatBoost, LightGBM and AdaBoost (depth-3 trees).
    A GPU is used when ``cupy`` imports and reports at least one device.
    If a GPU-configured learner fails to train, it is retried once with its
    CPU configuration before being given up on.

    Args:
        X_train: Training features.
        y_train: Training labels; the number of distinct values selects the
            binary or multiclass configuration of each learner.
        X_val: Features to predict on with every successfully fitted model.

    Returns:
        Tuple ``(models, preds, times)`` of dicts keyed by lower-case model
        name (``"xgboost"``, ``"catboost"``, ``"lightgbm"``, ``"adaboost"``).
        ``models`` and ``preds`` only contain learners that trained
        successfully; ``times`` holds the training seconds of the successful
        attempt, or ``0.0`` for a learner that failed.

    Note:
        Training errors are caught and logged. Errors raised by ``predict``
        are not caught here and propagate to the caller.
    """
    try:
        import cupy
        gpu_ok = cupy.cuda.runtime.getDeviceCount() > 0
    except Exception:
        gpu_ok = False

    device = "gpu" if gpu_ok else "cpu"
    print(f"[train_base_models] Using {device.upper()} mode")

    models, preds, times = {}, {}, {}
    num_cls = len(np.unique(y_train))

    def safe_train(name, fn, gpu_capable=True):
        """Run ``fn(use_gpu)``; log and time it; fall back to CPU on failure."""
        key = name.lower()
        # Only learners that actually have a GPU variant get a second attempt.
        attempts = [True, False] if (gpu_ok and gpu_capable) else [False]
        for use_gpu in attempts:
            try:
                start = time.perf_counter()
                print(f"[train_base_models] Training {name} ...")
                m = fn(use_gpu)
                dur = round(time.perf_counter() - start, 2)
                times[key] = dur
                print(f"[train_base_models] {name} done in {dur:.2f}s")
                return m
            except Exception as e:
                print(f"[train_base_models] {name} failed: {e}")
                if use_gpu:
                    print(f"[train_base_models] {name}: retrying on CPU ...")
        # Same lower-case key as the success path so lookups stay consistent.
        times[key] = 0.0
        return None

    def build_xgboost(use_gpu):
        return XGBClassifier(
            n_estimators=50, learning_rate=0.3, max_depth=4,
            tree_method="gpu_hist" if use_gpu else "hist",
            objective="binary:logistic" if num_cls == 2 else "multi:softmax",
            num_class=num_cls if num_cls > 2 else None,
            use_label_encoder=False, eval_metric="logloss", random_state=42, verbosity=0
        ).fit(X_train, y_train)

    def build_catboost(use_gpu):
        return CatBoostClassifier(
            iterations=100, learning_rate=0.1, depth=6,
            loss_function="Logloss" if num_cls == 2 else "MultiClass",
            task_type="GPU" if use_gpu else "CPU", verbose=False, random_seed=42
        ).fit(X_train, y_train)

    def build_lightgbm(use_gpu):
        return lgb.LGBMClassifier(
            n_estimators=50, learning_rate=0.3, max_depth=4,
            device="gpu" if use_gpu else "cpu",
            objective="binary" if num_cls == 2 else "multiclass",
            num_class=num_cls if num_cls > 2 else None, random_state=42
        ).fit(X_train, y_train)

    def build_adaboost(use_gpu):
        # AdaBoost has no GPU variant; the flag is accepted for a uniform call.
        return AdaBoostClassifier(
            estimator=DecisionTreeClassifier(max_depth=3),
            n_estimators=50, random_state=42
        ).fit(X_train, y_train)

    learners = [
        ("XGBoost", build_xgboost, True),
        ("CatBoost", build_catboost, True),
        ("LightGBM", build_lightgbm, True),
        ("AdaBoost", build_adaboost, False),
    ]
    for name, builder, gpu_capable in learners:
        model = safe_train(name, builder, gpu_capable)
        # Explicit None check: estimator truthiness can depend on __len__.
        if model is not None:
            key = name.lower()
            preds[key] = model.predict(X_val)
            models[key] = model

    gc.collect()
    return models, preds, times
|
| 173 |
+
|
| 174 |
+
# ==============================================================
|
| 175 |
+
# OOF STACKING (WITH FULL METRICS)
|
| 176 |
+
# ==============================================================
|
| 177 |
+
def oof_stacking(X, y, n_folds=5):
    """Build out-of-fold (OOF) predictions and per-fold metrics for the base models.

    The rows are split with a shuffled StratifiedKFold (random_state=42). For
    every fold the base models are trained through train_base_models on the
    training rows, and their predictions on the held-out rows are written into
    the OOF arrays and scored.

    Args:
        X: Feature table; rows are selected by position through ``X.iloc``.
        y: Labels, one per row of X. Converted with ``np.asarray`` so rows are
            always selected by position, never by index label.
        n_folds: Number of stratified folds.

    Returns:
        A tuple ``(oof, folds)``. ``oof`` maps each base-model name to an int32
        array of length ``len(y)`` (entries never predicted stay 0). ``folds``
        is a list of ``{"fold": i, "metrics": {...}}`` dicts, one for every
        fold that was not skipped.
    """
    # y[tr] must be positional. On a pandas Series with a shuffled index the
    # same expression would be a label lookup, so work on a plain array.
    y = np.asarray(y)
    model_names = ["xgboost", "catboost", "lightgbm", "adaboost"]

    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
    oof = {k: np.zeros(len(y), dtype=np.int32) for k in model_names}
    folds = []

    for i, (tr, val) in enumerate(skf.split(X, y), start=1):
        print(f"\n[oof_stacking] ==== Fold {i}/{n_folds} ====")
        X_tr, X_val, y_tr, y_val = X.iloc[tr], X.iloc[val], y[tr], y[val]
        try:
            models, preds, times = train_base_models(X_tr, y_tr, X_val)
        except Exception as e:
            print(f"[Fold {i}] Fold skipped: {e}")
            continue

        # train_base_models only adds an entry to preds for a model it
        # obtained; anything absent keeps its zero-initialised OOF values.
        missing = [k for k in model_names if k not in preds]
        if missing:
            print(f"[Fold {i}] No predictions from: {', '.join(missing)} "
                  f"(OOF entries for this fold stay 0)")

        fold_metrics = {}
        for name, y_pred in preds.items():
            # Some estimators return a column vector; flatten before storing.
            y_pred = np.ravel(y_pred)
            oof[name][val] = y_pred
            acc = accuracy_score(y_val, y_pred)
            pre = precision_score(y_val, y_pred, average='weighted', zero_division=0)
            rec = recall_score(y_val, y_pred, average='weighted', zero_division=0)
            f1v = f1_score(y_val, y_pred, average='weighted', zero_division=0)
            # Every prediction other than class 0 is counted as "vulnerable".
            total_v = int((y_pred != 0).sum())
            pct = round(total_v / len(y_pred) * 100, 4)
            is_vul = bool(total_v > 0)

            fold_metrics[name] = {
                "accuracy": float(acc),
                "precision": float(pre),
                "recall": float(rec),
                "f1": float(f1v),
                "total_vulnerable": total_v,
                "percentage": pct,
                "is_vulnerable": is_vul,
                "train_time_sec": float(times.get(name.lower(), 0.0))
            }
            print(f"[Fold {i}] {name}: acc={acc:.4f}, f1={f1v:.4f}, vuln={pct}%")

        folds.append({"fold": i, "metrics": fold_metrics})
    print("[oof_stacking] Completed all folds.")
    return oof, folds
|
| 219 |
+
|
| 220 |
+
# ==============================================================
|
| 221 |
+
# META MODEL & EVALUATION
|
| 222 |
+
# ==============================================================
|
| 223 |
+
def train_meta_model(oof_preds, y):
    """Fit the stacking meta-learner on the base models' out-of-fold predictions.

    Every value of ``oof_preds`` becomes one feature column, in the mapping's
    iteration order; ``y`` holds the matching labels.

    Returns:
        The fitted RandomForestClassifier.
    """
    feature_columns = list(oof_preds.values())
    stacked = np.column_stack(feature_columns)
    forest = RandomForestClassifier(
        n_estimators=50,
        random_state=42,
        max_features="sqrt",
    )
    forest.fit(stacked, y)
    return forest
|
| 228 |
+
|
| 229 |
+
def evaluate(models, meta, X_test, y_test, times):
    """Score every base model and the meta model on the held-out test set.

    Args:
        models: Mapping of base-model name to fitted estimator.
        meta: Fitted meta model. It receives one feature column per entry of
            ``models``, in that mapping's iteration order.
        X_test: Test features.
        y_test: Test labels.
        times: Mapping of lower-cased model name to training time in seconds;
            missing names are reported as 0.0.

    Returns:
        A dict keyed by model name, plus ``"meta_model"``. Each value holds
        weighted precision/recall/f1 and accuracy. Base-model entries also
        hold the vulnerability counts and the training time.
    """
    results = {}
    # Predict once per model and reuse the result for the meta features.
    test_preds = {}
    for name, m in models.items():
        # Flatten so column-vector outputs behave like in oof_stacking.
        y_pred = np.ravel(m.predict(X_test))
        test_preds[name] = y_pred
        acc = accuracy_score(y_test, y_pred)
        pre = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1v = f1_score(y_test, y_pred, average='weighted', zero_division=0)
        # Every prediction other than class 0 is counted as "vulnerable".
        total_v = int((y_pred != 0).sum())
        pct = round(total_v / len(y_pred) * 100, 4)
        is_vul = bool(total_v > 0)
        results[name] = {
            "accuracy": float(acc),
            "precision": float(pre),
            "recall": float(rec),
            "f1": float(f1v),
            "total_vulnerable": total_v,
            "percentage": pct,
            "is_vulnerable": is_vul,
            "train_time_sec": float(times.get(name.lower(), 0.0))
        }
        print(f"[evaluate] {name}: acc={acc:.4f}, f1={f1v:.4f}, vuln={pct}%")

    meta_X = np.column_stack([test_preds[k] for k in models])
    y_meta = meta.predict(meta_X)
    results["meta_model"] = {
        "accuracy": float(accuracy_score(y_test, y_meta)),
        "precision": float(precision_score(y_test, y_meta, average='weighted', zero_division=0)),
        "recall": float(recall_score(y_test, y_meta, average='weighted', zero_division=0)),
        "f1": float(f1_score(y_test, y_meta, average='weighted', zero_division=0))
    }
    return results
|
| 255 |
+
|
| 256 |
+
# ==============================================================
|
| 257 |
+
# SAVE SUMMARY
|
| 258 |
+
# ==============================================================
|
| 259 |
+
def save_summary_json(outdir, target, nrows, class_labels, folds, results):
    """Write the run summary to ``<outdir>/summary.json``.

    Args:
        outdir: Output directory; created (with parents) when missing.
        target: Name of the target column.
        nrows: Number of dataset rows.
        class_labels: Iterable of class labels; numpy scalars are converted to
            plain Python values so they can be serialised.
        folds: List of ``{"fold": i, "metrics": {model: {"accuracy": ...}}}``
            dicts.
        results: Final evaluation results, stored as-is.

    The summary also stores ``fold_variance`` (variance of the per-fold mean
    accuracy) and ``robustness_score`` (1 - variance). Both are ``null`` when
    no fold carries any metrics.
    """
    def _to_builtin(obj):
        # json cannot serialise numpy scalars/arrays that are not subclasses
        # of a Python builtin (e.g. np.int64); convert them explicitly.
        if isinstance(obj, np.generic):
            return obj.item()
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

    outdir = Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    # Mean accuracy over the models of each fold; folds without any metrics
    # are left out so they cannot inject NaN into the variance.
    fold_acc = [
        float(np.mean([m["accuracy"] for m in f["metrics"].values()]))
        for f in folds
        if f["metrics"]
    ]
    if fold_acc:
        variance = float(np.var(fold_acc))
        fold_variance = round(variance, 6)
        robustness_score = round(float(1 - variance), 6)
    else:
        # NaN is not valid JSON; report "unknown" as null instead.
        fold_variance = None
        robustness_score = None

    summary = {
        "target_column": target,
        "rows": int(nrows),
        "folds": folds,
        "final_results": results,
        "class_labels": [
            c.item() if isinstance(c, np.generic) else c for c in class_labels
        ],
        "fold_variance": fold_variance,
        "robustness_score": robustness_score
    }
    path = outdir / "summary.json"
    with open(path, "w") as f:
        json.dump(summary, f, indent=2, default=_to_builtin)
    print(f"[save_summary_json] Saved to {path}")
|
| 281 |
+
|
| 282 |
+
# ==============================================================
|
| 283 |
+
# HUGGINGFACE UPLOAD
|
| 284 |
+
# ==============================================================
|
| 285 |
+
|
| 286 |
+
# ==============================================================
|
| 287 |
+
# SAVE MODELS LOCALLY
|
| 288 |
+
# ==============================================================
|
| 289 |
+
def save_models(models, meta_model, outdir):
    """Persist all base models and the meta model under ``<outdir>/models``.

    Base models are written as ``<name>_model.pkl`` and the meta model as
    ``meta_model.pkl``, each through joblib.dump.

    Returns:
        The path of the directory the files were written to.
    """
    target_dir = os.path.join(outdir, "models")
    os.makedirs(target_dir, exist_ok=True)

    # Base models first, meta model last (same write order as before).
    artifacts = [(f"{label}_model.pkl", estimator) for label, estimator in models.items()]
    artifacts.append(("meta_model.pkl", meta_model))
    for filename, estimator in artifacts:
        joblib.dump(estimator, os.path.join(target_dir, filename))

    print(f"[save_models] All base and meta models saved to {target_dir}")
    return target_dir
|
| 299 |
+
|
| 300 |
+
# ==============================================================
|
| 301 |
+
# MAIN
|
| 302 |
+
# ==============================================================
|
| 303 |
+
def main(args):
    """Run the full stacking pipeline: load, split, train, evaluate, save.

    Args:
        args: Parsed command-line arguments providing ``dataset``, ``outdir``,
            ``target_label``, ``test_size`` and ``n_folds``.
    """
    start = time.perf_counter()
    df = load_dataset(args.dataset)
    X, y, target, le = prep_data(df, args.target_label)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=args.test_size, random_state=42,
        # Stratification needs at least two classes.
        stratify=y if len(np.unique(y)) > 1 else None
    )
    oof_preds, folds = oof_stacking(X_train, y_train, n_folds=args.n_folds)
    models, _, times = train_base_models(X_train, y_train, X_test)

    # evaluate() feeds the meta model one column per trained base model, in
    # the iteration order of `models`. Train it on exactly those OOF columns
    # so the feature count still matches when a base model failed to train.
    meta_features = {k: oof_preds[k] for k in models}
    meta = train_meta_model(meta_features, y_train)
    results = evaluate(models, meta, X_test, y_test, times)

    # === Save the models ===
    save_models(models, meta, args.outdir)

    # === Total time for training and evaluation ===
    total_time = round(time.perf_counter() - start, 2)
    print(f"\n Completed in {total_time} sec")

    # === Save the analysis summary (written once) ===
    save_summary_json(args.outdir, target, len(df), le.classes_, folds, results)

    # Add the total time to the summary that was just written.
    summary_path = Path(args.outdir) / "summary.json"
    if summary_path.exists():
        with open(summary_path, "r") as f:
            data = json.load(f)
        data["total_train_time_sec"] = total_time
        with open(summary_path, "w") as f:
            json.dump(data, f, indent=2)
        print(f"[save_summary_json] total_train_time_sec={total_time} saved.")
|
| 336 |
+
|
| 337 |
+
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    # Mandatory input dataset and output directory.
    for required_flag in ("--dataset", "--outdir"):
        parser.add_argument(required_flag, required=True)
    # Optional settings with their defaults.
    parser.add_argument("--target-label", default=None)
    parser.add_argument("--test-size", type=float, default=0.2)
    parser.add_argument("--n-folds", type=int, default=5)
    main(parser.parse_args())
|
visualization/final_result_visualization.R
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Radar chart comparing the final test-set metrics of every model stored in
# summary.json (read from the current working directory).
library(jsonlite)
library(dplyr)
library(fmsb)

# Base R provides `%||%` only from version 4.4.0; define a fallback so the
# script also runs where no attached package supplies it.
if (!exists("%||%")) {
  `%||%` <- function(x, y) if (is.null(x)) y else x
}

# === 1. Read the JSON file ===
data <- fromJSON("summary.json", simplifyVector = FALSE)

# === 2. Extract final_results ===
final_results <- data$final_results

# === 3. Convert to a data frame (one row per model) ===
# Metrics that an entry does not carry are filled with NA.
df_final <- lapply(names(final_results), function(model_name) {
  metric <- final_results[[model_name]]
  data.frame(
    model = model_name,
    accuracy = metric$accuracy,
    precision = metric$precision,
    recall = metric$recall,
    f1 = metric$f1,
    total_vulnerable = metric$total_vulnerable %||% NA_real_,
    percentage = metric$percentage %||% NA_real_,
    is_vulnerable = as.numeric(metric$is_vulnerable %||% NA),
    train_time_sec = metric$train_time_sec %||% NA_real_
  )
}) %>% bind_rows()

print("=== FINAL RESULTS DATA ===")
print(df_final)

# === 4. Rescale every numeric column to 0-1 so the radar axes are comparable ===
norm_df <- df_final
num_cols <- vapply(norm_df, is.numeric, logical(1))
norm_df[num_cols] <- lapply(norm_df[num_cols], function(x) {
  # Nothing to rescale when no model reports this metric.
  if (all(is.na(x))) {
    return(x)
  }
  rng <- range(x, na.rm = TRUE)
  # A constant column would divide by zero; draw it at the outer edge.
  if (rng[1] == rng[2]) rep(1, length(x)) else (x - rng[1]) / (rng[2] - rng[1])
})

# === 5. Prepare the radar chart data ===
radar_data <- norm_df %>% select(-model)
rownames(radar_data) <- df_final$model

# radarchart() expects the axis maximum and minimum as the first two rows.
max_val <- rep(1, ncol(radar_data))
min_val <- rep(0, ncol(radar_data))
radar_plot_data <- rbind(max_val, min_val, radar_data)

# === 6. Plot the radar chart ===
# One colour per model, recycled if there are more models than colours.
colors <- rep_len(
  c("red", "blue", "green", "orange", "purple"),
  nrow(radar_data)
)
par(mfrow = c(1, 1), mar = c(2, 2, 4, 2))

radarchart(
  radar_plot_data,
  axistype = 1,
  pcol = colors,
  plwd = 3,
  plty = 1,
  cglcol = "grey",
  cglty = 1,
  axislabcol = "grey30",
  caxislabels = seq(0, 1, 0.2),
  cglwd = 0.8,
  vlcex = 0.8,
  title = "Final Results – Model Comparison (All Metrics Normalized)"
)

legend(
  "bottomright",
  legend = rownames(radar_data),
  col = colors,
  lty = 1,
  lwd = 3,
  bty = "n",
  cex = 0.8
)
|
visualization/fold_visualization.R
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Radar chart comparing the models' metrics within one cross-validation fold
# stored in summary.json (read from the current working directory).
library(jsonlite)
library(dplyr)
library(fmsb)

# === 1. Read the JSON file ===
data <- fromJSON("summary.json", simplifyVector = FALSE)

# === 2. Select the fold to plot ===
# Position of the fold inside data$folds; change this to plot another fold.
fold_index <- 5L
if (length(data$folds) < fold_index) {
  stop(
    "summary.json contains only ", length(data$folds),
    " fold(s); cannot select fold ", fold_index,
    call. = FALSE
  )
}
fold <- data$folds[[fold_index]]
metrics_list <- fold$metrics
# Label the output with the fold number recorded in the file, falling back to
# the position when the entry carries no number.
fold_number <- if (is.null(fold$fold)) fold_index else as.integer(fold$fold)

# === 3. Build a data frame with every model of the selected fold ===
df_fold <- lapply(names(metrics_list), function(model_name) {
  metric <- metrics_list[[model_name]]
  data.frame(
    model = model_name,
    accuracy = metric$accuracy,
    precision = metric$precision,
    recall = metric$recall,
    f1 = metric$f1,
    total_vulnerable = metric$total_vulnerable,
    percentage = metric$percentage,
    is_vulnerable = as.numeric(metric$is_vulnerable),
    train_time_sec = metric$train_time_sec
  )
}) %>% bind_rows()

print(sprintf("=== Data Fold %d ===", fold_number))
print(df_fold)

# === 4. Rescale every numeric column to 0-1 so the radar axes are comparable ===
norm_df <- df_fold
num_cols <- vapply(norm_df, is.numeric, logical(1))
norm_df[num_cols] <- lapply(norm_df[num_cols], function(x) {
  rng <- range(x, na.rm = TRUE)
  # A constant column (e.g. is_vulnerable identical for all models) would
  # give 0/0 = NaN; draw it at the outer edge instead.
  if (rng[1] == rng[2]) rep(1, length(x)) else (x - rng[1]) / (rng[2] - rng[1])
})

# === 5. Prepare the radar chart data ===
radar_data <- norm_df %>% select(-model)
rownames(radar_data) <- df_fold$model

# radarchart() expects the axis maximum and minimum as the first two rows.
max_val <- rep(1, ncol(radar_data))
min_val <- rep(0, ncol(radar_data))
radar_plot_data <- rbind(max_val, min_val, radar_data)

# === 6. Plot the radar chart for the selected fold ===
# One colour per model, recycled if there are more models than colours.
colors <- rep_len(c("red", "blue", "green", "orange"), nrow(radar_data))
par(mfrow = c(1, 1), mar = c(2, 2, 4, 2))

radarchart(
  radar_plot_data,
  axistype = 1,
  pcol = colors,
  plwd = 3,
  plty = 1,
  cglcol = "grey",
  cglty = 1,
  axislabcol = "grey30",
  caxislabels = seq(0, 1, 0.2),
  cglwd = 0.8,
  vlcex = 0.8,
  title = sprintf(
    "📊 Fold %d – Model Comparison (All Metrics Normalized)",
    fold_number
  )
)

legend(
  "bottomright",
  legend = rownames(radar_data),
  col = colors,
  lty = 1,
  lwd = 3,
  bty = "n",
  cex = 0.8
)
|
visualization/label_tables.R
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Draws a summary page from summary.json (read from the current working
# directory): run-level metadata on top, the class labels as a table below.

# === Load libraries ===
library(jsonlite)
library(grid)
library(gridExtra)

# === 1. Read the JSON file ===
data <- fromJSON("summary.json", simplifyVector = FALSE)

# === 2. Extract the metadata ===
fold_variance <- data$fold_variance
robustness_score <- data$robustness_score
total_train_time <- data$total_train_time_sec
# With simplifyVector = FALSE the labels arrive as a list. Passing a list to
# data.frame() would create one column per label, so flatten it to a vector.
class_labels <- as.character(unlist(data$class_labels))
table_data <- data.frame(Label = class_labels)

# Format a numeric metric, or show "N/A" when the key is absent or null.
format_metric <- function(value, fmt) {
  if (is.null(value) || !is.numeric(value)) "N/A" else sprintf(fmt, value)
}

# === 3. Proportional layout: header row (1 part) above table row (2 parts) ===
grid.newpage()
pushViewport(viewport(layout = grid.layout(2, 1, heights = unit(c(1, 2), "null"))))

# === 3a. Metadata header text ===
pushViewport(viewport(layout.pos.row = 1))
grid.text("Model Summary", x = 0.5, y = 0.8,
          gp = gpar(fontsize = 16, fontface = "bold"))
grid.text(sprintf("Fold Variance: %s", format_metric(fold_variance, "%.6f")),
          x = 0.5, y = 0.55, gp = gpar(fontsize = 12, col = "#333333"))
grid.text(sprintf("Robustness Score: %s", format_metric(robustness_score, "%.6f")),
          x = 0.5, y = 0.40, gp = gpar(fontsize = 12, col = "#333333"))
grid.text(sprintf("Total Train Time: %s sec", format_metric(total_train_time, "%.2f")),
          x = 0.5, y = 0.25, gp = gpar(fontsize = 12, col = "#333333"))
popViewport()

# === 3b. Class-label table below ===
pushViewport(viewport(layout.pos.row = 2))

# Section title, placed slightly closer to the table
grid.text("Class Labels", x = 0.5, y = 0.78,
          gp = gpar(fontsize = 14, fontface = "bold", col = "#222222"))

# Compact table theme
table_theme <- gridExtra::ttheme_default(
  core = list(
    fg_params = list(cex = 0.85, col = "#222222"),
    bg_params = list(fill = rep("#f9f9f9", nrow(table_data)), col = NA)
  ),
  colhead = list(
    fg_params = list(cex = 0.9, fontface = "bold", col = "#222222"),
    bg_params = list(fill = "#e8e8e8", col = NA)
  ),
  padding = unit(c(2, 4), "mm")
)

# Build the table
table_grob <- gridExtra::tableGrob(
  table_data, rows = NULL, theme = table_theme
)

# Draw the table just below the section title
grid.draw(editGrob(table_grob, vp = viewport(x = 0.5, y = 0.43, width = 0.25, height = 0.4)))
popViewport()
|