Spaces:
Sleeping
Sleeping
| from ast import literal_eval | |
def make_lang_list(row):
    """Turn the row's ``languages`` field into a Python value.

    The sentinel string ``"none"`` yields an empty list; any other value is
    parsed as a Python literal with ``ast.literal_eval``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a
            ``"languages"`` entry.

    Returns:
        ``[]`` for the ``"none"`` sentinel, otherwise the evaluated literal.
    """
    raw = row["languages"]
    return [] if raw == "none" else literal_eval(raw)
def language_count(row):
    """Return how many entries the row's ``languages`` value holds.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a
            ``"languages"`` entry that supports ``len()``.

    Returns:
        The length of ``row["languages"]``.
    """
    langs = row["languages"]
    return len(langs)
def process_for_lang(data, modality):
    """Filter rows by modality and derive per-row language information.

    The input frame is never modified: all work happens on a copy, so the
    function can be called repeatedly on the same (e.g. cached) DataFrame.

    Args:
        data: DataFrame with a ``"modality"`` column and a ``"languages"``
            column. ``"languages"`` values of ``"False"`` or ``{}`` are
            treated as missing; the remaining values are parsed by
            ``make_lang_list``.
        modality: ``"NLP"``, ``"Audio"`` or ``"Multimodal"`` to keep only rows
            of that modality; any other value keeps every row.

    Returns:
        A tuple ``(data, no_lang_count, total_langs, unique_langs)`` where
        ``data`` is the processed copy (``"languages"`` parsed, plus new
        ``"language_count"`` and ``"multilingual"`` columns),
        ``no_lang_count`` is the number of rows without languages,
        ``total_langs`` is the number of distinct languages, and
        ``unique_langs`` is the array of those distinct languages.
    """
    # Filter by modality; unrecognised values leave the rows unfiltered.
    modality_values = {"NLP": "nlp", "Audio": "audio", "Multimodal": "multimodal"}
    if modality in modality_values:
        data = data[data["modality"] == modality_values[modality]]

    # Work on an independent copy. Without it the writes below would either
    # hit a slice of the caller's frame (chained-assignment warning) or, when
    # no filter was applied, overwrite the caller's DataFrame in place.
    data = data.copy()

    # Remove rows without languages
    data.loc[data["languages"] == "False", "languages"] = None
    data.loc[data["languages"] == {}, "languages"] = None

    # Count of rows that have no languages
    no_lang_count = data["languages"].isna().sum()

    # As the languages column might have multiple languages,
    # we need to convert it to a list. We then count the number of languages.
    data["languages"] = data["languages"].fillna("none")
    data["languages"] = data.apply(make_lang_list, axis=1)
    data["language_count"] = data.apply(language_count, axis=1)

    # Just keep the models with at least one language
    models_with_langs = data[data["language_count"] > 0]
    langs = models_with_langs["languages"].explode()
    langs = langs[langs != {}]
    unique_langs = langs.unique()  # computed once, reused for both outputs

    # Flag rows explicitly tagged with the "multilingual" pseudo-language.
    data["multilingual"] = data["languages"].apply(
        lambda row_langs: int("multilingual" in row_langs)
    )
    return data, no_lang_count, len(unique_langs), unique_langs
def filter_multilinguality(data, linguality):
    """Select rows according to the requested multilinguality option.

    Args:
        data: DataFrame with ``"multilingual"`` and ``"language_count"``
            columns.
        linguality: ``"Just Multilingual"`` keeps rows flagged multilingual
            or having more than one language; ``"Three or more languages"``
            keeps rows with at least three languages; any other value returns
            ``data`` unchanged.

    Returns:
        The filtered DataFrame, or ``data`` itself when no filter applies.
    """
    if linguality == "Three or more languages":
        return data[data["language_count"] >= 3]

    if linguality != "Just Multilingual":
        return data

    # A row counts as multilingual if it is tagged so or lists several languages.
    keep = (data["multilingual"] == 1) | (data["language_count"] > 1)
    return data[keep]