Spaces:
Sleeping
Sleeping
device token
Browse files- api/routes/patients.py +1 -1
- api/services/synthea_integration.py +162 -39
api/routes/patients.py
CHANGED
|
@@ -1713,7 +1713,7 @@ async def generate_and_import_synthea_patients(
|
|
| 1713 |
"error_count": len(errors),
|
| 1714 |
"errors": errors,
|
| 1715 |
"config": generation_result['config'],
|
| 1716 |
-
"source":
|
| 1717 |
}
|
| 1718 |
|
| 1719 |
except Exception as e:
|
|
|
|
| 1713 |
"error_count": len(errors),
|
| 1714 |
"errors": errors,
|
| 1715 |
"config": generation_result['config'],
|
| 1716 |
+
"source": generation_result.get('source', 'synthea')
|
| 1717 |
}
|
| 1718 |
|
| 1719 |
except Exception as e:
|
api/services/synthea_integration.py
CHANGED
|
@@ -99,6 +99,21 @@ class SyntheaIntegrationService:
|
|
| 99 |
try:
|
| 100 |
logger.info("🚀 Starting Synthea generation...")
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
# Clear output directory
|
| 103 |
for file in self.output_dir.glob("*.json"):
|
| 104 |
file.unlink()
|
|
@@ -110,6 +125,8 @@ class SyntheaIntegrationService:
|
|
| 110 |
"-o", str(self.output_dir)
|
| 111 |
]
|
| 112 |
|
|
|
|
|
|
|
| 113 |
process = await asyncio.create_subprocess_exec(
|
| 114 |
*cmd,
|
| 115 |
stdout=asyncio.subprocess.PIPE,
|
|
@@ -123,7 +140,9 @@ class SyntheaIntegrationService:
|
|
| 123 |
logger.info(f"Output: {stdout.decode()}")
|
| 124 |
return True
|
| 125 |
else:
|
| 126 |
-
|
|
|
|
|
|
|
| 127 |
return False
|
| 128 |
|
| 129 |
except Exception as e:
|
|
@@ -367,44 +386,45 @@ class SyntheaIntegrationService:
|
|
| 367 |
try:
|
| 368 |
logger.info(f"🎯 Starting Synthea generation for {population} patients")
|
| 369 |
|
| 370 |
-
#
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
|
|
|
|
|
|
|
|
|
| 405 |
|
| 406 |
-
except HTTPException:
|
| 407 |
-
raise
|
| 408 |
except Exception as e:
|
| 409 |
logger.error(f"❌ Error in generate_and_import_patients: {str(e)}")
|
| 410 |
raise HTTPException(
|
|
@@ -412,6 +432,92 @@ class SyntheaIntegrationService:
|
|
| 412 |
detail=f"Synthea integration failed: {str(e)}"
|
| 413 |
)
|
| 414 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
async def get_synthea_statistics(self) -> Dict[str, Any]:
|
| 416 |
"""
|
| 417 |
Get statistics about Synthea-generated data
|
|
@@ -422,9 +528,26 @@ class SyntheaIntegrationService:
|
|
| 422 |
"patient_files": 0,
|
| 423 |
"hospital_files": 0,
|
| 424 |
"practitioner_files": 0,
|
| 425 |
-
"total_size_mb": 0
|
|
|
|
|
|
|
| 426 |
}
|
| 427 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
if self.output_dir.exists():
|
| 429 |
for file_path in self.output_dir.glob("*.json"):
|
| 430 |
stats["total_files"] += 1
|
|
|
|
| 99 |
try:
|
| 100 |
logger.info("🚀 Starting Synthea generation...")
|
| 101 |
|
| 102 |
+
# Check if Java is available
|
| 103 |
+
try:
|
| 104 |
+
java_check = await asyncio.create_subprocess_exec(
|
| 105 |
+
"java", "-version",
|
| 106 |
+
stdout=asyncio.subprocess.PIPE,
|
| 107 |
+
stderr=asyncio.subprocess.PIPE
|
| 108 |
+
)
|
| 109 |
+
await java_check.communicate()
|
| 110 |
+
if java_check.returncode != 0:
|
| 111 |
+
logger.error("❌ Java is not available in the environment")
|
| 112 |
+
return False
|
| 113 |
+
except FileNotFoundError:
|
| 114 |
+
logger.error("❌ Java is not installed or not in PATH")
|
| 115 |
+
return False
|
| 116 |
+
|
| 117 |
# Clear output directory
|
| 118 |
for file in self.output_dir.glob("*.json"):
|
| 119 |
file.unlink()
|
|
|
|
| 125 |
"-o", str(self.output_dir)
|
| 126 |
]
|
| 127 |
|
| 128 |
+
logger.info(f"Running command: {' '.join(cmd)}")
|
| 129 |
+
|
| 130 |
process = await asyncio.create_subprocess_exec(
|
| 131 |
*cmd,
|
| 132 |
stdout=asyncio.subprocess.PIPE,
|
|
|
|
| 140 |
logger.info(f"Output: {stdout.decode()}")
|
| 141 |
return True
|
| 142 |
else:
|
| 143 |
+
error_output = stderr.decode()
|
| 144 |
+
logger.error(f"❌ Synthea generation failed with return code {process.returncode}")
|
| 145 |
+
logger.error(f"Error output: {error_output}")
|
| 146 |
return False
|
| 147 |
|
| 148 |
except Exception as e:
|
|
|
|
| 386 |
try:
|
| 387 |
logger.info(f"🎯 Starting Synthea generation for {population} patients")
|
| 388 |
|
| 389 |
+
# Try to use real Synthea first
|
| 390 |
+
try:
|
| 391 |
+
# Download Synthea if needed
|
| 392 |
+
if not await self.download_synthea():
|
| 393 |
+
logger.warning("⚠️ Failed to download Synthea, falling back to mock data")
|
| 394 |
+
return await self._generate_mock_patients(population, age_min, age_max, gender, location)
|
| 395 |
+
|
| 396 |
+
# Generate configuration
|
| 397 |
+
config_overrides = {
|
| 398 |
+
"population": population,
|
| 399 |
+
"age_min": age_min,
|
| 400 |
+
"age_max": age_max,
|
| 401 |
+
"gender": gender,
|
| 402 |
+
"location": location
|
| 403 |
+
}
|
| 404 |
+
|
| 405 |
+
config_file = await self.generate_synthea_config(config_overrides)
|
| 406 |
+
|
| 407 |
+
# Run generation
|
| 408 |
+
if not await self.run_synthea_generation(config_file):
|
| 409 |
+
logger.warning("⚠️ Synthea generation failed, falling back to mock data")
|
| 410 |
+
return await self._generate_mock_patients(population, age_min, age_max, gender, location)
|
| 411 |
+
|
| 412 |
+
# Process output
|
| 413 |
+
patients = await self.process_synthea_output()
|
| 414 |
+
|
| 415 |
+
return {
|
| 416 |
+
"status": "success",
|
| 417 |
+
"generated_patients": len(patients),
|
| 418 |
+
"patients": patients,
|
| 419 |
+
"config": config_overrides,
|
| 420 |
+
"output_directory": str(self.output_dir),
|
| 421 |
+
"source": "synthea_real"
|
| 422 |
+
}
|
| 423 |
+
|
| 424 |
+
except Exception as e:
|
| 425 |
+
logger.warning(f"⚠️ Synthea integration failed: {str(e)}, falling back to mock data")
|
| 426 |
+
return await self._generate_mock_patients(population, age_min, age_max, gender, location)
|
| 427 |
|
|
|
|
|
|
|
| 428 |
except Exception as e:
|
| 429 |
logger.error(f"❌ Error in generate_and_import_patients: {str(e)}")
|
| 430 |
raise HTTPException(
|
|
|
|
| 432 |
detail=f"Synthea integration failed: {str(e)}"
|
| 433 |
)
|
| 434 |
|
| 435 |
+
async def _generate_mock_patients(
    self,
    population: int = 10,
    age_min: int = 18,
    age_max: int = 80,
    gender: str = "both",
    location: str = "Massachusetts"
) -> Dict[str, Any]:
    """
    Generate mock patient data when Synthea is not available

    Builds ``population`` randomly generated patient records entirely in
    memory; nothing is written to disk.

    Args:
        population: Number of patient records to create. Zero or a
            negative value yields an empty patient list.
        age_min: Lower bound (inclusive) of the patient age in whole years.
        age_max: Upper bound (inclusive) of the patient age in whole years.
            If the bounds are passed in the wrong order they are swapped
            instead of raising.
        gender: ``"both"`` picks "male" or "female" at random per patient;
            any other value is copied to every record unchanged.
        location: Only echoed back in the returned ``config``. The
            generated addresses always use the built-in Massachusetts
            city list and state "MA".

    Returns:
        A dict with the keys "status", "generated_patients", "patients",
        "config", "output_directory" and "source" (always "synthea_mock").
    """
    import random
    from datetime import date, datetime, timedelta, timezone

    logger.info(f"🎭 Generating {population} mock patients")

    first_names = ["John", "Jane", "Michael", "Sarah", "David", "Emily", "Robert", "Lisa", "James", "Maria"]
    last_names = ["Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez"]
    # Each city is listed once so the random choice is uniform
    # (the list previously contained "Cambridge" twice).
    cities = ["Boston", "Cambridge", "Worcester", "Springfield", "Lowell", "Fall River", "New Bedford", "Brockton", "Quincy", "Lynn"]
    street_names = ["Main St", "Oak Ave", "Elm St", "Maple Dr", "Cedar Ln"]
    marital_statuses = ['single', 'married', 'divorced', 'widowed']

    def years_before(day, years):
        """Return ``day`` moved back by whole calendar years."""
        try:
            return day.replace(year=day.year - years)
        except ValueError:
            # Feb 29 has no counterpart in a non-leap year; use Feb 28.
            return day.replace(year=day.year - years, day=28)

    # Tolerate swapped bounds; random.randint raises ValueError otherwise.
    youngest, oldest = sorted((age_min, age_max))

    today = date.today()

    # One naive-UTC timestamp for the whole batch. Same string format as
    # the deprecated datetime.utcnow(), and import_date/last_updated are
    # guaranteed to match within a record.
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    patients = []

    for i in range(population):
        # Generate random name
        first_name = random.choice(first_names)
        last_name = random.choice(last_names)
        full_name = f"{first_name} {last_name}"

        # Generate random age, then a birth date on which the patient is
        # exactly that many whole years old today. Calendar-year arithmetic
        # is used because a 365-day year drifts by one day per leap year
        # and can push the effective age outside the requested range.
        age = random.randint(youngest, oldest)
        latest_birthday = years_before(today, age)
        earliest_birthday = years_before(today, age + 1) + timedelta(days=1)
        window_days = (latest_birthday - earliest_birthday).days
        birth_date = earliest_birthday + timedelta(days=random.randint(0, window_days))

        # Generate random gender
        if gender == "both":
            patient_gender = random.choice(["male", "female"])
        else:
            patient_gender = gender

        # Generate random address
        street_number = random.randint(100, 9999)
        street_name = random.choice(street_names)
        city = random.choice(cities)
        state = "MA"
        postal_code = f"{random.randint(10000, 99999)}"

        patient_data = {
            # NOTE(review): ids restart at 1 on every call, so two batches
            # share the same fhir_id values - confirm the importer copes.
            'fhir_id': f"mock-patient-{i+1}",
            'full_name': full_name,
            'gender': patient_gender,
            'date_of_birth': birth_date.strftime('%Y-%m-%d'),
            'address': f"{street_number} {street_name}",
            'city': city,
            'state': state,
            'postal_code': postal_code,
            'country': 'US',
            'marital_status': random.choice(marital_statuses),
            'language': 'English',
            'source': 'synthea_mock',
            'import_date': generated_at,
            'last_updated': generated_at,
            'conditions': [],
            'medications': [],
            'encounters': [],
            'observations': [],
            'procedures': [],
            'immunizations': [],
            'allergies': []
        }

        patients.append(patient_data)

    return {
        "status": "success",
        "generated_patients": len(patients),
        "patients": patients,
        "config": {
            "population": population,
            "age_min": age_min,
            "age_max": age_max,
            "gender": gender,
            "location": location
        },
        "output_directory": "mock_data",
        "source": "synthea_mock"
    }
|
| 520 |
+
|
| 521 |
async def get_synthea_statistics(self) -> Dict[str, Any]:
|
| 522 |
"""
|
| 523 |
Get statistics about Synthea-generated data
|
|
|
|
| 528 |
"patient_files": 0,
|
| 529 |
"hospital_files": 0,
|
| 530 |
"practitioner_files": 0,
|
| 531 |
+
"total_size_mb": 0,
|
| 532 |
+
"synthea_available": False,
|
| 533 |
+
"java_available": False
|
| 534 |
}
|
| 535 |
|
| 536 |
+
# Check if Java is available
|
| 537 |
+
try:
|
| 538 |
+
java_check = await asyncio.create_subprocess_exec(
|
| 539 |
+
"java", "-version",
|
| 540 |
+
stdout=asyncio.subprocess.PIPE,
|
| 541 |
+
stderr=asyncio.subprocess.PIPE
|
| 542 |
+
)
|
| 543 |
+
await java_check.communicate()
|
| 544 |
+
stats["java_available"] = (java_check.returncode == 0)
|
| 545 |
+
except FileNotFoundError:
|
| 546 |
+
stats["java_available"] = False
|
| 547 |
+
|
| 548 |
+
# Check if Synthea JAR exists
|
| 549 |
+
stats["synthea_available"] = self.synthea_jar_path.exists()
|
| 550 |
+
|
| 551 |
if self.output_dir.exists():
|
| 552 |
for file_path in self.output_dir.glob("*.json"):
|
| 553 |
stats["total_files"] += 1
|