Deadmon committed on
Commit
0e307b6
·
verified ·
1 Parent(s): 4c28a8f

Delete ttsfm-web/app.py1

Browse files
Files changed (1) hide show
  1. ttsfm-web/app.py1 +0 -574
ttsfm-web/app.py1 DELETED
@@ -1,574 +0,0 @@
1
- """
2
- TTSFM Web Application
3
-
4
- A Flask web application that provides a user-friendly interface
5
- for the TTSFM text-to-speech package.
6
- """
7
-
8
- import os
9
- import json
10
- import logging
11
- from datetime import datetime
12
- from pathlib import Path
13
- from typing import Dict, Any, Optional
14
-
15
- from flask import Flask, request, jsonify, send_file, Response, render_template
16
- from flask_cors import CORS
17
- from dotenv import load_dotenv
18
-
19
- # Import the TTSFM package
20
- try:
21
- from ttsfm import TTSClient, Voice, AudioFormat, TTSException
22
- from ttsfm.exceptions import APIException, NetworkException, ValidationException
23
- from ttsfm.utils import validate_text_length, split_text_by_length
24
- except ImportError:
25
- # Fallback for development when package is not installed
26
- import sys
27
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
28
- from ttsfm import TTSClient, Voice, AudioFormat, TTSException
29
- from ttsfm.exceptions import APIException, NetworkException, ValidationException
30
- from ttsfm.utils import validate_text_length, split_text_by_length
31
-
# Load environment variables
load_dotenv()

# Root logging: INFO level, each record stamped with time, logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Flask application with the 'static' folder exposed under /static; CORS enabled.
app = Flask(__name__, static_url_path='/static', static_folder='static')
CORS(app)

# Runtime configuration, each value overridable through the environment.
HOST = os.environ.get("HOST", "localhost")
PORT = int(os.environ.get("PORT", "8000"))
DEBUG = os.environ.get("DEBUG", "false").lower() == "true"

# Single shared TTS client, constructed with no arguments.
tts_client = TTSClient()

logger.info("Initialized web app with TTSFM using openai.fm free service")
55
-
@app.route('/')
def index():
    """Render the landing page template (``index.html``)."""
    page = render_template('index.html')
    return page
60
-
@app.route('/playground')
def playground():
    """Render the interactive playground template (``playground.html``)."""
    page = render_template('playground.html')
    return page
65
-
@app.route('/docs')
def docs():
    """Render the API documentation template (``docs.html``)."""
    page = render_template('docs.html')
    return page
70
-
@app.route('/api/voices', methods=['GET'])
def get_voices():
    """Return every member of the ``Voice`` enum as a JSON catalogue.

    The response carries a ``voices`` list (``id``, ``name`` and
    ``description`` per voice) and a ``count``. Any failure is logged and
    reported as HTTP 500.
    """
    try:
        catalogue = []
        for member in Voice:
            label = member.value.title()
            catalogue.append({
                "id": member.value,
                "name": label,
                "description": f"{label} voice",
            })

        payload = {"voices": catalogue, "count": len(catalogue)}
        return jsonify(payload)

    except Exception as exc:
        logger.error(f"Error getting voices: {exc}")
        return jsonify({"error": "Failed to get voices"}), 500
92
-
@app.route('/api/formats', methods=['GET'])
def get_formats():
    """Return the catalogue of supported audio formats as JSON.

    The response carries a ``formats`` list (one descriptor per format) and
    a ``count``. Any failure is logged and reported as HTTP 500.
    """
    # Column order here fixes the key order of every descriptor.
    columns = (
        "id", "name", "mime_type", "description",
        "quality", "file_size", "use_case",
    )
    rows = (
        ("mp3", "MP3", "audio/mpeg",
         "MP3 audio format - good quality, small file size",
         "Good", "Small", "Web, mobile apps, general use"),
        ("opus", "OPUS", "audio/opus",
         "OPUS audio format - excellent quality, small file size",
         "Excellent", "Small", "Web streaming, VoIP"),
        ("aac", "AAC", "audio/aac",
         "AAC audio format - good quality, medium file size",
         "Good", "Medium", "Apple devices, streaming"),
        ("flac", "FLAC", "audio/flac",
         "FLAC audio format - lossless quality, large file size",
         "Lossless", "Large", "High-quality archival"),
        ("wav", "WAV", "audio/wav",
         "WAV audio format - lossless quality, large file size",
         "Lossless", "Large", "Professional audio"),
        ("pcm", "PCM", "audio/pcm",
         "PCM audio format - raw audio data, large file size",
         "Raw", "Large", "Audio processing"),
    )

    try:
        catalogue = [dict(zip(columns, row)) for row in rows]
        payload = {"formats": catalogue, "count": len(catalogue)}
        return jsonify(payload)

    except Exception as exc:
        logger.error(f"Error getting formats: {exc}")
        return jsonify({"error": "Failed to get formats"}), 500
162
-
@app.route('/api/validate-text', methods=['POST'])
def validate_text():
    """Check whether the submitted text fits within ``max_length`` characters.

    Expects a JSON object with:
        text: required, non-blank string (surrounding whitespace is stripped).
        max_length: optional positive integer, default 4096.

    Returns JSON with ``text_length``, ``max_length``, ``is_valid`` and
    ``needs_splitting``. When the text is too long, also ``suggested_chunks``
    and a ``chunk_preview`` of up to three chunks, each cut to 100 characters.

    Responds 400 for a missing/malformed body or invalid field types, and
    500 for any unexpected failure.
    """
    try:
        # silent=True makes malformed JSON or a non-JSON content type yield
        # None instead of raising, so client mistakes are answered with 400
        # rather than being swallowed by the blanket handler below as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or not data:
            return jsonify({"error": "No JSON data provided"}), 400

        text = data.get('text')
        if not isinstance(text, str) or not text.strip():
            return jsonify({"error": "Text is required"}), 400
        text = text.strip()

        max_length = data.get('max_length', 4096)
        # bool is a subclass of int, so it has to be excluded explicitly.
        if (
            isinstance(max_length, bool)
            or not isinstance(max_length, int)
            or max_length <= 0
        ):
            return jsonify({"error": "max_length must be a positive integer"}), 400

        text_length = len(text)
        is_valid = text_length <= max_length

        result = {
            "text_length": text_length,
            "max_length": max_length,
            "is_valid": is_valid,
            "needs_splitting": not is_valid,
        }

        if not is_valid:
            # Provide splitting suggestions
            chunks = split_text_by_length(text, max_length, preserve_words=True)
            result.update({
                "suggested_chunks": len(chunks),
                "chunk_preview": [
                    chunk[:100] + "..." if len(chunk) > 100 else chunk
                    for chunk in chunks[:3]
                ],
            })

        return jsonify(result)

    except Exception as e:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"Text validation error: {e}")
        return jsonify({"error": "Text validation failed"}), 500
200
-
@app.route('/api/generate', methods=['POST'])
def generate_speech():
    """Generate speech for a single piece of text and return the audio bytes.

    Expects a JSON object with:
        text: required, non-blank string.
        voice: optional ``Voice`` value (case-insensitive), default alloy.
        format: optional ``AudioFormat`` value (case-insensitive), default mp3.
        instructions: optional string; blank or null means "none".
        max_length: optional positive integer, default 4096.
        validate_length: optional flag forwarded to the TTS client, default True.

    On success the raw audio is returned as an attachment with the content
    type reported by the TTS client. Invalid input yields 400, TTS API
    errors reuse the exception's status code (500 if absent), network
    failures yield 503 and anything else 500.
    """
    try:
        # silent=True: malformed JSON / wrong content type gives None instead
        # of raising, so it is reported as 400 rather than caught below as 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or not data:
            return jsonify({"error": "No JSON data provided"}), 400

        # Extract parameters
        text = data.get('text')
        voice = data.get('voice', Voice.ALLOY.value)
        response_format = data.get('format', AudioFormat.MP3.value)
        instructions = data.get('instructions')
        max_length = data.get('max_length', 4096)
        validate_length = data.get('validate_length', True)

        # Validate required fields
        if not isinstance(text, str) or not text.strip():
            return jsonify({"error": "Text is required"}), 400
        text = text.strip()

        # JSON null is treated like an absent field; other non-strings are rejected.
        if instructions is not None and not isinstance(instructions, str):
            return jsonify({"error": "Instructions must be a string"}), 400
        instructions = (instructions or '').strip() or None

        # bool is a subclass of int, so it has to be excluded explicitly.
        if (
            isinstance(max_length, bool)
            or not isinstance(max_length, int)
            or max_length <= 0
        ):
            return jsonify({"error": "max_length must be a positive integer"}), 400

        # Validate voice (AttributeError covers non-string values lacking .lower()).
        try:
            voice_enum = Voice(voice.lower())
        except (AttributeError, ValueError):
            return jsonify({
                "error": f"Invalid voice: {voice}. Must be one of: {[v.value for v in Voice]}"
            }), 400

        # Validate format
        try:
            format_enum = AudioFormat(response_format.lower())
        except (AttributeError, ValueError):
            return jsonify({
                "error": f"Invalid format: {response_format}. Must be one of: {[f.value for f in AudioFormat]}"
            }), 400

        logger.info(f"Generating speech: text='{text[:50]}...', voice={voice}, format={response_format}")

        # Generate speech using the TTSFM package with validation
        response = tts_client.generate_speech(
            text=text,
            voice=voice_enum,
            response_format=format_enum,
            instructions=instructions,
            max_length=max_length,
            validate_length=validate_length,
        )

        # Return audio data
        return Response(
            response.audio_data,
            mimetype=response.content_type,
            headers={
                'Content-Disposition': f'attachment; filename="speech.{response.format.value}"',
                'Content-Length': str(response.size),
                'X-Audio-Format': response.format.value,
                'X-Audio-Size': str(response.size),
            },
        )

    except ValidationException as e:
        logger.warning(f"Validation error: {e}")
        return jsonify({"error": str(e)}), 400

    except APIException as e:
        logger.error(f"API error: {e}")
        # Fall back to 500 when the attribute is missing or falsy (e.g. None),
        # so an error body is never sent with an unset status.
        status_code = getattr(e, 'status_code', None) or 500
        return jsonify({
            "error": str(e),
            "status_code": status_code,
        }), status_code

    except NetworkException as e:
        logger.error(f"Network error: {e}")
        return jsonify({
            "error": "TTS service is currently unavailable",
            "details": str(e),
        }), 503

    except TTSException as e:
        logger.error(f"TTS error: {e}")
        return jsonify({"error": str(e)}), 500

    except Exception as e:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"Unexpected error: {e}")
        return jsonify({"error": "Internal server error"}), 500
287
-
@app.route('/api/generate-batch', methods=['POST'])
def generate_speech_batch():
    """Generate speech for long text by splitting it into chunks.

    Expects a JSON object with:
        text: required, non-blank string.
        voice: optional ``Voice`` value (case-insensitive), default alloy.
        format: optional ``AudioFormat`` value (case-insensitive), default mp3.
        instructions: optional string; blank or null means "none".
        max_length: optional positive integer chunk size, default 4096.
        preserve_words: optional flag forwarded to the splitter, default True.

    Returns JSON with ``total_chunks``, ``successful_chunks`` and a
    ``results`` list. Each result holds the 1-based ``chunk_index``, a
    preview of the chunk text (cut to 100 characters) and either the
    base64-encoded audio with its metadata or an ``error`` string. A failing
    chunk does not abort the remaining chunks.

    Responds 400 for invalid input and 500 for any unexpected failure.
    """
    # Imported once here rather than on every loop iteration.
    import base64

    try:
        # silent=True: malformed JSON / wrong content type gives None instead
        # of raising, so it is reported as 400 rather than caught below as 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or not data:
            return jsonify({"error": "No JSON data provided"}), 400

        text = data.get('text')
        voice = data.get('voice', Voice.ALLOY.value)
        response_format = data.get('format', AudioFormat.MP3.value)
        instructions = data.get('instructions')
        max_length = data.get('max_length', 4096)
        preserve_words = data.get('preserve_words', True)

        if not isinstance(text, str) or not text.strip():
            return jsonify({"error": "Text is required"}), 400
        text = text.strip()

        # JSON null is treated like an absent field; other non-strings are rejected.
        if instructions is not None and not isinstance(instructions, str):
            return jsonify({"error": "Instructions must be a string"}), 400
        instructions = (instructions or '').strip() or None

        # bool is a subclass of int, so it has to be excluded explicitly.
        if (
            isinstance(max_length, bool)
            or not isinstance(max_length, int)
            or max_length <= 0
        ):
            return jsonify({"error": "max_length must be a positive integer"}), 400

        # Validate voice and format
        if not isinstance(voice, str) or not isinstance(response_format, str):
            return jsonify({
                "error": "Invalid voice or format: voice and format must be strings"
            }), 400
        try:
            voice_enum = Voice(voice.lower())
            format_enum = AudioFormat(response_format.lower())
        except ValueError as e:
            return jsonify({"error": f"Invalid voice or format: {e}"}), 400

        # Split text into chunks
        chunks = split_text_by_length(text, max_length, preserve_words=preserve_words)

        if not chunks:
            return jsonify({"error": "No valid text chunks found"}), 400

        logger.info(f"Processing {len(chunks)} chunks for batch generation")

        # Generate speech for each chunk
        results = []
        successful = 0
        for position, chunk in enumerate(chunks, start=1):
            preview = chunk[:100] + "..." if len(chunk) > 100 else chunk
            try:
                response = tts_client.generate_speech(
                    text=chunk,
                    voice=voice_enum,
                    response_format=format_enum,
                    instructions=instructions,
                    max_length=max_length,
                    validate_length=False,  # Already split
                )

                # Convert to base64 for JSON response
                audio_b64 = base64.b64encode(response.audio_data).decode('utf-8')

                results.append({
                    "chunk_index": position,
                    "chunk_text": preview,
                    "audio_data": audio_b64,
                    "content_type": response.content_type,
                    "size": response.size,
                    "format": response.format.value,
                })
                successful += 1

            except Exception as e:
                # Best effort: record the failure and carry on with the next chunk.
                logger.error(f"Failed to generate chunk {position}: {e}")
                results.append({
                    "chunk_index": position,
                    "chunk_text": preview,
                    "error": str(e),
                })

        return jsonify({
            "total_chunks": len(chunks),
            "successful_chunks": successful,
            "results": results,
        })

    except Exception as e:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"Batch generation error: {e}")
        return jsonify({"error": "Batch generation failed"}), 500
364
-
@app.route('/api/status', methods=['GET'])
def get_status():
    """Report whether the TTS backend currently answers requests.

    Probes the backend by generating speech for the word "test" (alloy
    voice, MP3). Returns an "online" JSON document on success, or an
    "error" document with HTTP 503 when the probe raises.
    """
    service_label = "openai.fm (free)"
    try:
        # Only the fact that the call succeeds matters; the audio is discarded.
        tts_client.generate_speech(
            text="test",
            voice=Voice.ALLOY,
            response_format=AudioFormat.MP3,
        )

        online_report = {
            "status": "online",
            "tts_service": service_label,
            "package_version": "3.0.0",
            "timestamp": datetime.now().isoformat(),
        }
        return jsonify(online_report)

    except Exception as exc:
        logger.error(f"Status check failed: {exc}")
        failure_report = {
            "status": "error",
            "tts_service": service_label,
            "error": str(exc),
            "timestamp": datetime.now().isoformat(),
        }
        return jsonify(failure_report), 503
391
-
@app.route('/api/health', methods=['GET'])
def health_check():
    """Liveness probe: always reports "healthy" with the current timestamp."""
    report = {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
    }
    return jsonify(report)
399
-
# OpenAI-compatible API endpoints
def _openai_error(message, error_type, code, status):
    """Build an OpenAI-style ``{"error": {...}}`` response with the given status."""
    return jsonify({
        "error": {
            "message": message,
            "type": error_type,
            "code": code,
        }
    }), status


@app.route('/v1/audio/speech', methods=['POST'])
def openai_speech():
    """OpenAI-compatible speech generation endpoint.

    Expects a JSON object with:
        input: required, non-blank string to synthesise.
        voice: optional ``Voice`` value (case-insensitive), default "alloy".
        response_format: optional ``AudioFormat`` value, default "mp3".
        instructions: optional string; blank or null means "none".
        model, speed: accepted for compatibility but ignored.

    On success the raw audio bytes are returned with the content type
    reported by the TTS client. Errors use the ``{"error": {message, type,
    code}}`` envelope: 400 for invalid input, the exception's status code
    (500 if absent) for TTS API errors, 503 for network failures and 500
    for anything else.
    """
    try:
        # silent=True: malformed JSON / wrong content type gives None instead
        # of raising, so it is reported as 400 rather than caught below as 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or not data:
            return _openai_error(
                "No JSON data provided", "invalid_request_error", "missing_data", 400
            )

        # Extract OpenAI-compatible parameters ('model' and 'speed' are ignored).
        input_text = data.get('input')
        voice = data.get('voice', 'alloy')
        response_format = data.get('response_format', 'mp3')
        instructions = data.get('instructions')

        # Validate required fields
        if not isinstance(input_text, str) or not input_text.strip():
            return _openai_error(
                "Input text is required", "invalid_request_error", "missing_input", 400
            )
        input_text = input_text.strip()

        # JSON null is treated like an absent field; other non-strings are rejected.
        if instructions is not None and not isinstance(instructions, str):
            return _openai_error(
                "Instructions must be a string",
                "invalid_request_error",
                "invalid_instructions",
                400,
            )
        instructions = (instructions or '').strip() or None

        # Validate voice (AttributeError covers non-string values lacking .lower()).
        try:
            voice_enum = Voice(voice.lower())
        except (AttributeError, ValueError):
            return _openai_error(
                f"Invalid voice: {voice}. Must be one of: {[v.value for v in Voice]}",
                "invalid_request_error",
                "invalid_voice",
                400,
            )

        # Validate format
        try:
            format_enum = AudioFormat(response_format.lower())
        except (AttributeError, ValueError):
            return _openai_error(
                f"Invalid response_format: {response_format}. Must be one of: {[f.value for f in AudioFormat]}",
                "invalid_request_error",
                "invalid_format",
                400,
            )

        logger.info(f"OpenAI API: Generating speech: text='{input_text[:50]}...', voice={voice}, format={response_format}")

        # Generate speech using the TTSFM package
        response = tts_client.generate_speech(
            text=input_text,
            voice=voice_enum,
            response_format=format_enum,
            instructions=instructions,
            max_length=4096,
            validate_length=True,
        )

        # Return audio data in OpenAI format
        return Response(
            response.audio_data,
            mimetype=response.content_type,
            headers={
                'Content-Type': response.content_type,
                'Content-Length': str(response.size),
                'X-Audio-Format': response.format.value,
                'X-Audio-Size': str(response.size),
                'X-Powered-By': 'TTSFM-OpenAI-Compatible',
            },
        )

    except ValidationException as e:
        logger.warning(f"OpenAI API validation error: {e}")
        return _openai_error(str(e), "invalid_request_error", "validation_error", 400)

    except APIException as e:
        logger.error(f"OpenAI API error: {e}")
        # Fall back to 500 when the attribute is missing or falsy (e.g. None).
        status_code = getattr(e, 'status_code', None) or 500
        return _openai_error(str(e), "api_error", "tts_error", status_code)

    except NetworkException as e:
        logger.error(f"OpenAI API network error: {e}")
        return _openai_error(
            "TTS service is currently unavailable",
            "service_unavailable_error",
            "service_unavailable",
            503,
        )

    except Exception as e:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"OpenAI API unexpected error: {e}")
        return _openai_error(
            "An unexpected error occurred", "internal_error", "internal_error", 500
        )
522
-
@app.route('/v1/models', methods=['GET'])
def openai_models():
    """OpenAI-compatible model listing; advertises the single TTS model."""
    model_id = "gpt-4o-mini-tts"
    model_entry = {
        "id": model_id,
        "object": "model",
        "created": 1699564800,
        "owned_by": "ttsfm",
        "permission": [],
        "root": model_id,
        "parent": None,
    }
    listing = {"object": "list", "data": [model_entry]}
    return jsonify(listing)
540
-
@app.errorhandler(404)
def not_found(error):
    """Answer 404 errors with a JSON body instead of the default HTML page."""
    body = jsonify({"error": "Endpoint not found"})
    return body, 404
545
-
@app.errorhandler(405)
def method_not_allowed(error):
    """Answer 405 errors with a JSON body instead of the default HTML page."""
    body = jsonify({"error": "Method not allowed"})
    return body, 405
550
-
@app.errorhandler(500)
def internal_error(error):
    """Log a 500 error and answer it with a generic JSON body."""
    logger.error(f"Internal server error: {error}")
    body = jsonify({"error": "Internal server error"})
    return body, 500
556
-
if __name__ == '__main__':
    # Announce the effective configuration before the server takes over.
    logger.info(f"Starting TTSFM web application on {HOST}:{PORT}")
    logger.info("Using openai.fm free TTS service")
    logger.info(f"Debug mode: {DEBUG}")

    run_options = {"host": HOST, "port": PORT, "debug": DEBUG}
    try:
        app.run(**run_options)
    except KeyboardInterrupt:
        logger.info("Application stopped by user")
    except Exception as exc:
        logger.error(f"Failed to start application: {exc}")
    finally:
        # Close the TTS client however the server stopped.
        tts_client.close()