seanpedrickcase committed on
Commit
6806363
·
1 Parent(s): 8da3518

Custom env variables should now override the defaults for the Lambda function. Usage logs should now be created correctly by the Lambda function.

Browse files
lambda_entrypoint.py CHANGED
@@ -199,8 +199,26 @@ def lambda_handler(event, context):
199
  print("Detected .env file, loading environment variables...")
200
 
201
  # Load environment variables from the .env file
202
- load_dotenv(input_file_path)
203
- print("Environment variables loaded from .env file")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
  # Extract the actual input file path from environment variables
206
  # Look for common environment variable names that might contain the input file path
@@ -252,6 +270,14 @@ def lambda_handler(event, context):
252
  # 4. Prepare arguments for the CLI function
253
  # This dictionary should mirror the one in your app.py's "direct mode"
254
  # If we loaded a .env file, use environment variables as defaults
 
 
 
 
 
 
 
 
255
  cli_args = {
256
  # Task Selection
257
  "task": arguments.get("task", os.getenv("DIRECT_MODE_TASK", "redact")),
@@ -289,7 +315,7 @@ def lambda_handler(event, context):
289
  "do_initial_clean", os.getenv("DO_INITIAL_TABULAR_DATA_CLEAN", "False")
290
  ),
291
  "save_logs_to_csv": arguments.get(
292
- "save_logs_to_csv", os.getenv("SAVE_LOGS_TO_CSV", "False")
293
  ),
294
  "save_logs_to_dynamodb": arguments.get(
295
  "save_logs_to_dynamodb", os.getenv("SAVE_LOGS_TO_DYNAMODB", "False")
@@ -325,9 +351,7 @@ def lambda_handler(event, context):
325
  os.getenv("SPACY_MODEL_PATH", os.environ["SPACY_MODEL_PATH"]),
326
  ),
327
  # PDF/Image Redaction Arguments
328
- "ocr_method": arguments.get(
329
- "ocr_method", os.getenv("TESSERACT_TEXT_EXTRACT_OPTION", "Local OCR")
330
- ),
331
  "page_min": int(
332
  arguments.get("page_min", os.getenv("DEFAULT_PAGE_MIN", DEFAULT_PAGE_MIN))
333
  ),
@@ -471,6 +495,12 @@ def lambda_handler(event, context):
471
  "prepare_images": arguments.get("prepare_images", True),
472
  }
473
 
 
 
 
 
 
 
474
  # Combine extraction options
475
  extraction_options = (
476
  _get_env_list(cli_args["handwrite_signature_extraction"])
 
199
  print("Detected .env file, loading environment variables...")
200
 
201
  # Load environment variables from the .env file
202
+ print(f"Loading .env file from: {input_file_path}")
203
+
204
+ # Check if file exists and is readable
205
+ if os.path.exists(input_file_path):
206
+ print(".env file exists and is readable")
207
+ with open(input_file_path, "r") as f:
208
+ content = f.read()
209
+ print(f".env file content preview: {content[:200]}...")
210
+ else:
211
+ print(f"ERROR: .env file does not exist at {input_file_path}")
212
+
213
+ load_dotenv(input_file_path, override=True)
214
+ print("Environment variables loaded from .env file (with override=True)")
215
+
216
+ # Debug: Print the loaded environment variables
217
+ print(f"DEFAULT_PAGE_MIN from env: {os.getenv('DEFAULT_PAGE_MIN')}")
218
+ print(f"DEFAULT_PAGE_MAX from env: {os.getenv('DEFAULT_PAGE_MAX')}")
219
+ print(
220
+ f"All DEFAULT_PAGE_* env vars: {[k for k in os.environ.keys() if 'DEFAULT_PAGE' in k]}"
221
+ )
222
 
223
  # Extract the actual input file path from environment variables
224
  # Look for common environment variable names that might contain the input file path
 
270
  # 4. Prepare arguments for the CLI function
271
  # This dictionary should mirror the one in your app.py's "direct mode"
272
  # If we loaded a .env file, use environment variables as defaults
273
+
274
+ # Debug: Print environment variables before constructing cli_args
275
+ print("Before cli_args construction:")
276
+ print(f" DEFAULT_PAGE_MIN from env: {os.getenv('DEFAULT_PAGE_MIN')}")
277
+ print(f" DEFAULT_PAGE_MAX from env: {os.getenv('DEFAULT_PAGE_MAX')}")
278
+ print(f" DEFAULT_PAGE_MIN from config: {DEFAULT_PAGE_MIN}")
279
+ print(f" DEFAULT_PAGE_MAX from config: {DEFAULT_PAGE_MAX}")
280
+
281
  cli_args = {
282
  # Task Selection
283
  "task": arguments.get("task", os.getenv("DIRECT_MODE_TASK", "redact")),
 
315
  "do_initial_clean", os.getenv("DO_INITIAL_TABULAR_DATA_CLEAN", "False")
316
  ),
317
  "save_logs_to_csv": arguments.get(
318
+ "save_logs_to_csv", os.getenv("SAVE_LOGS_TO_CSV", "True")
319
  ),
320
  "save_logs_to_dynamodb": arguments.get(
321
  "save_logs_to_dynamodb", os.getenv("SAVE_LOGS_TO_DYNAMODB", "False")
 
351
  os.getenv("SPACY_MODEL_PATH", os.environ["SPACY_MODEL_PATH"]),
352
  ),
353
  # PDF/Image Redaction Arguments
354
+ "ocr_method": arguments.get("ocr_method", os.getenv("OCR_METHOD", "Local OCR")),
 
 
355
  "page_min": int(
356
  arguments.get("page_min", os.getenv("DEFAULT_PAGE_MIN", DEFAULT_PAGE_MIN))
357
  ),
 
495
  "prepare_images": arguments.get("prepare_images", True),
496
  }
497
 
498
+ # Debug: Print the final page_min and page_max values
499
+ print(f"Final cli_args page_min: {cli_args['page_min']}")
500
+ print(f"Final cli_args page_max: {cli_args['page_max']}")
501
+ print(f"Final cli_args save_logs_to_csv: {cli_args['save_logs_to_csv']}")
502
+ print(f"Final cli_args usage_logs_folder: {cli_args['usage_logs_folder']}")
503
+
504
  # Combine extraction options
505
  extraction_options = (
506
  _get_env_list(cli_args["handwrite_signature_extraction"])
tools/cli_usage_logger.py CHANGED
@@ -217,13 +217,13 @@ def create_cli_usage_logger(logs_folder: str = None) -> CLIUsageLogger:
217
  Returns:
218
  Configured CLIUsageLogger instance
219
  """
220
- # Parse CSV headers from config
221
- import json
222
-
223
  try:
224
- headers = json.loads(CSV_USAGE_LOG_HEADERS)
 
 
225
  except Exception as e:
226
- print(f"Error parsing CSV usage log headers: {e}")
227
  # Fallback headers if parsing fails
228
  headers = [
229
  "session_hash_textbox",
 
217
  Returns:
218
  Configured CLIUsageLogger instance
219
  """
220
+ # Use CSV headers from config (already parsed as list)
 
 
221
  try:
222
+ headers = CSV_USAGE_LOG_HEADERS
223
+ if not headers or len(headers) == 0:
224
+ raise ValueError("Empty headers list")
225
  except Exception as e:
226
+ print(f"Error using CSV usage log headers: {e}")
227
  # Fallback headers if parsing fails
228
  headers = [
229
  "session_hash_textbox",
tools/file_redaction.py CHANGED
@@ -1298,9 +1298,7 @@ def choose_and_run_redactor(
1298
  output_folder + pdf_file_name_without_ext + "_redacted.pdf"
1299
  )
1300
  # Add page range suffix if partial processing
1301
- print(
1302
- f"page_min: {page_min}, current_loop_page: {current_loop_page}, number_of_pages: {number_of_pages}"
1303
- )
1304
  out_redacted_pdf_file_path = add_page_range_suffix_to_file_path(
1305
  out_redacted_pdf_file_path,
1306
  page_min,
 
1298
  output_folder + pdf_file_name_without_ext + "_redacted.pdf"
1299
  )
1300
  # Add page range suffix if partial processing
1301
+
 
 
1302
  out_redacted_pdf_file_path = add_page_range_suffix_to_file_path(
1303
  out_redacted_pdf_file_path,
1304
  page_min,