seanpedrickcase committed on
Commit
5aec971
·
1 Parent(s): b1f183d

Updated test suite to deal with missing file issues

Browse files
.github/scripts/setup_test_data.py CHANGED
@@ -5,6 +5,7 @@ Creates dummy test files when example data is not available.
5
  """
6
 
7
  import os
 
8
 
9
  import pandas as pd
10
 
@@ -19,7 +20,7 @@ def create_directories():
19
 
20
 
21
  def create_dummy_pdf():
22
- """Create a dummy PDF for testing."""
23
 
24
  # Install reportlab if not available
25
  try:
@@ -33,9 +34,12 @@ def create_dummy_pdf():
33
  from reportlab.pdfgen import canvas
34
 
35
  try:
 
36
  pdf_path = (
37
  "example_data/example_of_emails_sent_to_a_professor_before_applying.pdf"
38
  )
 
 
39
 
40
  c = canvas.Canvas(pdf_path, pagesize=letter)
41
  c.drawString(100, 750, "This is a test document for redaction testing.")
@@ -53,15 +57,69 @@ def create_dummy_pdf():
53
 
54
  print(f"Created dummy PDF: {pdf_path}")
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  except ImportError:
57
  print("ReportLab not available, skipping PDF creation")
58
- # Create a simple text file instead
59
  with open(
60
  "example_data/example_of_emails_sent_to_a_professor_before_applying.pdf",
61
  "w",
62
  ) as f:
63
  f.write("This is a dummy PDF file for testing")
64
- print("Created dummy text file instead of PDF")
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
67
  def create_dummy_csv():
@@ -209,6 +267,8 @@ def create_dummy_image():
209
  def main():
210
  """Main setup function."""
211
  print("Setting up test data for GitHub Actions...")
 
 
212
 
213
  create_directories()
214
  create_dummy_pdf()
@@ -222,7 +282,29 @@ def main():
222
  print("Created files:")
223
  for root, dirs, files in os.walk("example_data"):
224
  for file in files:
225
- print(f" {os.path.join(root, file)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
 
228
  if __name__ == "__main__":
 
5
  """
6
 
7
  import os
8
+ import sys
9
 
10
  import pandas as pd
11
 
 
20
 
21
 
22
  def create_dummy_pdf():
23
+ """Create dummy PDFs for testing."""
24
 
25
  # Install reportlab if not available
26
  try:
 
34
  from reportlab.pdfgen import canvas
35
 
36
  try:
37
+ # Create the main test PDF
38
  pdf_path = (
39
  "example_data/example_of_emails_sent_to_a_professor_before_applying.pdf"
40
  )
41
+ print(f"Creating PDF: {pdf_path}")
42
+ print(f"Directory exists: {os.path.exists('example_data')}")
43
 
44
  c = canvas.Canvas(pdf_path, pagesize=letter)
45
  c.drawString(100, 750, "This is a test document for redaction testing.")
 
57
 
58
  print(f"Created dummy PDF: {pdf_path}")
59
 
60
+ # Create Partnership Agreement Toolkit PDF
61
+ partnership_pdf_path = "example_data/Partnership-Agreement-Toolkit_0_0.pdf"
62
+ print(f"Creating PDF: {partnership_pdf_path}")
63
+ c = canvas.Canvas(partnership_pdf_path, pagesize=letter)
64
+ c.drawString(100, 750, "Partnership Agreement Toolkit")
65
+ c.drawString(100, 700, "This is a test partnership agreement document.")
66
+ c.drawString(100, 650, "Contact: partnership@example.com")
67
+ c.drawString(100, 600, "Phone: (555) 123-4567")
68
+ c.drawString(100, 550, "Address: 123 Partnership Street, City, State 12345")
69
+ c.showPage()
70
+
71
+ # Add second page
72
+ c.drawString(100, 750, "Page 2 - Partnership Details")
73
+ c.drawString(100, 700, "More partnership information here.")
74
+ c.drawString(100, 650, "Contact: info@partnership.org")
75
+ c.showPage()
76
+
77
+ # Add third page
78
+ c.drawString(100, 750, "Page 3 - Terms and Conditions")
79
+ c.drawString(100, 700, "Terms and conditions content.")
80
+ c.drawString(100, 650, "Legal contact: legal@partnership.org")
81
+ c.save()
82
+
83
+ print(f"Created dummy PDF: {partnership_pdf_path}")
84
+
85
+ # Create Graduate Job Cover Letter PDF
86
+ cover_letter_pdf_path = "example_data/graduate-job-example-cover-letter.pdf"
87
+ print(f"Creating PDF: {cover_letter_pdf_path}")
88
+ c = canvas.Canvas(cover_letter_pdf_path, pagesize=letter)
89
+ c.drawString(100, 750, "Cover Letter Example")
90
+ c.drawString(100, 700, "Dear Hiring Manager,")
91
+ c.drawString(100, 650, "I am writing to apply for the position.")
92
+ c.drawString(100, 600, "Contact: applicant@example.com")
93
+ c.drawString(100, 550, "Phone: (555) 987-6543")
94
+ c.drawString(100, 500, "Address: 456 Job Street, Employment City, EC 54321")
95
+ c.drawString(100, 450, "Sincerely,")
96
+ c.drawString(100, 400, "John Applicant")
97
+ c.save()
98
+
99
+ print(f"Created dummy PDF: {cover_letter_pdf_path}")
100
+
101
  except ImportError:
102
  print("ReportLab not available, skipping PDF creation")
103
+ # Create simple text files instead
104
  with open(
105
  "example_data/example_of_emails_sent_to_a_professor_before_applying.pdf",
106
  "w",
107
  ) as f:
108
  f.write("This is a dummy PDF file for testing")
109
+
110
+ with open(
111
+ "example_data/Partnership-Agreement-Toolkit_0_0.pdf",
112
+ "w",
113
+ ) as f:
114
+ f.write("This is a dummy Partnership Agreement PDF file for testing")
115
+
116
+ with open(
117
+ "example_data/graduate-job-example-cover-letter.pdf",
118
+ "w",
119
+ ) as f:
120
+ f.write("This is a dummy cover letter PDF file for testing")
121
+
122
+ print("Created dummy text files instead of PDFs")
123
 
124
 
125
  def create_dummy_csv():
 
267
  def main():
268
  """Main setup function."""
269
  print("Setting up test data for GitHub Actions...")
270
+ print(f"Current working directory: {os.getcwd()}")
271
+ print(f"Python version: {sys.version}")
272
 
273
  create_directories()
274
  create_dummy_pdf()
 
282
  print("Created files:")
283
  for root, dirs, files in os.walk("example_data"):
284
  for file in files:
285
+ file_path = os.path.join(root, file)
286
+ print(f" {file_path}")
287
+ # Verify the file exists and has content
288
+ if os.path.exists(file_path):
289
+ file_size = os.path.getsize(file_path)
290
+ print(f" Size: {file_size} bytes")
291
+ else:
292
+ print(" WARNING: File does not exist!")
293
+
294
+ # Verify critical files exist
295
+ critical_files = [
296
+ "example_data/Partnership-Agreement-Toolkit_0_0.pdf",
297
+ "example_data/graduate-job-example-cover-letter.pdf",
298
+ "example_data/example_of_emails_sent_to_a_professor_before_applying.pdf",
299
+ ]
300
+
301
+ print("\nVerifying critical test files:")
302
+ for file_path in critical_files:
303
+ if os.path.exists(file_path):
304
+ file_size = os.path.getsize(file_path)
305
+ print(f"✅ {file_path} exists ({file_size} bytes)")
306
+ else:
307
+ print(f"❌ {file_path} MISSING!")
308
 
309
 
310
  if __name__ == "__main__":
.github/workflows/ci.yml CHANGED
@@ -89,6 +89,17 @@ jobs:
89
  - name: Setup test data
90
  run: |
91
  python .github/scripts/setup_test_data.py
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  - name: Clean up problematic config files
94
  run: |
@@ -161,6 +172,17 @@ jobs:
161
  - name: Setup test data
162
  run: |
163
  python .github/scripts/setup_test_data.py
 
 
 
 
 
 
 
 
 
 
 
164
 
165
  - name: Run integration tests
166
  run: |
 
89
  - name: Setup test data
90
  run: |
91
  python .github/scripts/setup_test_data.py
92
+ echo "Setup script completed. Checking results:"
93
+ ls -la example_data/ || echo "example_data directory not found"
94
+
95
+ - name: Verify test data files
96
+ run: |
97
+ echo "Checking if critical test files exist:"
98
+ ls -la example_data/
99
+ echo "Checking for specific PDF files:"
100
+ ls -la example_data/*.pdf || echo "No PDF files found"
101
+ echo "Checking file sizes:"
102
+ find example_data -name "*.pdf" -exec ls -lh {} \;
103
 
104
  - name: Clean up problematic config files
105
  run: |
 
172
  - name: Setup test data
173
  run: |
174
  python .github/scripts/setup_test_data.py
175
+ echo "Setup script completed. Checking results:"
176
+ ls -la example_data/ || echo "example_data directory not found"
177
+
178
+ - name: Verify test data files
179
+ run: |
180
+ echo "Checking if critical test files exist:"
181
+ ls -la example_data/
182
+ echo "Checking for specific PDF files:"
183
+ ls -la example_data/*.pdf || echo "No PDF files found"
184
+ echo "Checking file sizes:"
185
+ find example_data -name "*.pdf" -exec ls -lh {} \;
186
 
187
  - name: Run integration tests
188
  run: |
.github/workflows/simple-test.yml CHANGED
@@ -45,6 +45,17 @@ jobs:
45
  - name: Setup test data
46
  run: |
47
  python .github/scripts/setup_test_data.py
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  - name: Run CLI tests
50
  run: |
 
45
  - name: Setup test data
46
  run: |
47
  python .github/scripts/setup_test_data.py
48
+ echo "Setup script completed. Checking results:"
49
+ ls -la example_data/ || echo "example_data directory not found"
50
+
51
+ - name: Verify test data files
52
+ run: |
53
+ echo "Checking if critical test files exist:"
54
+ ls -la example_data/
55
+ echo "Checking for specific PDF files:"
56
+ ls -la example_data/*.pdf || echo "No PDF files found"
57
+ echo "Checking file sizes:"
58
+ find example_data -name "*.pdf" -exec ls -lh {} \;
59
 
60
  - name: Run CLI tests
61
  run: |
cli_redact.py CHANGED
@@ -636,6 +636,21 @@ python cli_redact.py --task textract --textract_action list
636
  if isinstance(args.input_file, str):
637
  args.input_file = [args.input_file]
638
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
639
  _, file_extension = os.path.splitext(args.input_file[0])
640
  file_extension = file_extension.lower()
641
  else:
 
636
  if isinstance(args.input_file, str):
637
  args.input_file = [args.input_file]
638
 
639
+ # Debug: Print file path information
640
+ input_file_path = args.input_file[0]
641
+ print(f"Debug: Input file path: {input_file_path}")
642
+ print(f"Debug: File exists: {os.path.exists(input_file_path)}")
643
+ print(f"Debug: Absolute path: {os.path.abspath(input_file_path)}")
644
+ if os.path.exists(input_file_path):
645
+ print(f"Debug: File size: {os.path.getsize(input_file_path)} bytes")
646
+ else:
647
+ print(
648
+ f"Debug: File not found! Current working directory: {os.getcwd()}"
649
+ )
650
+ print(
651
+ f"Debug: Directory contents: {os.listdir(os.path.dirname(input_file_path) if os.path.dirname(input_file_path) else '.')}"
652
+ )
653
+
654
  _, file_extension = os.path.splitext(args.input_file[0])
655
  file_extension = file_extension.lower()
656
  else:
test/test.py CHANGED
@@ -500,6 +500,18 @@ class TestCLIRedactExamples(unittest.TestCase):
500
  print(f"Example data directory: {cls.example_data_dir}")
501
  print(f"Temp output directory: {cls.temp_output_dir}")
502
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  @classmethod
504
  def tearDownClass(cls):
505
  """Clean up test environment after running tests."""
 
500
  print(f"Example data directory: {cls.example_data_dir}")
501
  print(f"Temp output directory: {cls.temp_output_dir}")
502
 
503
+ # Debug: Check if example data directory exists and list contents
504
+ if os.path.exists(cls.example_data_dir):
505
+ print("Example data directory exists. Contents:")
506
+ for item in os.listdir(cls.example_data_dir):
507
+ item_path = os.path.join(cls.example_data_dir, item)
508
+ if os.path.isfile(item_path):
509
+ print(f" File: {item} ({os.path.getsize(item_path)} bytes)")
510
+ else:
511
+ print(f" Directory: {item}")
512
+ else:
513
+ print(f"Example data directory does not exist: {cls.example_data_dir}")
514
+
515
  @classmethod
516
  def tearDownClass(cls):
517
  """Clean up test environment after running tests."""
tools/file_redaction.py CHANGED
@@ -3954,6 +3954,21 @@ def redact_text_pdf(
3954
  )
3955
  nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
3956
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3957
  # Open with Pikepdf to get text lines
3958
  pikepdf_pdf = Pdf.open(file_path)
3959
  number_of_pages = len(pikepdf_pdf.pages)
 
3954
  )
3955
  nlp_analyser.registry.add_recognizer(new_custom_fuzzy_recogniser)
3956
 
3957
+ # Debug: Print file path information before opening
3958
+ print(f"Debug: Attempting to open PDF file: {file_path}")
3959
+ print(f"Debug: File exists: {os.path.exists(file_path)}")
3960
+ print(f"Debug: Absolute path: {os.path.abspath(file_path)}")
3961
+ if os.path.exists(file_path):
3962
+ print(f"Debug: File size: {os.path.getsize(file_path)} bytes")
3963
+ else:
3964
+ print(f"Debug: File not found! Current working directory: {os.getcwd()}")
3965
+ print(
3966
+ f"Debug: Directory contents: {os.listdir(os.path.dirname(file_path) if os.path.dirname(file_path) else '.')}"
3967
+ )
3968
+ raise FileNotFoundError(
3969
+ f"Failed to open file '{file_path}'. File does not exist."
3970
+ )
3971
+
3972
  # Open with Pikepdf to get text lines
3973
  pikepdf_pdf = Pdf.open(file_path)
3974
  number_of_pages = len(pikepdf_pdf.pages)