Fix HTML file download.

This commit is contained in:
2026-04-01 09:42:34 +02:00
parent e06e5437be
commit 184b418d5b
2 changed files with 18 additions and 14 deletions

View File

@@ -19,6 +19,7 @@ distributions):
- `File::Spec` - For file path operations - `File::Spec` - For file path operations
- `File::HomeDir` - For home directory detection - `File::HomeDir` - For home directory detection
- `File::Path` - For creating cache directory structures - `File::Path` - For creating cache directory structures
- `Math::Base36` - For calculating the base36 names of the HTML files
## Licensing ## Licensing

31
ccc
View File

@@ -29,6 +29,7 @@ use JSON::PP;
use File::Spec; use File::Spec;
use File::HomeDir; use File::HomeDir;
use File::Path qw(make_path); use File::Path qw(make_path);
use Math::Base36 ':all';
my $arg = shift or usage(); my $arg = shift or usage();
@@ -118,40 +119,42 @@ sub build_section_map {
print STDERR "Fetching Catechism sections...\n"; print STDERR "Fetching Catechism sections...\n";
# Build list of all possible HTML files based on hex naming # Build list of all possible HTML files from base36 naming
# Files are named __P1.HTM, __P2.HTM... __P9.HTM, __PA.HTM... __PF.HTM, __P10.HTM, etc.
my @filenames; my @filenames;
for my $i (1..1000) { my $total_files = decode_base36('AE');
my $hex = sprintf("%X", $i); for my $i (1..$total_files) {
push @filenames, "__P$hex.HTM"; my $b36 = encode_base36($i);
push @filenames, "__P$b36.HTM";
} }
my $count = 0; my $count = 0;
my $consecutive_404s = 0; my $consecutive_404s = 0;
foreach my $filename (@filenames) { foreach my $filename (@filenames) {
my $url = "$base_url/$filename"; my $url = "$base_url/$filename";
retry_get:
my $response = $ua->get($url); my $response = $ua->get($url);
unless($response->is_success) { unless($response->is_success) {
# Count consecutive 404s, stop after too many
$consecutive_404s++; $consecutive_404s++;
next if $consecutive_404s < 10; # Allow up to 9 consecutive 404s goto retry_get if $consecutive_404s < 3;
last;
print STDERR "\n404 response on '$url'.\n";
exit 1;
} }
$consecutive_404s = 0; # Reset counter on success $consecutive_404s = 0;
my $content = $response->content; my $content = $response->content;
# Extract all section numbers
my @sections = extract_section_numbers($content); my @sections = extract_section_numbers($content);
foreach my $section (@sections) { foreach my $section (@sections) {
$section_map{$section} = $filename; $section_map{$section} = $filename;
$count++;
} }
$count++;
# Print progress # Print progress
print STDERR "." if(scalar(keys %section_map) % 50 == 0); my $progress = $count * 100 / $total_files;
print STDERR "\r$progress%";
} }
print STDERR "\n"; print STDERR "\n";