Compare commits

...

5 Commits

2 changed files with 15 additions and 20 deletions

View File

@@ -32,8 +32,7 @@ $ ccc 232-233
## Dependencies ## Dependencies
The script requires the following Perl modules (all included in standard Perl The script requires the following Perl modules:
distributions):
- `LWP::UserAgent` - For HTTP requests - `LWP::UserAgent` - For HTTP requests
- `JSON::PP` - For JSON parsing - `JSON::PP` - For JSON parsing
@@ -42,6 +41,9 @@ distributions):
- `File::Path` - For creating cache directory structures - `File::Path` - For creating cache directory structures
- `Math::Base36` - For calculating the name of the HTML files - `Math::Base36` - For calculating the name of the HTML files
The `Math::Base36` module is not part of standard Perl distributions, but can
be installed with [cpan](https://www.cpan.org/modules/INSTALL.html).
## Licensing ## Licensing
This project is licensed under the terms & conditions of the Zlib license. See This project is licensed under the terms & conditions of the Zlib license. See

29
ccc
View File

@@ -192,19 +192,20 @@ sub extract_section_numbers {
my @sections; my @sections;
# Find section header numbers with Windows lines # Find section header numbers with Windows lines
while($html =~ /\r\n<[p|P] class=MsoNormal[^>]*>(<i[^>]*>)?(\d{1,4})/g) { while($html =~ /\r\n<[pP] class=MsoNormal[^>]*>(<i[^>]*>)?(\d{1,4})\s+/g) {
push @sections, $2; my $section_num = $2;
push @sections, $section_num;
} }
return @sections; return @sections;
} }
sub fetch_and_display_section { sub fetch_and_display_section {
my ($section_num, $section_info) = @_; my ($section_num, $section_page) = @_;
my $ua = LWP::UserAgent->new(timeout => 10); my $ua = LWP::UserAgent->new(timeout => 10);
my $base_url = 'https://www.vatican.va/archive/ENG0015'; my $base_url = 'https://www.vatican.va/archive/ENG0015';
my $url = "$base_url/$section_info"; my $url = "$base_url/$section_page";
my $response = $ua->get($url); my $response = $ua->get($url);
@@ -214,19 +215,10 @@ sub fetch_and_display_section {
} }
my $content = $response->content; my $content = $response->content;
print_section($content, $section_num);
# Extract the section content
my $section_content = extract_section_content($content, $section_num);
if($section_content) {
print $section_content;
} else {
print STDERR "Error: Could not parse section $section_num\n";
exit 1;
}
} }
sub extract_section_content { sub print_section {
my ($html, $section_num) = @_; my ($html, $section_num) = @_;
# Convert HTML entities # Convert HTML entities
@@ -255,10 +247,11 @@ sub extract_section_content {
# Add line breaks for readability # Add line breaks for readability
$content =~ s/([.!?])\s+(?=[A-Z])/\n\n/g; $content =~ s/([.!?])\s+(?=[A-Z])/\n\n/g;
return "\033[1m$section_num\033[0m $content\n"; print "\033[1m$section_num\033[0m $content\n";
} else {
print STDERR "Error: Could not parse section $section_num\n";
exit 1;
} }
return undef;
} }
sub get_xdg_cache_dir { sub get_xdg_cache_dir {