Compare commits
5 Commits
7bdc8dd59f
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 770af6735d | |||
| 08e675166c | |||
| da32b78e98 | |||
| 88b630bdf1 | |||
| d2b6312eac |
@@ -32,8 +32,7 @@ $ ccc 232-233
|
|||||||
|
|
||||||
## Dependencies
|
## Dependencies
|
||||||
|
|
||||||
The script requires the following Perl modules (all included in standard Perl
|
The script requires the following Perl modules:
|
||||||
distributions):
|
|
||||||
|
|
||||||
- `LWP::UserAgent` - For HTTP requests
|
- `LWP::UserAgent` - For HTTP requests
|
||||||
- `JSON::PP` - For JSON parsing
|
- `JSON::PP` - For JSON parsing
|
||||||
@@ -42,6 +41,9 @@ distributions):
|
|||||||
- `File::Path` - For creating cache directory structures
|
- `File::Path` - For creating cache directory structures
|
||||||
- `Math::Base36` - For calculating the name of the HTML files
|
- `Math::Base36` - For calculating the name of the HTML files
|
||||||
|
|
||||||
|
The `Math::Base36` module is not a part of standard Perl distributions, but can
|
||||||
|
be installed with [cpan](https://www.cpan.org/modules/INSTALL.html).
|
||||||
|
|
||||||
## Licensing
|
## Licensing
|
||||||
|
|
||||||
This project is licensed under the terms & conditions of the Zlib license. See
|
This project is licensed under the terms & conditions of the Zlib license. See
|
||||||
|
|||||||
29
ccc
29
ccc
@@ -192,19 +192,20 @@ sub extract_section_numbers {
|
|||||||
my @sections;
|
my @sections;
|
||||||
|
|
||||||
# Find section header numbers with Windows lines
|
# Find section header numbers with Windows lines
|
||||||
while($html =~ /\r\n<[p|P] class=MsoNormal[^>]*>(<i[^>]*>)?(\d{1,4})/g) {
|
while($html =~ /\r\n<[pP] class=MsoNormal[^>]*>(<i[^>]*>)?(\d{1,4})\s+/g) {
|
||||||
push @sections, $2;
|
my $section_num = $2;
|
||||||
|
push @sections, $section_num;
|
||||||
}
|
}
|
||||||
|
|
||||||
return @sections;
|
return @sections;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub fetch_and_display_section {
|
sub fetch_and_display_section {
|
||||||
my ($section_num, $section_info) = @_;
|
my ($section_num, $section_page) = @_;
|
||||||
|
|
||||||
my $ua = LWP::UserAgent->new(timeout => 10);
|
my $ua = LWP::UserAgent->new(timeout => 10);
|
||||||
my $base_url = 'https://www.vatican.va/archive/ENG0015';
|
my $base_url = 'https://www.vatican.va/archive/ENG0015';
|
||||||
my $url = "$base_url/$section_info";
|
my $url = "$base_url/$section_page";
|
||||||
|
|
||||||
my $response = $ua->get($url);
|
my $response = $ua->get($url);
|
||||||
|
|
||||||
@@ -214,19 +215,10 @@ sub fetch_and_display_section {
|
|||||||
}
|
}
|
||||||
|
|
||||||
my $content = $response->content;
|
my $content = $response->content;
|
||||||
|
print_section($content, $section_num);
|
||||||
# Extract the section content
|
|
||||||
my $section_content = extract_section_content($content, $section_num);
|
|
||||||
|
|
||||||
if($section_content) {
|
|
||||||
print $section_content;
|
|
||||||
} else {
|
|
||||||
print STDERR "Error: Could not parse section $section_num\n";
|
|
||||||
exit 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sub extract_section_content {
|
sub print_section {
|
||||||
my ($html, $section_num) = @_;
|
my ($html, $section_num) = @_;
|
||||||
|
|
||||||
# Convert HTML entities
|
# Convert HTML entities
|
||||||
@@ -255,10 +247,11 @@ sub extract_section_content {
|
|||||||
# Add line breaks for readability
|
# Add line breaks for readability
|
||||||
$content =~ s/([.!?])\s+(?=[A-Z])/\n\n/g;
|
$content =~ s/([.!?])\s+(?=[A-Z])/\n\n/g;
|
||||||
|
|
||||||
return "\033[1m$section_num\033[0m $content\n";
|
print "\033[1m$section_num\033[0m $content\n";
|
||||||
|
} else {
|
||||||
|
print STDERR "Error: Could not parse section $section_num\n";
|
||||||
|
exit 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return undef;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sub get_xdg_cache_dir {
|
sub get_xdg_cache_dir {
|
||||||
|
|||||||
Reference in New Issue
Block a user