Compare commits
4 Commits
7bdc8dd59f
...
08e675166c
| Author | SHA1 | Date | |
|---|---|---|---|
| 08e675166c | |||
| da32b78e98 | |||
| 88b630bdf1 | |||
| d2b6312eac |
29
ccc
29
ccc
@@ -192,19 +192,20 @@ sub extract_section_numbers {
|
|||||||
my @sections;
|
my @sections;
|
||||||
|
|
||||||
# Find section header numbers with Windows lines
|
# Find section header numbers with Windows lines
|
||||||
while($html =~ /\r\n<[p|P] class=MsoNormal[^>]*>(<i[^>]*>)?(\d{1,4})/g) {
|
while($html =~ /\r\n<[pP] class=MsoNormal[^>]*>(<i[^>]*>)?(\d{1,4})\s+/g) {
|
||||||
push @sections, $2;
|
my $section_num = $2;
|
||||||
|
push @sections, $section_num;
|
||||||
}
|
}
|
||||||
|
|
||||||
return @sections;
|
return @sections;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub fetch_and_display_section {
|
sub fetch_and_display_section {
|
||||||
my ($section_num, $section_info) = @_;
|
my ($section_num, $section_page) = @_;
|
||||||
|
|
||||||
my $ua = LWP::UserAgent->new(timeout => 10);
|
my $ua = LWP::UserAgent->new(timeout => 10);
|
||||||
my $base_url = 'https://www.vatican.va/archive/ENG0015';
|
my $base_url = 'https://www.vatican.va/archive/ENG0015';
|
||||||
my $url = "$base_url/$section_info";
|
my $url = "$base_url/$section_page";
|
||||||
|
|
||||||
my $response = $ua->get($url);
|
my $response = $ua->get($url);
|
||||||
|
|
||||||
@@ -214,19 +215,10 @@ sub fetch_and_display_section {
|
|||||||
}
|
}
|
||||||
|
|
||||||
my $content = $response->content;
|
my $content = $response->content;
|
||||||
|
print_section($content, $section_num);
|
||||||
# Extract the section content
|
|
||||||
my $section_content = extract_section_content($content, $section_num);
|
|
||||||
|
|
||||||
if($section_content) {
|
|
||||||
print $section_content;
|
|
||||||
} else {
|
|
||||||
print STDERR "Error: Could not parse section $section_num\n";
|
|
||||||
exit 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sub extract_section_content {
|
sub print_section {
|
||||||
my ($html, $section_num) = @_;
|
my ($html, $section_num) = @_;
|
||||||
|
|
||||||
# Convert HTML entities
|
# Convert HTML entities
|
||||||
@@ -255,10 +247,11 @@ sub extract_section_content {
|
|||||||
# Add line breaks for readability
|
# Add line breaks for readability
|
||||||
$content =~ s/([.!?])\s+(?=[A-Z])/\n\n/g;
|
$content =~ s/([.!?])\s+(?=[A-Z])/\n\n/g;
|
||||||
|
|
||||||
return "\033[1m$section_num\033[0m $content\n";
|
print "\033[1m$section_num\033[0m $content\n";
|
||||||
|
} else {
|
||||||
|
print STDERR "Error: Could not parse section $section_num\n";
|
||||||
|
exit 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return undef;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sub get_xdg_cache_dir {
|
sub get_xdg_cache_dir {
|
||||||
|
|||||||
Reference in New Issue
Block a user