273 lines
6.8 KiB
Perl
Executable File
273 lines
6.8 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
|
|
# Copyright (C) 2026 Nicolás Ortega Froysa <nicolas@ortegas.org> All rights reserved.
|
|
# Author: Nicolás Ortega Froysa <nicolas@ortegas.org>
|
|
#
|
|
# This software is provided 'as-is', without any express or implied
|
|
# warranty. In no event will the authors be held liable for any damages
|
|
# arising from the use of this software.
|
|
#
|
|
# Permission is granted to anyone to use this software for any purpose,
|
|
# including commercial applications, and to alter it and redistribute it
|
|
# freely, subject to the following restrictions:
|
|
#
|
|
# 1. The origin of this software must not be misrepresented; you must not
|
|
# claim that you wrote the original software. If you use this software
|
|
# in a product, an acknowledgment in the product documentation would be
|
|
# appreciated but is not required.
|
|
#
|
|
# 2. Altered source versions must be plainly marked as such, and must not be
|
|
# misrepresented as being the original software.
|
|
#
|
|
# 3. This notice may not be removed or altered from any source
|
|
# distribution.
|
|
|
|
use strict;
|
|
use warnings;
|
|
use LWP::UserAgent;
|
|
use JSON::PP;
|
|
use File::Spec;
|
|
use File::HomeDir;
|
|
use File::Path qw(make_path);
|
|
use Math::Base36 ':all';
|
|
|
|
my $VERSION = '0.1';
my $PROG_NAME = 'ccc';
my $arg = shift;

# Running without any argument is a usage error.
if (!defined $arg) {
    print STDERR "No arguments given.\n\n";
    print_usage();
    exit 1;
}

# Each option below is handled completely and then terminates the program,
# so anything that falls through is treated as a section or range.
if ($arg eq '-h' || $arg eq '--help') {
    print_help();
    exit;
}

if ($arg eq '--load-cache' || $arg eq '-l') {
    # Make sure the cache directory exists before the map is written to it.
    my $cache_dir = get_xdg_cache_dir();
    -d $cache_dir or make_path($cache_dir);
    my $cache_file = File::Spec->catfile($cache_dir, 'section_map.json');
    build_section_map($cache_file);
    exit;
}

if ($arg eq '--version' || $arg eq '-v') {
    print "$PROG_NAME v$VERSION\n";
    exit;
}
|
|
|
|
# Parse the positional argument as either a single section number ("270")
# or an inclusive range ("1691-1698").
#
# The captured digits are numified with "+ 0" so that input with leading
# zeros (e.g. "0270") is looked up under the same key ("270") that the
# cache uses, instead of silently missing.
my ($start_section, $end_section);
if($arg =~ /^(\d+)-(\d+)$/) {
    $start_section = $1 + 0;
    $end_section = $2 + 0;
    if($start_section > $end_section) {
        print STDERR "Error: Start section must be <= end section\n";
        exit 1;
    }
} elsif($arg =~ /^(\d+)$/) {
    $start_section = $end_section = $1 + 0;
} else {
    print STDERR "Error: Invalid section format. Use a number (e.g. 270) or range (e.g. 1691-1698)\n";
    exit 1;
}
|
|
|
|
# Locate the on-disk section map (creating its directory if needed) and
# refuse to continue when it has no entries yet.
my $cache_dir = get_xdg_cache_dir();
-d $cache_dir or make_path($cache_dir);

my $cache_file = File::Spec->catfile($cache_dir, 'section_map.json');
my $section_map = load_cache($cache_file);

if(!%$section_map) {
    print STDERR "No cache available. Run with the '-l' or '--load-cache' option to build it first.\n";
    exit 1;
}
|
|
|
|
# Walk the requested range and print every section the cache knows about.
my $found_any = 0;
{
    my $current = $start_section;
    while($current <= $end_section) {
        my $page_file = $section_map->{$current};
        next unless $page_file;

        fetch_and_display_section($current, $page_file);
        $found_any = 1;
    } continue {
        $current++;
    }
}

# Nothing printed at all: report it, wording the message for a single
# section or for a range as appropriate.
if(!$found_any) {
    my $message = ($start_section == $end_section)
        ? "Error: Section $start_section not found in Catechism\n"
        : "Error: No sections found in range $start_section-$end_section\n";
    print STDERR $message;
    exit 1;
}
|
|
|
|
# print_usage()
#
# Writes the short usage synopsis to standard output.
sub print_usage {
    my @usage_lines = (
        'Usage:',
        ' ccc <section|range>',
        ' ccc [OPTION]',
    );
    print map { "$_\n" } @usage_lines;
}
|
|
|
|
# print_help()
#
# Writes the full help text to standard output: the usage synopsis
# followed by the option list and a few example invocations.
sub print_help {
    print_usage();

    my @option_lines = (
        "\nOptions:\n",
        " -l, --load-cache Build the section cache from Vatican server\n",
        " -h, --help Show this help message\n",
        " -v, --version Show version information\n",
    );
    my @example_lines = (
        "\nExamples:\n",
        " ccc -l # Build cache\n",
        " ccc 270 # Single section\n",
        " ccc 1691-1698 # Range of sections\n",
    );

    print @option_lines, @example_lines;
}
|
|
|
|
# load_cache($file)
#
# Reads the JSON section map stored in $file.
#
# Returns a hash reference. An empty hash reference is returned when the
# file does not exist, and also when its contents are not a JSON object
# (empty file, truncated write, malformed JSON, non-object top level); in
# the latter case a warning is printed to STDERR so the user knows the
# cache has to be rebuilt.
#
# Dies if the file exists but cannot be opened.
sub load_cache {
    my ($file) = @_;

    return {} unless -f $file;

    open my $fh, '<', $file or die "Cannot read cache: $!\n";
    my $json_text = do { local $/; <$fh> };
    close $fh;

    # decode_json dies on malformed input; trap that so a damaged cache
    # behaves like a missing one instead of aborting with a parser error.
    my $data = eval { JSON::PP::decode_json($json_text) };
    unless(ref $data eq 'HASH') {
        print STDERR "Warning: Cache file '$file' is not valid; ignoring it.\n";
        return {};
    }

    return $data;
}
|
|
|
|
# save_cache($file, $data)
#
# Serialises $data as JSON and writes it to $file, replacing any previous
# contents.
#
# Dies if the file cannot be opened, written, or closed. The close is
# checked because buffered write errors (for example a full disk) only
# surface when the handle is flushed.
sub save_cache {
    my ($file, $data) = @_;

    open my $fh, '>', $file or die "Cannot write cache: $!\n";
    print {$fh} JSON::PP::encode_json($data)
        or die "Cannot write cache: $!\n";
    close $fh or die "Cannot write cache: $!\n";
}
|
|
|
|
# build_section_map($cache_file)
#
# Downloads every Catechism page from the Vatican archive, records which
# page file each section number appears in, and stores the resulting map
# in $cache_file via save_cache().
#
# Page files are named "__P<n>.HTM", where <n> is the base36 encoding of
# each integer from 1 up to decode_base36('AE').
#
# Progress and the final summary are written to STDERR. If a page still
# cannot be fetched after several attempts the program exits with status 1.
#
# Returns a reference to the section-number => page-file hash.
sub build_section_map {
    my ($cache_file) = @_;
    my %section_map;

    my $ua = LWP::UserAgent->new(timeout => 15);
    my $base_url = 'https://www.vatican.va/archive/ENG0015';
    my $max_attempts = 3;

    print STDERR "Fetching Catechism sections...\n";

    my $total_files = decode_base36('AE');
    my $count = 0;

    for my $i (1..$total_files) {
        my $filename = '__P' . encode_base36($i) . '.HTM';
        my $url = "$base_url/$filename";

        # Try the same URL a few times before giving up; any unsuccessful
        # response (not only a 404) counts as a failed attempt.
        my $response;
        for my $attempt (1..$max_attempts) {
            $response = $ua->get($url);
            last if $response->is_success;
        }

        unless($response->is_success) {
            print STDERR "\nFailed to fetch '$url' after $max_attempts attempts: "
                . $response->status_line . "\n";
            exit 1;
        }

        my $content = $response->content;
        foreach my $section (extract_section_numbers($content)) {
            $section_map{$section} = $filename;
        }

        $count++;

        # Fixed-width output so each "\r" redraw fully overwrites the
        # previous value instead of leaving stale digits behind.
        printf STDERR "\r%5.1f%%", $count * 100 / $total_files;
    }

    print STDERR "\n";
    save_cache($cache_file, \%section_map);
    print STDERR "Cache built with " . scalar(keys %section_map) . " sections\n";

    return \%section_map;
}
|
|
|
|
# extract_section_numbers($html)
#
# Scans a page for section headers and returns the list of section numbers
# found, in order of appearance.
#
# A header is recognised as a CRLF line break followed by a <p> or <P> tag
# with "class=MsoNormal", an optional <i ...> tag, and then one to four
# digits; those digits are the section number.
sub extract_section_numbers {
    my ($html) = @_;
    my @sections;

    return @sections unless defined $html;

    # [pP] matches either case of the tag name (the earlier [p|P] also
    # accepted a literal '|'); the <i> wrapper is optional and not captured.
    while($html =~ /\r\n<[pP] class=MsoNormal[^>]*>(?:<i[^>]*>)?(\d{1,4})/g) {
        push @sections, $1;
    }

    return @sections;
}
|
|
|
|
# fetch_and_display_section($section_num, $section_info)
#
# Downloads the page file named by $section_info from the Vatican archive,
# extracts the text of section $section_num from it and prints that text
# to standard output.
#
# Exits with status 1 (after a message on STDERR) when the page cannot be
# fetched or the section cannot be located in it.
sub fetch_and_display_section {
    my ($section_num, $section_info) = @_;

    my $page_url = join '/', 'https://www.vatican.va/archive/ENG0015', $section_info;
    my $agent = LWP::UserAgent->new(timeout => 10);
    my $reply = $agent->get($page_url);

    if(!$reply->is_success) {
        print STDERR "Error: Could not fetch section from Vatican\n";
        exit 1;
    }

    my $page_body = $reply->content;
    my $section_text = extract_section_content($page_body, $section_num);

    if(!$section_text) {
        print STDERR "Error: Could not parse section $section_num\n";
        exit 1;
    }

    print $section_text;
}
|
|
|
|
# extract_section_content($html, $section_num)
#
# Pulls the text of one numbered section out of a raw HTML page.
#
# The page is reduced to plain text (tags removed, a handful of HTML
# entities decoded, "[n]" footnote markers dropped). The section is then
# taken to start at the first standalone occurrence of $section_num
# followed by whitespace, and to run until the next standalone 3-4 digit
# number followed by whitespace, or the end of the text.
#
# Returns the section formatted for the terminal: the number in bold (ANSI
# escape codes), the text with whitespace collapsed, and a blank line
# inserted after each sentence that is followed by a capital letter.
# Returns nothing (undef in scalar context) when the section is not found.
#
# NOTE(review): the end-of-section lookahead only recognises 3-4 digit
# numbers, so sections followed by a 1-2 digit section number presumably
# run on into the next section - confirm against real pages.
sub extract_section_content {
    my ($html, $section_num) = @_;

    return unless defined $html && defined $section_num;

    # Remove HTML tags first, so that '<' and '>' produced by entity
    # decoding below are not mistaken for markup and stripped.
    $html =~ s/<[^>]+>//g;

    # Convert HTML entities. '&amp;' is decoded last so that an escaped
    # entity such as '&amp;lt;' becomes '&lt;' rather than '<'.
    $html =~ s/&quot;/"/g;
    $html =~ s/&lt;/</g;
    $html =~ s/&gt;/>/g;
    $html =~ s/&nbsp;/ /g;
    $html =~ s/&amp;/&/g;

    # Remove footnote reference numbers like [1], [2], etc
    $html =~ s/\[\d+\]//g;

    # Find the section by number
    my $pattern = '\b' . quotemeta($section_num) . '\s+';

    if($html =~ /$pattern(.+?)(?=\b\d{3,4}\s+|$)/s) {
        my $content = $1;

        # Clean up excess whitespace
        $content =~ s/\s+/ /g;
        $content =~ s/^\s+|\s+$//g;

        # Add line breaks for readability, keeping the sentence-ending
        # punctuation that precedes each break.
        $content =~ s/([.!?])\s+(?=[A-Z])/$1\n\n/g;

        return "\033[1m$section_num\033[0m $content\n";
    }

    return;
}
|
|
|
|
# get_xdg_cache_dir()
#
# Returns the path of this program's cache directory: the "ccc"
# subdirectory of $XDG_CACHE_HOME, or of "<home>/.cache" when that
# environment variable is unset or empty.
sub get_xdg_cache_dir {
    my $base_dir = $ENV{XDG_CACHE_HOME};
    my $have_base = defined($base_dir) && $base_dir ne '';

    $base_dir = File::Spec->catdir(File::HomeDir->my_home, '.cache')
        unless $have_base;

    return File::Spec->catdir($base_dir, 'ccc');
}
|