#!/usr/bin/env perl
# File: ccc/ccc (Perl, executable)
# Copyright (C) 2026 Nicolás Ortega Froysa <nicolas@ortegas.org> All rights reserved.
# Author: Nicolás Ortega Froysa <nicolas@ortegas.org>
#
# This software is provided 'as-is', without any express or implied
# warranty. In no event will the authors be held liable for any damages
# arising from the use of this software.
#
# Permission is granted to anyone to use this software for any purpose,
# including commercial applications, and to alter it and redistribute it
# freely, subject to the following restrictions:
#
# 1. The origin of this software must not be misrepresented; you must not
# claim that you wrote the original software. If you use this software
# in a product, an acknowledgment in the product documentation would be
# appreciated but is not required.
#
# 2. Altered source versions must be plainly marked as such, and must not be
# misrepresented as being the original software.
#
# 3. This notice may not be removed or altered from any source
# distribution.
use strict;
use warnings;
use LWP::UserAgent;
use JSON::PP;
use File::Spec;
use File::HomeDir;
use File::Path qw(make_path);
use Math::Base36 ':all';
# ---------------------------------------------------------------------------
# Command-line driver.
#
# Accepts exactly one argument: either an option (-h/--help, -l/--load-cache,
# -v/--version) or a section selector, which is a single number ("270") or an
# inclusive range ("1691-1698"). Option handling exits immediately; a section
# selector falls through to the cache lookup and display code below.
# ---------------------------------------------------------------------------
my $VERSION   = '0.1';
my $PROG_NAME = 'ccc';

my $arg = shift;

if (!defined $arg) {
    print STDERR "No arguments given.\n\n";
    print_usage();
    exit 1;
}
elsif ($arg eq '--help' || $arg eq '-h') {
    print_help();
    exit;
}
elsif ($arg eq '-l' || $arg eq '--load-cache') {
    my $cache_dir = get_xdg_cache_dir();
    make_path($cache_dir) unless -d $cache_dir;
    my $cache_file = File::Spec->catfile($cache_dir, 'section_map.json');
    build_section_map($cache_file);
    exit;
}
elsif ($arg eq '-v' || $arg eq '--version') {
    print "$PROG_NAME v$VERSION\n";
    exit;
}

# Parse the section selector. \A and \z anchor the whole argument so a
# trailing newline cannot slip through the way it can with ^ and $.
my ($start_section, $end_section);
if ($arg =~ /\A(\d+)-(\d+)\z/) {
    ($start_section, $end_section) = ($1, $2);
    if ($start_section > $end_section) {
        print STDERR "Error: Start section must be <= end section\n";
        exit 1;
    }
}
elsif ($arg =~ /\A\d+\z/) {
    $start_section = $end_section = $arg;
}
else {
    print STDERR "Error: Invalid section format. Use a number (e.g. 270) or range (e.g. 1691-1698)\n";
    exit 1;
}

my $cache_dir = get_xdg_cache_dir();
make_path($cache_dir) unless -d $cache_dir;
my $cache_file  = File::Spec->catfile($cache_dir, 'section_map.json');
my $section_map = load_cache($cache_file);

# Anything other than a non-empty hash means there is no usable cache.
unless (ref($section_map) eq 'HASH' && %$section_map) {
    print STDERR "No cache available. Run with the '-l' or '--load-cache' option to build it first.\n";
    exit 1;
}

# Fetch and display requested sections.
#
# The cached keys are filtered by numeric comparison instead of counting from
# start to end and using the counter as a hash key. That way an argument with
# leading zeros ("007") still selects section 7, and an enormous range costs
# one pass over the cache rather than one iteration per number.
my @wanted = sort { $a <=> $b }
    grep {
        /\A\d+\z/
            && $_ >= $start_section
            && $_ <= $end_section
            && $section_map->{$_}
    } keys %$section_map;

fetch_and_display_section($_, $section_map->{$_}) for @wanted;

unless (@wanted) {
    if ($start_section == $end_section) {
        print STDERR "Error: Section $start_section not found in Catechism\n";
    }
    else {
        print STDERR "Error: No sections found in range $start_section-$end_section\n";
    }
    exit 1;
}
# Write the short usage synopsis (both invocation forms) to the currently
# selected output handle, which is STDOUT unless the caller changed it.
sub print_usage {
    my @synopsis = (
        "Usage:\n",
        " ccc <section|range>\n",
        " ccc [OPTION]\n",
    );
    print @synopsis;
}
# Write the full help text: the usage synopsis from print_usage(), followed
# by the list of options and a few example invocations.
sub print_help {
    print_usage();

    my @option_lines = (
        "\nOptions:\n",
        " -l, --load-cache Build the section cache from Vatican server\n",
        " -h, --help Show this help message\n",
        " -v, --version Show version information\n",
    );
    my @example_lines = (
        "\nExamples:\n",
        " ccc -l # Build cache\n",
        " ccc 270 # Single section\n",
        " ccc 1691-1698 # Range of sections\n",
    );
    print @option_lines, @example_lines;
}
# Load the section map from the JSON cache file.
#
# Parameters:
#   $file - path of the JSON cache file.
# Returns:
#   A hash reference holding the decoded JSON object. An empty hash reference
#   is returned when the file does not exist, and also when its content is
#   not a JSON object (empty, truncated or otherwise corrupt file); the
#   latter case additionally emits a warning so the user knows the cache has
#   to be rebuilt.
# Dies:
#   When the file exists but cannot be opened for reading.
sub load_cache {
    my ($file) = @_;
    return {} unless -f $file;

    open my $fh, '<', $file or die "Cannot read cache: $!\n";
    my $json_text = do { local $/; <$fh> };
    close $fh;

    # decode_json() throws on malformed input. Treat that, and any top-level
    # value that is not an object, as "no cache" so callers can always
    # dereference the result as a hash.
    my $data = eval { JSON::PP::decode_json($json_text) };
    unless (ref($data) eq 'HASH') {
        warn "Warning: cache file '$file' is empty or corrupt; ignoring it.\n";
        return {};
    }
    return $data;
}
# Serialise a data structure as JSON and write it to the cache file,
# replacing any previous content.
#
# Parameters:
#   $file - path of the cache file to (over)write.
#   $data - reference to the structure to encode (the section map hash).
# Returns:
#   1 on success.
# Dies:
#   When the file cannot be opened, written or closed. Output is buffered,
#   so a failed write (for example a full disk) may only be reported by
#   close(); its result is therefore checked as well.
sub save_cache {
    my ($file, $data) = @_;

    open my $fh, '>', $file or die "Cannot write cache: $!\n";
    print {$fh} JSON::PP::encode_json($data)
        or die "Cannot write cache: $!\n";
    close $fh or die "Cannot write cache: $!\n";

    return 1;
}
# Download every page of the Catechism and build the map from section number
# to the page file that contains it, then store the map in the cache file.
#
# Page files are named "__P<n>.HTM" where <n> is a base-36 counter running
# from 1 up to 'AE' (374 in decimal).
#
# Parameters:
#   $cache_file - path where the resulting map is saved as JSON.
# Returns:
#   A reference to the section map (section number => page file name).
# Exits:
#   With status 1 when a page still cannot be fetched after three attempts.
#   Errors raised by save_cache() propagate to the caller.
sub build_section_map {
    my ($cache_file) = @_;

    my %section_map;
    my $ua           = LWP::UserAgent->new(timeout => 15);
    my $base_url     = 'https://www.vatican.va/archive/ENG0015';
    my $max_attempts = 3;

    print STDERR "Fetching Catechism sections...\n";

    my $total_files = decode_base36('AE');
    my $count       = 0;

    for my $index (1 .. $total_files) {
        my $filename = '__P' . encode_base36($index) . '.HTM';
        my $url      = "$base_url/$filename";

        # Retry transient failures a bounded number of times per page.
        my $response;
        for my $attempt (1 .. $max_attempts) {
            $response = $ua->get($url);
            last if $response->is_success;
        }
        unless ($response->is_success) {
            # Report what actually went wrong: the failure may be a timeout
            # or a server error, not necessarily a 404.
            print STDERR "\nFailed to fetch '$url': "
                . $response->status_line . "\n";
            exit 1;
        }

        # If a number shows up on several pages, the last page seen wins.
        for my $section (extract_section_numbers($response->content)) {
            $section_map{$section} = $filename;
        }

        $count++;

        # Fixed-width integer percentage, so every update fully overwrites
        # the previous one after the carriage return.
        printf STDERR "\r%3d%%", int($count * 100 / $total_files);
    }
    print STDERR "\n";

    save_cache($cache_file, \%section_map);
    print STDERR "Cache built with " . scalar(keys %section_map) . " sections\n";

    return \%section_map;
}
# Collect the section numbers that open a paragraph in a page's HTML.
#
# A section header is recognised as a CRLF line break followed by a
# <p class=MsoNormal ...> tag (either letter case), an optional <i ...> tag,
# and then a run of one to four digits.
#
# Parameters:
#   $html - raw HTML of one page.
# Returns:
#   The list of matched numbers, in document order (possibly empty).
sub extract_section_numbers {
    my ($html) = @_;
    my @sections;

    # [pP] rather than [p|P]: inside a character class "|" is a literal
    # character, not alternation. (?!\d) rejects digit runs longer than four
    # instead of recording their first four digits as a section number.
    while ($html =~ m{\r\n<[pP] class=MsoNormal[^>]*>(?:<i[^>]*>)?(\d{1,4})(?!\d)}g) {
        push @sections, $1;
    }

    return @sections;
}
# Download the page that holds a section and print that section's text.
#
# Parameters:
#   $section_num  - number of the section to display.
#   $section_info - file name of the page containing it (as stored in the
#                   section map), appended to the archive base URL.
# Exits:
#   With status 1 when the page cannot be fetched or the section cannot be
#   located in it.
#
# NOTE(review): the raw response bytes are printed without any charset
# conversion; confirm the page encoding matches what the terminal expects.
sub fetch_and_display_section {
    my ($section_num, $section_info) = @_;

    my $page_url = 'https://www.vatican.va/archive/ENG0015' . "/$section_info";
    my $agent    = LWP::UserAgent->new(timeout => 10);
    my $reply    = $agent->get($page_url);

    if (!$reply->is_success) {
        print STDERR "Error: Could not fetch section from Vatican\n";
        exit 1;
    }

    # Pull the requested section out of the page.
    my $text = extract_section_content($reply->content, $section_num);
    if (!$text) {
        print STDERR "Error: Could not parse section $section_num\n";
        exit 1;
    }

    print $text;
}
# Extract the plain text of one section from a page's HTML.
#
# The page is reduced to text (tags removed, a handful of entities decoded,
# bracketed footnote markers dropped). The section is then taken to start at
# the first standalone occurrence of its number and to run until the next
# section number or the end of the page. This is a text heuristic: a bare
# number inside the prose can still be mistaken for a boundary.
#
# Parameters:
#   $html        - raw HTML of the page.
#   $section_num - number of the section to extract.
# Returns:
#   The section as a string: the number in bold (ANSI escape codes), the
#   text with a blank line between sentences, and a trailing newline.
#   Returns nothing (undef in scalar context) when the number is not found.
sub extract_section_content {
    my ($html, $section_num) = @_;

    # Strip markup first. Decoding entities beforehand would turn escaped
    # text such as "a &lt; b &gt; c" into something that looks like a tag
    # and gets deleted.
    $html =~ s/<[^>]+>//g;

    # Decode entities; "&amp;" goes last so that "&amp;lt;" ends up as the
    # literal text "&lt;" instead of being decoded twice into "<".
    $html =~ s/&quot;/"/g;
    $html =~ s/&lt;/</g;
    $html =~ s/&gt;/>/g;
    $html =~ s/&nbsp;/ /g;
    $html =~ s/&amp;/&/g;

    # Remove footnote reference numbers like [1], [2], etc.
    $html =~ s/\[\d+\]//g;

    # A section ends at any 3-4 digit number, or at its direct successor.
    # The successor matters for sections below 99, whose neighbours have
    # only one or two digits and would otherwise never end the match.
    my $number = quotemeta $section_num;
    my $stop   = '\d{3,4}';
    if ($section_num =~ /\A\d+\z/) {
        $stop = '(?:' . ($section_num + 1) . '|\d{3,4})';
    }

    return unless $html =~ /\b$number\s+(.+?)(?=\b$stop\s+|\z)/s;
    my $content = $1;

    # Clean up excess whitespace.
    $content =~ s/\s+/ /g;
    $content =~ s/\A\s+|\s+\z//g;

    # Put a blank line between sentences, keeping the closing punctuation.
    $content =~ s/([.!?])\s+(?=[A-Z])/$1\n\n/g;

    return "\033[1m$section_num\033[0m $content\n";
}
# Work out the directory in which this program keeps its cache.
#
# The base directory is $XDG_CACHE_HOME when that variable is set to a
# non-empty value; otherwise it is ".cache" under the home directory reported
# by File::HomeDir. The result is the "ccc" subdirectory of that base. The
# directory is not created here.
sub get_xdg_cache_dir {
    my $from_env = $ENV{XDG_CACHE_HOME};

    my $base_dir = (defined $from_env && $from_env ne '')
        ? $from_env
        : File::Spec->catdir(File::HomeDir->my_home, '.cache');

    return File::Spec->catdir($base_dir, 'ccc');
}