From ede7007fbd0bb71543407f75f7ab276da6d57c22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Ortega=20Froysa?= Date: Wed, 1 Apr 2026 11:04:21 +0200 Subject: [PATCH] Fix regex for finding sections. --- ccc | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/ccc b/ccc index 5a7c7d7..6d8c860 100755 --- a/ccc +++ b/ccc @@ -168,19 +168,12 @@ sub extract_section_numbers { my ($html) = @_; my @sections; - # Look for section numbers that appear after a

tag - # Windows line endings (\r\n) are used in the HTML - # Example:

199\r\n"I believe in God... - while ($html =~ /]*>(\d{1,4})[\r\n]+/g) { - my $num = $1; - # Only capture numbers in the valid CCC range (1-2865) - push @sections, $num if $num >= 1 && $num <= 3000; + # Find section header numbers with Windows lines + while($html =~ /\r\n<[p|P] class=MsoNormal[^>]*>(]*>)?(\d{1,4})/g) { + push @sections, $2; } - # Remove duplicates and return sorted - my %seen; - my @unique = grep { !$seen{$_}++ } @sections; - return sort {$a <=> $b} @unique; + return @sections; } sub fetch_and_display_section {