From ede7007fbd0bb71543407f75f7ab276da6d57c22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=A1s=20Ortega=20Froysa?= <nicolas@ortegas.org>
Date: Wed, 1 Apr 2026 11:04:21 +0200
Subject: [PATCH] Fix regex for finding sections.

---
 ccc | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)
diff --git a/ccc b/ccc
index 5a7c7d7..6d8c860 100755
--- a/ccc
+++ b/ccc
@@ -168,19 +168,12 @@ sub extract_section_numbers {
 	my ($html) = @_;
 	my @sections;
 
-	# Look for section numbers that appear after a <p> tag
-	# Windows line endings (\r\n) are used in the HTML
-	# Example: <p class=MsoNormal>199\r\n&quot;I believe in God...
-	while ($html =~ /<p[^>]*>(\d{1,4})[\r\n]+/g) {
-		my $num = $1;
-		# Only capture numbers in the valid CCC range (1-2865)
-		push @sections, $num if $num >= 1 && $num <= 3000;
+	# A section number opens a paragraph: CRLF, <p class=MsoNormal ...>, an optional <i ...>, then 1-4 digits
+	while ($html =~ /\r\n<[pP] class=MsoNormal[^>]*>(?:<i[^>]*>)?(\d{1,4})/g) {
+		push @sections, $1;
 	}
 
-	# Remove duplicates and return sorted
-	my %seen;
-	my @unique = grep { !$seen{$_}++ } @sections;
-	return sort {$a <=> $b} @unique;
+	return @sections;
 }
 
 sub fetch_and_display_section {