wikiheaders.pl: A bunch of work on correct wordwrap and (de)wikify.

2021-07-14 08:11:18 -04:00 · 2021-07-14 08:11:18 -04:00 · d115857c6f
parent 0ac13dc27f
commit d115857c6f
1 changed files with 108 additions and 62 deletions
--- a/build-scripts/wikiheaders.pl
+++ b/build-scripts/wikiheaders.pl
@ -103,27 +103,36 @@ sub wordwrap {

    my $retval = '';

-    while ($str =~ s/(.*?)(\n+\`\`\`.*?\`\`\`\n+|\n+\<syntaxhighlight.*?\<\/syntaxhighlight\>\n+)//ms) {
+    #print("\n\nWORDWRAP:\n\n$str\n\n\n");
+
+    while ($str =~ s/(.*?)(\`\`\`.*?\`\`\`|\<syntaxhighlight.*?\<\/syntaxhighlight\>)//ms) {
+        #print("\n\nWORDWRAP BLOCK:\n\n$1\n\n ===\n\n$2\n\n\n");
        $retval .= wordwrap_paragraphs($1); # wrap it.
-        $retval .= $2;  # don't wrap it.
+        $retval .= "$2\n\n";  # don't wrap it.
    }

    $retval .= wordwrap_paragraphs($str);  # wrap what's left.
    $retval =~ s/\n+$//;
+
+    #print("\n\nWORDWRAP DONE:\n\n$retval\n\n\n");
    return $retval;
 }

-
-sub wikify {
+# This assumes you're moving from Markdown (in the Doxygen data) to Wiki, which
+#  is why the 'md' section is so sparse.
+sub wikify_chunk {
    my $wikitype = shift;
    my $str = shift;
+    my $codelang = shift;
+    my $code = shift;
+
+    #print("\n\nWIKIFY CHUNK:\n\n$str\n\n\n");

    if ($wikitype eq 'mediawiki') {
        # Convert obvious SDL things to wikilinks.
        $str =~ s/\b(SDL_[a-zA-Z0-9_]+)/[[$1]]/gms;

        # Make some Markdown things into MediaWiki...
-        $str =~ s/\`\`\`(c\+\+|c)(.*?)\`\`\`/<syntaxhighlight lang='$1'>$2<\/syntaxhighlight>/gms;

        # <code></code> is also popular.  :/
        $str =~ s/\`(.*?)\`/<code>$1<\/code>/gms;
@ -139,71 +148,55 @@ sub wikify {

        # bullets
        $str =~ s/^\- /* /gm;
+
+        if (defined $code) {
+            $str .= "<syntaxhighlight lang='$codelang'>$code<\/syntaxhighlight>";
+        }
    } elsif ($wikitype eq 'md') {
        # Convert obvious SDL things to wikilinks.
        $str =~ s/\b(SDL_[a-zA-Z0-9_]+)/[$1]($1)/gms;
+        if (defined $code) {
+            $str .= "```$codelang$code```";
+        }
    }
+
+    #print("\n\nWIKIFY CHUNK DONE:\n\n$str\n\n\n");
+
    return $str;
 }

-sub dewikify {
+sub wikify {
    my $wikitype = shift;
    my $str = shift;
-    return '' if not defined $str;
-    my @lines = split /\n/, $str;
-    return '' if scalar(@lines) == 0;
+    my $retval = '';
+
+    #print("WIKIFY WHOLE:\n\n$str\n\n\n");
+
+    while ($str =~ s/\A(.*?)\`\`\`(c\+\+|c)(.*?)\`\`\`//ms) {
+        $retval .= wikify_chunk($wikitype, $1, $2, $3);
+    }
+    $retval .= wikify_chunk($wikitype, $str, undef, undef);
+
+    #print("WIKIFY WHOLE DONE:\n\n$retval\n\n\n");
+
+    return $retval;
+}
+
+
+sub dewikify_chunk {
+    my $wikitype = shift;
+    my $str = shift;
+    my $codelang = shift;
+    my $code = shift;
+
+    #print("\n\nDEWIKIFY CHUNK:\n\n$str\n\n\n");

-    my $iwikitype = 0;
    if ($wikitype eq 'mediawiki') {
-        $iwikitype = 1;
-    } elsif ($wikitype eq 'md') {
-        $iwikitype = 2;
-    } else {
-        die("Unexpected wikitype '$wikitype'\n");
-    }
-
-    while (1) {
-        my $l = shift @lines;
-        last if not defined $l;
-        chomp($l);
-        $l =~ s/\A\s*//;
-        $l =~ s/\s*\Z//;
-        next if ($l eq '');
-        next if ($iwikitype == 1) and ($l =~ /\A\= .*? \=\Z/);
-        next if ($iwikitype == 1) and ($l =~ /\A\=\= .*? \=\=\Z/);
-        next if ($iwikitype == 2) and ($l =~ /\A\#\# /);
-        unshift @lines, $l;
-        last;
-    }
-
-    while (1) {
-        my $l = pop @lines;
-        last if not defined $l;
-        chomp($l);
-        $l =~ s/\A\s*//;
-        $l =~ s/\s*\Z//;
-        next if ($l eq '');
-        push @lines, $l;
-        last;
-    }
-
-    $str = '';
-    foreach (@lines) {
-        chomp;
-        s/\A\s*//;
-        s/\s*\Z//;
-        $str .= "$_\n";
-    }
-
-    if ($iwikitype == 1) {  #($wikitype eq 'mediawiki')
        # Doxygen supports Markdown (and it just simply looks better than MediaWiki
-        # when looking at the raw headers, so do some conversions here as necessary.
+        # when looking at the raw headers), so do some conversions here as necessary.

        $str =~ s/\[\[(SDL_[a-zA-Z0-9_]+)\]\]/$1/gms;  # Dump obvious wikilinks.

-        # convert mediawiki syntax highlighting to Markdown backticks.
-        $str =~ s/\<syntaxhighlight lang='?(.*?)'?>(.*?)<\/syntaxhighlight>/```$1$2```/gms;
-
        # <code></code> is also popular.  :/
        $str =~ s/\<code>(.*?)<\/code>/`$1`/gms;

@ -220,9 +213,36 @@ sub dewikify {
        $str =~ s/^\* /- /gm;
    }

+    if (defined $code) {
+        $str .= "```$codelang$code```";
+    }
+
+    #print("\n\nDEWIKIFY CHUNK DONE:\n\n$str\n\n\n");
+
    return $str;
 }

+sub dewikify {
+    my $wikitype = shift;
+    my $str = shift;
+    return '' if not defined $str;
+
+    #print("DEWIKIFY WHOLE:\n\n$str\n\n\n");
+
+    $str =~ s/\A[\s\n]*\= .*? \=\s*?\n+//ms;
+    $str =~ s/\A[\s\n]*\=\= .*? \=\=\s*?\n+//ms;
+
+    my $retval = '';
+    while ($str =~ s/\A(.*?)<syntaxhighlight lang='?(.*?)'?>(.*?)<\/syntaxhighlight\>//ms) {
+        $retval .= dewikify_chunk($wikitype, $1, $2, $3);
+    }
+    $retval .= dewikify_chunk($wikitype, $str, undef, undef);
+
+    #print("DEWIKIFY WHOLE DONE:\n\n$retval\n\n\n");
+
+    return $retval;
+}
+
 sub usage {
    die("USAGE: $0 <source code git clone path> <wiki git clone path> [--copy-to-headers|--copy-to-wiki] [--warn-about-missing]\n\n");
 }
@ -280,8 +300,23 @@ while (readdir(DH)) {
            chomp;
            push @templines, $_;
            last if /\A\s*\*\/\Z/;
-            s/\A\s*\*\s*//;
-            $str .= "$_\n";
+            if (s/\A\s*\*\s*\`\`\`/```/) {  # this is a hack, but a lot of other code relies on the whitespace being trimmed, but we can't trim it in code blocks...
+                $str .= "$_\n";
+                while (<FH>) {
+                    chomp;
+                    push @templines, $_;
+                    s/\A\s*\*\s?//;
+                    if (s/\A\s*\`\`\`/```/) {
+                        $str .= "$_\n";
+                        last;
+                    } else {
+                        $str .= "$_\n";
+                    }
+                }
+            } else {
+                s/\A\s*\*\s*//;
+                $str .= "$_\n";
+            }
        }

        my $decl = <FH>;
@ -341,6 +376,7 @@ while (readdir(DH)) {
        }

        #print("$fn:\n$str\n\n");
+
        $headerfuncs{$fn} = $str;
        $headerdecls{$fn} = $decl;
        $headerfuncslocation{$fn} = $dent;
@ -424,8 +460,7 @@ while (readdir(DH)) {
            die("Unexpected wiki file type. Fixme!\n");
        }

-        my $str = ($current_section eq 'Code Examples') ? $orig : $_;
-        $sections{$current_section} .= "$str\n";
+        $sections{$current_section} .= "$orig\n";
    }
    close(FH);

@ -646,14 +681,25 @@ if ($copy_direction == 1) {  # --copy-to-headers
        @doxygenlines = (@briefsplit, @doxygenlines);

        my $remarks = '';
+        # !!! FIXME: wordwrap and wikify might handle this, now.
        while (@doxygenlines) {
            last if $doxygenlines[0] =~ /\A\\/;  # some sort of doxygen command, assume we're past the general remarks.
            my $l = shift @doxygenlines;
-            $l =~ s/\A\s*//;
-            $l =~ s/\s*\Z//;
-            $remarks .= "$l\n";
+            if ($l =~ /\A\`\`\`/) {  # syntax highlighting, don't reformat.
+                $remarks .= "$l\n";
+                while ((@doxygenlines) && (not $l =~ /\`\`\`\Z/)) {
+                    $l = shift @doxygenlines;
+                    $remarks .= "$l\n";
+                }
+            } else {
+                $l =~ s/\A\s*//;
+                $l =~ s/\s*\Z//;
+                $remarks .= "$l\n";
+            }
        }

+        #print("REMARKS:\n\n $remarks\n\n");
+
        $remarks = wordwrap(wikify($wikitype, $remarks));
        $remarks =~ s/\A\s*//;
        $remarks =~ s/\s*\Z//;