semlit_pl.txt


00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
  #!/usr/local/bin/perl -w
  
  # repeat the shebang for doc purposes (the real shebang needs to be line 1)
  #!/usr/local/bin/perl -w
  
  # semlit.pl - program to implement Steve Ford's "Semi-Literate Documentation".
  # See http://wiki.geeky-boy.com/w/index.php?title=Sford_Semi-literate_documentation
  
  # This work is dedicated to the public domain under CC0 1.0 Universal:
  # http://creativecommons.org/publicdomain/zero/1.0/
  # 
  # To the extent possible under law, Steven Ford has waived all copyright
  # and related or neighboring rights to this work. In other words, you can 
  # use this code for any purpose without any restrictions.
  # This work is published from: United States.
  # Project home: https://github.com/fordsfords/semlit
  
  use strict;
  use English;  # allow long names for special variables
  use Getopt::Long qw(:config no_ignore_case bundling);
  use Text::Tabs;
  use File::Basename;
  
  # globals

  my $tool = "semlit.pl";
  # Synopsis printed by usage() and help().  It lists every option passed to
  # GetOptions() below, and a single "file" because exactly one doc file is
  # required (see the @ARGV check below).
  my $usage_str = "$tool [-h] [-d delim] [-f fs] [-i initialsource] [-I dir] [-t tabstop] file";

  my $main_doc_filename;     # doc file named on the command line
  my $cur_file_name = "";    # file being processed right now; reported by err()
  my $cur_file_linenum = 0;  # line within $cur_file_name; reported by err()

  # html anchor for a just-started source block, held until the next source
  # line is written to the html-ified source.
  my $global_src_buffer = "";

  my $doc_html_filename;  # name of the generated doc html file
  my $doc_html_outfd;     # output handle for $doc_html_filename

  my $src_html_outfd;     # output handle for the html-ified source file

  my %srcblocks;  # lines of source named blocks
  my %active_srcblocks;  # source blocks being added to at this moment
  my %block_numrefs;  # number of doc references to each source block

  my $exit_status = 0;  # assume success

  # process command options and parameters.  See help().

  my $o_help;        # -h
  my $o_fs = ",";    # -f
  my $o_delim = "="; # -d
  my $o_initialsource = "blank.html";  # -i
  my @o_incdirs = (".");  # GetOptions will append additional dirs for each "-I".
  $tabstop = 4;  # defined and used by Text::Tabs - see "expand()" function

  GetOptions("h"=> \$o_help, "d=s" => \$o_delim, "f=s" => \$o_fs, "i=s" => \$o_initialsource, "I=s" => \@o_incdirs, "t=i" => \$tabstop) || usage("Error in GetOptions");
  if (defined($o_help)) {
      help();  # -h takes no value; $o_help is only defined when -h was given
  }

  # exactly one positional parameter is accepted: the main doc file
  if (scalar(@ARGV) != 1) {
      usage("Error, .sldoc file missing");
  }
  $main_doc_filename = $ARGV[0];
  if ( ! -r "$main_doc_filename" ) {
      usage("Error, could not read '$main_doc_filename'");
  }
  
  # open main doc file

  $doc_html_filename = basename($main_doc_filename) . ".html";  # strip directory
  open($doc_html_outfd, ">", $doc_html_filename) || die "Error, could not open htmlfile '$doc_html_filename': $!";

  # Main loop; process_doc_file() reads the doc file and expands every semlit
  # command in it (which in turn processes included docs and source files).

  my $doc_html_str = process_doc_file($main_doc_filename);
  if (! defined($doc_html_str)) {
      $doc_html_str = "";  # doc could not be processed; the error was already reported
  }

  # Fix up multiple source references.  Each inserted block ends with an
  # "<!-- endblock NAME_ref_N -->" marker.  For blocks inserted more than once,
  # replace each marker with links that chain the references together.
  foreach my $blockname (keys(%block_numrefs)) {
      my $num_refs = $block_numrefs{$blockname};
      if ($num_refs > 1) {
          my $first_block = $blockname . "_ref_1";
          my $last_block = $blockname . "_ref_" . $num_refs;

          for (my $refnum = 1; $refnum <= $num_refs; $refnum ++) {
              my $this_block = $blockname . "_ref_" . $refnum;
              my $prev_block = $blockname . "_ref_" . ($refnum - 1);
              my $next_block = $blockname . "_ref_" . ($refnum + 1);

              my $links;
              if ($refnum == 1) {
                  # First ref points to next and last
                  $links = "<a href=\"#$next_block\">next ref</a>  <a href=\"#$last_block\">last ref</a>";
              } elsif ($refnum == $num_refs) {
                  # last ref points to first and previous
                  $links = "<a href=\"#$first_block\">first ref</a>  <a href=\"#$prev_block\">prev ref</a>";
              } else {
                  # Middle refs point to next and previous
                  $links = "<a href=\"#$next_block\">next ref</a>  <a href=\"#$prev_block\">prev ref</a>";
              }

              # \Q...\E: the block name is user-supplied text, not a pattern.
              $doc_html_str =~ s/<\/pre><!-- endblock \Q$this_block\E -->/$links<\/pre>/s;
          }
      }
  }

  # write doc html file

  print $doc_html_outfd "$doc_html_str\n";
  # buffered write errors only surface at close, so check it
  close($doc_html_outfd) || die "Error, could not close htmlfile '$doc_html_filename': $!";
  
  # Write index.html: a two-column frameset showing the generated doc page in
  # the "doc" frame and the initial source page in the "src" frame.

  my $index_o_file;
  if (! open($index_o_file, ">", "index.html")) {
      die "Error, could not open htmlfile 'index.html'";
  }
  my $frameset_html = <<__EOF__;
  <html><head></head>
  <frameset cols="50%,*">
  <frame src="$doc_html_filename" name="doc">
  <frame src="$o_initialsource" name="src">
  </frameset>
  </html>
  __EOF__
  print $index_o_file $frameset_html;
  close($index_o_file);

  # Write blank.html: the placeholder page that is the default content of the
  # source frame.

  my $blank_o_file;
  if (! open($blank_o_file, ">", "blank.html")) {
      die "Error, could not open htmlfile 'blank.html'";
  }
  my $blank_html = "<html><head></head><body>Click a source line number to see the line in context.</body></html>\n";
  print $blank_o_file $blank_html;
  close($blank_o_file);

  # Finished; the exit status is the number of errors reported.
  exit($exit_status);
  
  
  # End of main program, start subroutines.
  
  
  # process_doc_file - read a semlit doc file and execute the semlit commands
  # embedded in it.
  #   $doc_filename - name of the doc file.  It is tried relative to each
  #       directory in @o_incdirs, in order; the first one that opens is used.
  # Returns the text of the file with each semlit command replaced by the
  # string returned from semlit_cmd().  If the file cannot be opened, the error
  # is reported with err() and the empty string is returned, so callers can
  # always splice the result into the surrounding text.
  sub process_doc_file {
      my ($doc_filename) = @_;
      my $doc_infd;

      # open doc file, using one or more search directories

      my $open_success = 0;
      foreach my $incdir (@o_incdirs) {
          if (open($doc_infd, "<", "$incdir/$doc_filename")) {
              $open_success = 1;
              last;  # break out of foreach
          }
      }
      if (! $open_success) {
          err("could not open doc file '$doc_filename', skipping");
          return "";  # not undef: the result is concatenated by the caller
      }

      # Read entire file into memory

      my @doctexts = <$doc_infd>;
      close($doc_infd);
      chomp(@doctexts);  # remove line delims from every line
      my $num_lines = scalar(@doctexts);  # count lines in file
      my $doctext = join("\n", @doctexts) . "\n";  # combine as a single string
      $doctext =~ s/\r//gs;  # remove carriage returns, if any

      # err() reports the current file and line; save the caller's position so
      # it can be restored when this (possibly nested) file is finished.
      my ($save_doc_filename, $save_doc_linenum) = ($cur_file_name, $cur_file_linenum);
      ($cur_file_name, $cur_file_linenum) = ($doc_filename, 0);

      # process semlit commands, one per iteration, first match first
      while ($doctext =~ /$o_delim\s*semlit\s*$o_fs\s*([^$o_delim]+)$o_delim/is) {
          my $cmd = $1;  # text of command (minus standard stuff)
          my $prefix = $PREMATCH;  # text preceding the command
          my $suffix = $POSTMATCH;  # text after the command

          # calculate line number containing the start of this semlit command:
          # the file's line count minus the number of lines after the command.
          $cur_file_linenum = $num_lines - scalar(my @t = split("\n", $suffix)) + 1;

          my $repl = semlit_cmd($cmd);

          # Commands are removed, and often replaced with some result
          $doctext = $prefix . $repl . $suffix;
      }  # while

      ($cur_file_name, $cur_file_linenum) = ($save_doc_filename, $save_doc_linenum);

      return $doctext;
  }  # process_doc_file
  
  
  # Parse and execute semlit command
  #   $cmd - text of the command with the delimiters and the leading "semlit"
  #       field removed, e.g. "insert,blockname" when the field separator is ",".
  # Returns the text that replaces the command in a doc file (possibly "").
  # Invalid or malformed commands are reported with err() and return "".
  # Recognized commands:
  #   tabstop,N              set the tab expansion width used for source
  #   srcfile,IN,OUT         process source file IN, write plain source OUT
  #   initialsource,FILE     set the page first shown in the source frame
  #   include,FILE           process another doc file and insert its result
  #   insert,BLOCK           insert a previously defined source block
  #   block,BLOCK            start collecting source lines into BLOCK
  #   endblock,BLOCK         stop collecting source lines into BLOCK
  #   tooltip,FILE,TEXT      show TEXT with the contents of FILE as hover text
  sub semlit_cmd {
      my ($cmd) = @_;

      # semlit tabstop - doc: source tab expansion
      if ($cmd =~ /^tabstop\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) {
          if ($1 =~ /^\d+$/) {
              $tabstop = $1;  # used by Text::Tabs
              return "";
          } else {
              err("Tabstop value '$1' must be numeric");
              return "";
          }
      }

      # semlit srcfile - doc: read and process source file
      elsif ($cmd =~ /^srcfile\s*$o_fs\s*([^\s$o_fs]+)\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) {
          return process_src_file($1, $2);
      }

      # semlit initialsource - doc: set initial source frame
      elsif ($cmd =~ /^initialsource\s*$o_fs\s*([^\s$o_fs]+)\s*/i) {
          $o_initialsource = $1;
          return "";
      }

      # semlit include - doc: read and process doc file
      elsif ($cmd =~ /^include\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) {
          return process_doc_file($1);
      }

      # semlit insert - doc: insert a source block
      elsif ($cmd =~ /^insert\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) {
          my $block_name = $1;
          if (exists($srcblocks{$block_name})) {
              # Each insertion of a block gets its own numbered anchor
              # (NAME_ref_1, NAME_ref_2, ...).
              my $num_refs = 1;
              my $block_ref_name = $block_name;
              if (defined($block_numrefs{$block_name})) {
                  $num_refs = $block_numrefs{$block_name} + 1;
                  $block_ref_name = $block_name . "_ref_$num_refs";
              }
              $block_numrefs{$block_name} = $num_refs;

              # The trailing "endblock" html comment is a marker that the main
              # program replaces with next/prev links for multiply-used blocks.
              my $block_str = $srcblocks{$block_name};
              return <<__EOF__;
  <a name="$block_ref_name" id="$block_ref_name"><\/a>
  <small><pre>
  $block_str
  <\/pre><!-- endblock $block_ref_name --></small>\n
  __EOF__
          } else {
              err("attempt to insert block named '$block_name' but block not defined");
              return "";
          }
      }

      # semlit block - src: start a named block of source
      elsif ($cmd =~ /^block\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) {
          my $block_name = $1;
          if (defined($srcblocks{$block_name})) {
              err("block '$block_name' already defined");
              return "";
          }
          $srcblocks{$block_name} = "";
          $block_numrefs{$block_name} = 0;
          $active_srcblocks{$block_name} = $cur_file_linenum;

          # Queue the anchor for this block; it is written in front of the next
          # source line.  Append rather than assign so that when several blocks
          # start before the next source line, every one keeps its anchor.
          $global_src_buffer .= "<span name=\"$block_name\" id=\"$block_name\"><\/span>";
          return "";
      }

      # semlit endblock - src: end a named block of source
      elsif ($cmd =~ /^endblock\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) {
          my $block_name = $1;
          if (exists($active_srcblocks{$block_name})) {
              delete($active_srcblocks{$block_name});
              $srcblocks{$block_name} =~ s/\n$//s;  # drop the block's final newline
              return "";
          } else {
              err("found endblock for '$block_name', which is not active");
              return "";
          }
      }

      # semlit tooltip - create hover over text for a phrase
      elsif ($cmd =~ /^tooltip\s*$o_fs\s*([^\s$o_fs]+)\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) {
          my $text_source = $1;  # file holding the hover text
          my $text_link = $2;    # visible phrase
          # NOTE(review): the file contents are placed in the title attribute
          # as-is, so the file must not contain a double quote.
          my $contents = file_get_contents($text_source);
          return <<__EOF__;
  <a href="#" title="$contents" style="color:#2222ee;border-bottom:1px dotted #2222ee;text-decoration: none;">$text_link</a>
  __EOF__
      }


      # unrecognized semlit
      else {
          err("semlit command '$cmd' invalid or malformed");
          return "";
      }
  }  # semlit_cmd
  
  
  # process semlit srcfile command
  #   $src_filename - source file containing semlit commands.  It is tried
  #       relative to each directory in @o_incdirs, in order.
  #   $plain_src_filename - output file that receives the source with the
  #       semlit command lines removed.
  # Also writes "$src_filename.html", an html rendering of the source with
  # line numbers, and appends the lines of each active named block to
  # %srcblocks for later use by the "insert" command.
  # Returns an html link to the plain source file, or "" (after reporting with
  # err()) if any of the three files cannot be opened.
  sub process_src_file {
      my ($src_filename, $plain_src_filename) = @_;
      my $slsrc_infd;
      my $src_outfd;
      my $src_lines_td;    # table cell holding the line-number column
      my $src_content_td;  # table cell holding the source text column

      # open source file, using one or more search directories
      my $open_success = 0;
      foreach my $incdir (@o_incdirs) {
          if (open($slsrc_infd, "<", "$incdir/$src_filename")) {
              $open_success = 1;
              last;  # break out of foreach
          }
      }
      if (! $open_success) {
          err("could not open src file '$src_filename', skipping");
          return "";
      }

      # create and write initial content to html-ified source file
      if (! open($src_html_outfd, ">", "$src_filename.html")) {
          err("could not open output source html file '$src_filename.html', skipping");
          close($slsrc_infd);
          return "";
      }
      print $src_html_outfd <<__EOF__;
  <!DOCTYPE html><html><head><title>$plain_src_filename</title>
  <link rel="stylesheet" href="//code.jquery.com/ui/1.11.4/themes/smoothness/jquery-ui.css">
  <script src="//code.jquery.com/jquery-1.10.2.js"></script>
  <script src="//code.jquery.com/ui/1.11.4/jquery-ui.js"></script>
  <link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/8.5/styles/default.min.css">
  <script src="//cdnjs.cloudflare.com/ajax/libs/highlight.js/8.5/highlight.min.js"></script>
  <script>
    \$(function() {
      \$( document ).tooltip();
    });
  </script>
  <style>
  #code {background-color:#ffffff;};
  </style>
  </head>
  <body><h1>$plain_src_filename</h1>
  <script>hljs.initHighlightingOnLoad();</script>
  <small><pre><code id="code"><table border=0 cellpadding=0 cellspacing=0><tr>
  __EOF__

      # Create plaintext source file (without semlit commands)
      if (! open($src_outfd, ">", "$plain_src_filename")) {
          err("could not open output src '$plain_src_filename', skipping");
          close($slsrc_infd);
          close($src_html_outfd);
          return "";
      }

      # err() reports the current file and line; save the caller's position
      my ($save_doc_filename, $save_doc_linenum) = ($cur_file_name, $cur_file_linenum);
      ($cur_file_name, $cur_file_linenum) = ($src_filename, 0);
      my $src_linenum = 0;  # separate variable to track source output file

      $src_lines_td = "<td>";
      $src_content_td = "<td>";

      my $iline;
      while (defined($iline = <$slsrc_infd>)) {
          chomp($iline);  # remove line delim
          $iline .= "\n";  # add newline
          $iline =~ s/\r//gs;  # remove carriage returns, if any
          $cur_file_linenum ++;

          # check for semlit commands
          if ($iline =~ /$o_delim\s*semlit\s*$o_fs\s*([^$o_delim]+)$o_delim/i) {
              semlit_cmd($1);
              # discard command line
          }
          else {
              $src_linenum ++;  # don't count semlit command lines

              print $src_outfd $iline;

              # fix up source for html rendering (tab expansion, special char encoding)
              $iline = expand($iline);  # expand tabs according to $tabstop.
              $iline =~ s/\&/\&amp;/g;  $iline =~ s/</\&lt;/g;  $iline =~ s/>/\&gt;/g;

              # if we are in at least one block, link the source line to the
              # first doc reference of the most recently started block
              if (scalar(keys(%active_srcblocks)) > 0) {
                  # %active_srcblocks values are starting line numbers, so the
                  # comparison must be numeric.  Descending sort so that
                  # element 0 is the block with the largest starting line.
                  my @active_blocks = sort { $active_srcblocks{$b} <=> $active_srcblocks{$a} } keys(%active_srcblocks);
                  my $targ = $active_blocks[0] . "_ref_1";
                  # names are passed as sprintf arguments, never as part of the
                  # format, so a "%" in a name cannot corrupt the output
                  $src_lines_td .= sprintf("<a href=\"%s#%s\" target=\"doc\">%05d</a>\n", $doc_html_filename, $targ, $src_linenum);
                  if ($global_src_buffer) {
                      # emit the pending block anchor in front of this line
                      $src_content_td .= $global_src_buffer . "  " . $iline;
                      $global_src_buffer = "";
                  }
                  else {
                      $src_content_td .= "  " . $iline;
                  }

                  # for each open source block on this line of source, link the doc block to that source block
                  foreach my $block_name (keys(%active_srcblocks)) {
                      my $block_line = sprintf("<a href=\"%s.html#%s\" target=\"src\">%05d</a>  %s", $cur_file_name, $block_name, $src_linenum, $iline);
                      $srcblocks{$block_name} .= $block_line;
                  }
              } else {
                  # no active blocks
                  $src_lines_td .= sprintf("%05d\n", $src_linenum);
                  $src_content_td .= "  " . $iline;
              }
          }

      }  # while

      # if the global buffer is still full, dump it here.  There is no source
      # line left to follow it ($iline is undef once the loop has ended).
      if ($global_src_buffer) {
          $src_content_td .= $global_src_buffer . "  ";
          $global_src_buffer = "";
      }

      $src_lines_td .= "<\/td>";
      $src_content_td .= "<\/td>";

      print $src_html_outfd $src_lines_td;
      print $src_html_outfd $src_content_td;

      close($slsrc_infd);
      close($src_outfd);

      # The runs of newlines leave blank space after the end of the listing.
      print $src_html_outfd "</tr></table></code>\n";
      print $src_html_outfd "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n";
      print $src_html_outfd "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n";
      print $src_html_outfd "</pre></small></body></html>\n";
      close($src_html_outfd);

      # if the source file started a block but reached eof without ending it, end it here.
      foreach my $block_name (keys(%active_srcblocks)) {
          err("block named '$block_name' started but not ended");
          semlit_cmd("endblock$o_fs$block_name");  # end it for the user
      }

      # the semlit.srcfile command writes a link to the plaintext source file
      ($cur_file_name, $cur_file_linenum) = ($save_doc_filename, $save_doc_linenum);
      return "<a href=\"$plain_src_filename\">$plain_src_filename</a>";
  }  # process_src_file
  
  
  # err - report a non-fatal error on stderr, tagged with the file and line
  # currently being processed, and count it in the program's exit status.
  #   $msg - description of the problem.
  sub err {
      my ($msg) = @_;
      my $where = "$cur_file_name:$cur_file_linenum";

      print STDERR "Error [" . $where . "], " . $msg . "\n";
      $exit_status ++;
  }  # err
  
  
  # usage - print an optional error message and the usage synopsis on stderr,
  # then terminate the program with an incremented (non-zero) exit status.
  #   $err_str - optional message, printed prefixed with the tool name.
  sub usage {
      my($err_str) = @_;

      print STDERR "$tool: $err_str\n\n" if (defined $err_str);
      print STDERR "Usage: $usage_str\n\n";

      # count this as an error and leave
      exit(++ $exit_status);
  }  # usage
  
  # file_get_contents - return the entire contents of a file as one string.
  #   $text_file - name of the file to read.  It is always treated as a plain
  #       file name: the three-argument open() below never interprets leading
  #       or trailing characters such as ">" or "|" as an open mode.
  # Holds a shared lock on the file while it is being read.
  # Dies if the file cannot be opened or locked.
  sub file_get_contents{
      my ($text_file) = @_;
      use Fcntl qw(:flock);  # LOCK_SH, LOCK_UN

      open(my $text_fd, "<", $text_file) || die "Error, could not open '$text_file': $!";
      flock($text_fd, LOCK_SH) || die "Error, could not lock '$text_file': $!";  # wait for lock
      my $slurp = do{local $/; <$text_fd>};  # undef record separator: read it all
      flock($text_fd, LOCK_UN);  # release the lock
      close($text_fd);

      return $slurp;
  } # file_get_contents
  
  # help - print the usage synopsis and a description of every option on
  # stdout, then exit with the current exit status.
  #   $err_str - optional message, printed first, prefixed with the tool name.
  sub help {
      my($err_str) = @_;

      if (defined $err_str) {
          print "$tool: $err_str\n\n";
      }
      print <<__EOF__;
  Usage: $usage_str
  Where:
      -h - print help screen
      -d delim - delimiter character at start and end of a semlit command.
              (default to '=')
      -f fs - field separator character within a semlit command.
              (default to ',')
      -i initialsource - file name for initial source frame.
              (default to "blank.html")  Also, initialsource semlit command.
      -I dir - directory to find files for 'srcfile' and 'include' commands.
              (default to ".")  The "-I dir" option can be repeated.
      -t tabstop - convert tabs to "tabstop" spaces.
              (default to '4')
      file - the semlit doc file to process.  Exactly one is required.
  
  __EOF__

      exit($exit_status);
  }  # help