$plain_src_filename

#!/usr/local/bin/perl -w # repeat the shebang for doc purposes (the real shebang needs to be line 1) #!/usr/local/bin/perl -w # semlit.pl - program to implement Steve Ford's "Semi-Literate Documentation". # See http://wiki.geeky-boy.com/w/index.php?title=Sford_Semi-literate_documentation # # This code and its documentation is Copyright 2012, 2015 Steven Ford, http://geeky-boy.com # and licensed "public domain" style under Creative Commons "CC0": http://creativecommons.org/publicdomain/zero/1.0/ # To the extent possible under law, the contributors to this project have # waived all copyright and related or neighboring rights to this work. # In other words, you can use this code for any purpose without any # restrictions. This work is published from: United States. The project home # is https://github.com/fordsfords/semlit/tree/gh-pages use strict; use English; # allow long names for special variables use Getopt::Long qw(:config no_ignore_case bundling); use Text::Tabs; use File::Basename; # globals my $tool = "semlit.pl"; my $usage_str = "$tool [-h] [-d delim] [-f fs] [-I dir] [-t tabstop] [files]"; my $main_doc_filename; my $cur_file_name = ""; my $cur_file_linenum = 0; my $global_src_buffer = ""; my $doc_html_filename; my $doc_html_outfd; my $src_html_outfd; my %srcblocks; # lines of source named blocks my %active_srcblocks; # source blocks being added to at this moment my %block_numrefs; # number of doc references to each source block my $exit_status = 0; # assume success # process command options and parameters. See help(). my $o_help; # -h my $o_fs = ","; # -f my $o_delim = "="; # -d my $o_initialsource = "blank.html"; # -i my @o_incdirs = ("."); # GetOptions will append additional dirs for each "-I". $tabstop = 4; # defined and used by Text::Tabs - see "expand()" function GetOptions("h"=> \$o_help, "d=s" => \$o_delim, "f=s" => \$o_fs, "i=s" => \$o_initialsource, "I=s" => \@o_incdirs, "t=i" => \$tabstop) || usage("Error in GetOptions"); if (defined($o_help)) { help(); # if -h had a value, it would be in $opt_h } if (scalar(@ARGV) != 1) { usage("Error, .sldoc file missing"); } $main_doc_filename = $ARGV[0]; if ( ! -r "$main_doc_filename" ) { usage("Error, could not read '$main_doc_filename'"); } # open main doc file $doc_html_filename = basename($main_doc_filename) . ".html"; # strip directory open($doc_html_outfd, ">", $doc_html_filename) || die "Error, could not open htmlfile '$doc_html_filename'"; # Main loop; read each line in doc file my $doc_html_str = process_doc_file($main_doc_filename); # fix up multiple source references foreach my $blockname (keys(%block_numrefs)) { if ($block_numrefs{$blockname} > 1) { # First ref points to next and last my $refnum = 1; my $this_block = $blockname . "_ref_" . ($refnum); my $first_block = $this_block; my $last_block = $blockname . "_ref_" . $block_numrefs{$blockname}; my $next_block = $blockname . "_ref_" . ($refnum + 1); $doc_html_str =~ s/<\/pre>/next ref<\/a> last ref<\/a><\/pre>/s; # Middle refs point to previous and next my $prev_block = $this_block; for ($refnum = 2; $refnum <= $block_numrefs{$blockname} - 1; $refnum ++) { # middle refs point to prev and next $this_block = $blockname . "_ref_" . ($refnum); $next_block = $blockname . "_ref_" . ($refnum + 1); $doc_html_str =~ s/<\/pre>/next ref<\/a> prev ref<\/a><\/pre>/s; $prev_block = $this_block; } # last ref points to first and previous $this_block = $blockname . "_ref_" . ($refnum); $doc_html_str =~ s/<\/pre>/first ref<\/a> prev ref<\/a><\/pre>/s; } } # write doc html file print $doc_html_outfd "$doc_html_str\n"; close($doc_html_outfd); # Create frameset page my $index_o_file; open($index_o_file, ">", "index.html") || die "Error, could not open htmlfile 'index.html'"; print $index_o_file <<__EOF__; __EOF__ close($index_o_file); # Create blank page for initial source frame my $blank_o_file; open($blank_o_file, ">", "blank.html") || die "Error, could not open htmlfile 'blank.html'"; print $blank_o_file "Click a source line number to see the line in context.\n"; close($blank_o_file); # All done. exit($exit_status); # End of main program, start subroutines. sub process_doc_file { my ($doc_filename) = @_; my $doc_infd; # open source file, using one or more search directories my $incdir; my $open_success = 0; foreach $incdir (@o_incdirs) { if (open($doc_infd, "<", "$incdir/$doc_filename")) { $open_success = 1; last; # break out of foreach } } if (! $open_success) { err("could not open doc file '$doc_filename', skipping"); return; } # Read entire file into memory my @doctexts = <$doc_infd>; close($doc_infd); chomp(@doctexts); # remove line delims from every line my $num_lines = scalar(@doctexts); # count lines in file my $doctext = join("\n", @doctexts) . "\n"; # combine as a single string $doctext =~ s/\r//gs; # remove carriage returns, if any my ($save_doc_filename, $save_doc_linenum) = ($cur_file_name, $cur_file_linenum); ($cur_file_name, $cur_file_linenum) = ($doc_filename, 0); # process semlit commands while ($doctext =~ /$o_delim\s*semlit\s*$o_fs\s*([^$o_delim]+)$o_delim/is) { my $cmd = $1; # text of command (minus standard stuff) my $prefix = $PREMATCH; # text preceiding the command my $suffix = $POSTMATCH; # text after the command # calculate line number containing the start of this semlit command $cur_file_linenum = $num_lines - scalar(my @t = split("\n", $suffix)) + 1; my $repl = semlit_cmd($cmd); # Commands are removed, and often replaced with some result $doctext = $prefix . $repl . $suffix; } # while ($cur_file_name, $cur_file_linenum) = ($save_doc_filename, $save_doc_linenum); return $doctext; } # process_doc_file # Parse and execute semlit command sub semlit_cmd { my ($cmd) = @_; # semlit tabstop - doc: source tab expansion if ($cmd =~ /^tabstop\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) { if ($1 =~ /^\d+$/) { $tabstop = $1; # used by Text::Tabs return ""; } else { err("Tabstop value '$1' must be numeric"); return ""; } } # semlit srcfile - doc: read and process source file elsif ($cmd =~ /^srcfile\s*$o_fs\s*([^\s$o_fs]+)\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) { return process_src_file($1, $2); } # semlit initialsource - doc: set initial source frame elsif ($cmd =~ /^initialsource\s*$o_fs\s*([^\s$o_fs]+)\s*/i) { $o_initialsource = $1; return ""; } # semlit include - doc: read and process doc file elsif ($cmd =~ /^include\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) { return process_doc_file($1); } # semlit insert - doc: insert a source block elsif ($cmd =~ /^insert\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) { my $block_name = $1; if (exists($srcblocks{$block_name})) { my $num_refs = 1; my $block_ref_name = $block_name; if (defined($block_numrefs{$block_name})) { $num_refs = $block_numrefs{$block_name} + 1; $block_ref_name = $block_name . "_ref_$num_refs"; } $block_numrefs{$block_name} = $num_refs; my $block_str = $srcblocks{$block_name}; return <<__EOF__; <\/a>

$block_str
<\/pre>\n
__EOF__
		} else {
			err("attempt to insert block named '$block_name' but block not defined");
			return "";
		}
	}

	# semlit block - src: start a named block of source
	elsif ($cmd =~ /^block\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) {
		my $block_name = $1;
		if (defined($srcblocks{$block_name})) {
			err("block '$block_name' already defined");
			return "";
		}
		$srcblocks{$block_name} = "";
		$block_numrefs{$block_name} = 0;
		$active_srcblocks{$block_name} = $cur_file_linenum;
		
		$global_src_buffer = "<\/span>";
		return "";
	}

	# semlit endblock - src: end a named block of source
	elsif ($cmd =~ /^endblock\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) {
		my $block_name = $1;
		if (exists($active_srcblocks{$block_name})) {
			delete($active_srcblocks{$block_name});
			$srcblocks{$block_name} =~ s/\n$//s;
			return "";
		} else {
			err("found endblock for '$block_name', which is not active");
			return "";
		}
	}

	# semlit tooltip - create hover over text for a phrase
	elsif ($cmd =~ /^tooltip\s*$o_fs\s*([^\s$o_fs]+)\s*$o_fs\s*([^\s$o_fs]+)\s*$/i) {
		my $text_source = $1;
		my $text_link = $2;
		my $contents = file_get_contents($text_source);
		return <<__EOF__;
$text_link
__EOF__
	}


	# unrecognized semlit
	else {
		err("semlit command '$cmd' invalid or malformed");
		return "";
	}
}  # semlit_cmd


# process semlit srcfile command
sub process_src_file {
	my ($src_filename, $plain_src_filename) = @_;
	my $slsrc_infd;
	my $src_outfd;
	my $src_lines_td;
	my $src_content_td;

	# open source file, using one or more search directories
	my $incdir;
	my $open_success = 0;
	foreach $incdir (@o_incdirs) {
		if (open($slsrc_infd, "<", "$incdir/$src_filename")) {
			$open_success = 1;
			last;  # break out of foreach
		}
	}
	if (! $open_success) {
		err("could not open src file '$src_filename', skipping");
		return "";
	}

	# create and write initial content to html-ified source file
	if (! open($src_html_outfd, ">", "$src_filename.html")) {
		err("could not open output source html file '$src_filename.html', skipping");
		close($slsrc_infd);
		return "";
	}
	print $src_html_outfd <<__EOF__;
$plain_src_filename








$plain_src_filename


__EOF__

	# Create plaintext source file (without semlit commands)
	if (! open($src_outfd, ">", "$plain_src_filename")) {
		err("could not open output src '$plain_src_filename', skipping");
		close($slsrc_infd);
		close($src_html_outfd);
		return "";
	}

	my ($save_doc_filename, $save_doc_linenum) = ($cur_file_name, $cur_file_linenum);
	($cur_file_name, $cur_file_linenum) = ($src_filename, 0);
	my $src_linenum = 0;  # separate variable to track source output file
	
	$src_lines_td = "";
	$src_content_td = " ";

	my $iline;
	while (defined($iline = <$slsrc_infd>)) {
		chomp($iline);  # remove line delim
		$iline .= "\n";  # add newline
		$iline =~ s/\r//gs;  # remove carriage returns, if any
		$cur_file_linenum ++;

		# check for semlit commands
		if ($iline =~ /$o_delim\s*semlit\s*$o_fs\s*([^$o_delim]+)$o_delim/i) {
			semlit_cmd($1);
			# discard command line
		}
		else {
			$src_linenum ++;  # don't count semlit command lines

			print $src_outfd $iline;

			# fix up source for html rendering (tab expansion, special char encoding)
			$iline = expand($iline);  # expand tabs according to $tabstop.
			$iline =~ s/\&/\&/g;  $iline =~ s//\>/g;

			# if we are in at least one block, link the source to the earliest block's first doc reference
			if (scalar(keys(%active_srcblocks)) > 0) {
				# descending sort so that elemet 0 is largest
				my @active_blocks = sort { $active_srcblocks{$b} cmp $active_srcblocks{$a} } keys(%active_srcblocks);
				my $targ = $active_blocks[0] . "_ref_1";
				$src_lines_td .= sprintf("%05d<\/a>\n", $src_linenum);
				if ($global_src_buffer) {
					$src_content_td .= sprintf("%s  %s", $global_src_buffer, $iline);
					$global_src_buffer = "";
				}
				else {
					$src_content_td .= sprintf("  %s", $iline);
				}

				# for each open source block on this line of source, link the doc block to the that source block
				foreach my $block_name (keys(%active_srcblocks)) {
					my $a = sprintf("%05d<\/a>  %s", $src_linenum, $iline);
					$srcblocks{$block_name} .= $a;
				}
			} else {
				# no active blocks
				my $a = sprintf("%05d\n", $src_linenum);
				my $c = sprintf("  %s", $iline);
				$src_lines_td .= $a;
				$src_content_td .= $c;
			}
		}

	}  # while

	# if the global buffer is still full, dump it here
	if ($global_src_buffer) {
		$src_content_td .= sprintf("%s  %s", $global_src_buffer, $iline);
		$global_src_buffer = "";
	}

	$src_lines_td .= "<\/td>";
	$src_content_td .= "<\/td>";

	print $src_html_outfd $src_lines_td;
	print $src_html_outfd $src_content_td;

	close($slsrc_infd);
	close($src_outfd);

	print $src_html_outfd "\n";
	print $src_html_outfd "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n";
	print $src_html_outfd "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n";
	print $src_html_outfd "\n";
	close($src_html_outfd);

	# if the source file started a block but reached eof without ending it, end it here.
	foreach (keys(%active_srcblocks)) {
		err("block named '$_' started but not ended");
		semlit_cmd("endblock$o_fs$_");  # end it for the user
	}

	# the semlit.srcfile command writes a link to the plaintext source file
	($cur_file_name, $cur_file_linenum) = ($save_doc_filename, $save_doc_linenum);
	return "$plain_src_filename";
}  # process_src_file


sub err {
	my ($msg) = @_;

	print STDERR "Error [$cur_file_name:$cur_file_linenum], $msg\n";
	$exit_status ++;
}  # err


sub usage {
	my($err_str) = @_;

	if (defined $err_str) {
		print STDERR "$tool: $err_str\n\n";
	}
	print STDERR "Usage: $usage_str\n\n";
	$exit_status ++;
	exit($exit_status);
}  # usage

sub file_get_contents{
	my ($text_file) = @_;
	open FILE, $text_file or die $!;
	flock FILE, 1 or die $!; 		# wait for lock
	seek(FILE, 0, 0); 		# move pointer to beginning
	my $slurp = do{local $/; };
	flock FILE, 8; 			# release the lock
	close(FILE);

	return $slurp;
} # file_get_contents

sub help {
	my($err_str) = @_;

	if (defined $err_str) {
		print "$tool: $err_str\n\n";
	}
	print <<__EOF__;
Usage: $usage_str
Where:
    -h - print help screen
    -d delim - delimiter character at start and end of a semlit command.
            (default to '=')
    -f fs - field separator character within a semlit command.
            (default to ',')
    -i initialsource - file name for initial source frame.
            (default to "blank.htmo")  Also, initialsource semlit command.
    -I dir - directory to find files for 'srcfile' and 'include' commands.
            (default to ".")  The "-I dir" option can be repeated.
    -t tabstop - convert tabs to "tabstop" spaces.
            (default to '4')
    files - zero or more input files.  If omitted, inputs from stdin.

__EOF__

	exit($exit_status);
}  # help