#!/usr/local/bin/tcl # # Commence Version nunmbering now 1.0 # # # This script takes a database file, an html file, and a description # and produces as output html which includes references to the # Images described in the original text, but as HTML elements. # # # This first version of the Fields of interest is as per Mike Greenhalgh posting #set Fields {O I II III IV BIa BIb BII BIII BIV DO DB DBIb DBII DBIII DBIV} # This version is enhanced with what I see in the file #set Fields {O I II III IV Ia Ib} #lappend Fields BIa BIb BII BIII BIV #lappend Fields IBa IBb IIB IIIB IVB #lappend Fields "B I" "B II" "B III" "B IV" #lappend Fields "I B" "II B" "III B" "IV B" proc DS {args} { puts [clock format [clock seconds] -format "%r [join $args]" ] } # Iff necessary, declare the lassign command, from tclx if {[info command lassign] != "lassign"} { proc lassign {l1 args} { uplevel foreach \"[join $args]\" \"$l1\" break } } # # This procedure loads key value pair which define the relationship # between an entry in the text, and what will be returned from the DB # proc Add_Text_Key {Key Value} { global Text_Key upvar $Text_Key Local lappend Local($Key) $Value } # Because we are expecting the resultant file to be VERY BIG, we must # split up the output as we go, we will do this by taking X chunks # from a file and splitting it at the next paragrpah marker after that # X & Chunks are yet to be defined, we will try 250 & lines # We don't want to break up any tables or lists when we do this, and we will # want links forward and backward in the Chain, plus an index proc Generate_Output {Base_File Text} { set Line_Count 0 set File_Count 1 set M_Count 0 set File ${Base_File}.${File_Count}.html set output [open $File w] lappend File_List "$File" # Strip out punctuation following our TABLE inserts regsub -all -- "\[ \n\t\]*\[-,.!@#$%&*()^\]*" $Text {} Text set Protected_Level 0 foreach line_orig [split $Text "\n"] { set line $line_orig incr Line_Count incr M_Count regsub -all -- {\[} $line {%5B} line regsub -all -- {\]} $line {%5D} line regsub -all -nocase -- {} $line {[incr Protected_Level -1]} line regsub -all -nocase -- {} $line {[incr Protected_Level -1]]} line regsub -all -nocase -- {<[uo]l>} $line {[incr Protected_Level]} line set Paragraph_Found [regexp -nocase -- {(.*)(

.*)} $line null Before After] subst -novariable -nobackslashes $line # puts "$M_Count, Level = $Protected_Level" if {($Line_Count >= 250) && ($Protected_Level == 0) && ($Paragraph_Found)} { set Last_File $File incr File_Count set File ${Base_File}.${File_Count}.html puts $output $Before puts $output "Next Part - $File" puts $output "" close $output set Line_Count 0 set output [open $File w] lappend File_List "$File" puts $output "\nPrevious Part - $Last_File\n" puts $output $After } else { puts $output $line_orig } } # The last lines have their own tags close $output set output [open $Base_File.index.html w] puts $output "" puts $output "

Index for hii processed file $Base_File

" puts $output "

List of Parts :" puts $output "

[join $File_List "\n
"]\n

" puts $output "" close $output } # Declare read_file & write_file from TclX, because they are handy proc read_file {fileName} { set fp [open $fileName r] set result [read $fp] close $fp return $result } proc write_file {fileName string} { set fp [open $fileName w] set result [puts $fp $string] close $fp return $result } # Process the arguments if {$argc == 1} { set I_File [lindex $argv 0] set Base_File [file rootname $I_File] puts "bf = $Base_File" set O_File "${Base_File}.new.html" } else { puts "Usage: $argv0 Filename" exit } set vipicrc .vipicrc # First we read the .vipicrc file if {[file readable $vipicrc]} { set vipicrc [read_file $vipicrc] } else { puts stderr "Couldn't read the .vipicrc file" exit } set Fulltext $vipicrc # Pull out the Database, Field and Format descriptions regexp "DATABASE=(\[^\n]*)\n" $vipicrc null DB regexp "FIELD=(\[0-9]+)\n" $vipicrc null Field regexp "FORMAT=(.*)END" $vipicrc null Format set Format [string trim $Format] # Load in the data from the Database, into arrays indexed of the words in the Field set f [open $DB] # Dump the first 7 lines of the file for {set i 0} {$i < 7} {incr i} { gets $f } while {![eof $f]} { set line [gets $f] # Substitute out any '[' that may occur in the DB regsub -all {\[} $line {%5B} line # Replace any + character with a space regsub -all {\+} $line { } line # Substitute out the %[0-9a-fA-F][0-9a-fA-F] portions of the doc regsub -all {%([0-9a-fA-F][0-9a-fA-F])} $line {[format %c 0x\1]} line set line [subst -novariables -nobackslashes $line] # puts "line = $line" lassign [split $line ,] Image Type Subject Title Location Orientation Imagesource Section Photonum # Foreach of the words in the Section field # puts "Section = $Section" foreach Word $Section { # puts "Word = $Word" set Photonum_Copy $Photonum # Pull out all the numbers from the Photnum_Copy while {[regexp {[0-9]+} $Photonum_Copy number]} { regsub {[0-9]+} $Photonum_Copy {} Photonum_Copy lappend data($Word,$number) $Image set dataline($Image) $line } } append Fulltext "$line\n" } close $f #parray data #parray dataline # We need to be able to insert text into the document without it being touched # by later regsubs as we work through, one way of doing this is to protect the # text we insert by converting it in some way, to make it immune to our regexp/regsubs # So we must generate a string that doesn't appear anywhere in the text and insert # it between each of the characters of the inserted text # Read the vipicrc, DB, and file into a single string append Fulltext [read_file $I_File] # The string we use is a sequence of X's longer than any that occurs in the Fuilltext # Get rid of Tcl special chars regsub -all {[[$]} $Fulltext {} Fulltext # Embed commands that will figure out the longest X string regsub -all {(X+)} $Fulltext {[set longest [expr max($longest,[string length \1])]]} Fulltext set longest 0 subst $Fulltext set Xstring {} for {set i 0} {$i <= $longest} {incr i} { append Xstring X } puts "longest run of X's $longest, $Xstring" # Load the entire file into contents set f [open $I_File] set contents [read $f] close $f # Load the Text to DB relationship set Text_Key Text_to_DB source Text_Keys parray Text_to_DB set Cycle [list ] # Work throught the fields, constructing regexps as we go foreach Text_Section [array names Text_to_DB] { # set re [subst -nocommands {[^${section}]${section}[ \t\n]*[0-9]+([ \t\n,]*[0-9]+)*} ] # set re [subst -nocommands {[^A-Za-z0-9]${section}[ \t\n]*[0-9]+([ \t\n,]*[0-9]+)*} ] set re [subst -nocommands {[^A-Za-z0-9]${Text_Section}[ \t\n]*[0-9]+(( |\t|\n|,|and|or)+[0-9]+)*} ] puts "re = $re" while {[regexp -- $re $contents match]} { # Delete the first character puts "match = [set match [string range $match 1 end]], section = $Text_Section/DB_Section" # Given the string to sub, break the string into separate image refs set Cycle_Count 0 set Replacement "\n$match\n\n[lindex $Cycle $Cycle_Count]\n" # Search out every number within the matched string while {[regexp {[0-9]+} $match number]} { regsub {[0-9]+} $match {} match # We now have a Section and number look them up in data(,), with all the possible DB_Sections # set list [array names data "${Text_Section},$number"] # puts "list = $list" set list {} foreach i1 $Text_to_DB($Text_Section) { foreach i2 [array names data "${i1},$number" ] { lappend list $i2 } } # puts "list = $list" if {[llength $list] > 0} { puts "Found ([llength $list]) reference(s), $Text_Section, $number" # append Replacement "\n\n\nNew Rep vs $Text_Section, $number" foreach item1 $list { puts "Matching vs $Text_Section,$number : '$item1'" foreach item2 $data($item1) { puts " submatch $item2" lassign [split $dataline($item2) ,] 0 1 2 3 4 5 6 7 8 9 regexp -- {[0-9]+} $0 integer_part set img_path [file join [format %04d [expr $integer_part / 100]] $0] set J $img_path.JPG ; set G $img_path.GIF set Cycle_Count [expr ($Cycle_Count + 1) % [llength $Cycle]] append Replacement "\n[subst -nocommands -nobackslashes $Format]\n" append Replacement "\n[lindex $Cycle $Cycle_Count]\n" } puts "-" } } else { puts "Couldn't find reference, $Text_Section, $number" append Replacement "\n\n

####No Reference found $Text_Section, $number

\n" } } # The End Insert is used when Generate_Output is punctuation stripping append Replacement "\n

\n" puts "New Rep = $Replacement" puts "Subbing out the $re, $Text_Section, $number portion" # Protect the Replacement from a later subbing regsub -all {.} $Replacement "\&$Xstring" Replacement regsub -all {&} $Replacement "\\\\&" Replacement regsub -- $re $contents $Replacement contents # Can be hidden as needed # regsub -all $Xstring $contents {} contents2 # write_file $O_File $contents2 } } # Remove those Xstrings regsub -all $Xstring $contents {} contents # Write out the file, the unsplit dump write_file $O_File $contents Generate_Output $Base_File $contents exit