#!/usr/local/bin/tclsh # # Copyright (c) Joseph A Knapka 2001 # # This software is provided free for any use, but with no warranty of # any kind. Use at your own risk. # # tclsh lxrreplace.tcl # # Requires Tcl 8.4 or later. # # lxrreplace.tcl takes any file "foo.xyz", scans it for anything that # looks like a function, macro, struct, or typedef reference, or a # filename or reference to a line number in a recently-mentioned file, # replaces those items with hyperlinks into the LXR kernel # crossreference website, and writes the results to "foo.html". # # It is intended for producing static pages before publication, not # for generating dynamic pages as a CGI script (although that would be # possible with some modifications). It requires working LXR "ident" # and "find" scripts and an indexed source tree to do its work; these # can be local or remote, but of course processing will be VERY MUCH # slower if an HTTP request must be made for every identifier. Get lxr # from http://lxr.linux.no, install it, run "genexref" to index your # kernel tree, and make sure all the things "ident" depends on # (mainly, perl5) are configured properly, and then you can just use # the local scripts and it will be fast. We do cache the results of # each LXR query, so things could be worse :) # # This script looks at the input file a line at a time, so it # won't do replacements on items that cross linebreaks, such as # "line\n100". # # This is a plain Tcl script; it doesn't depend on any # extensions. It requires Tcl 8.4 or later, since it # depends heavily on the "-indices" option to the # "regexp" command. # # 1-31-2001: Initial version. # # 2-6-2001: Works on NT with Apache and hacked-up LXR. This # is not trivial to set up; contact me for details. I only # use this when I'm travelling and have only my NT laptop # available. ############################################################# # Configuration parameters. You probably should edit these. ############################################################# # Ugh. Set this to 1 if you're running this on Windows. set WINDOWS 0 # Name and address of the dude(tte) to contact about these # pages. See the FOOTER proc below if you want to change # the standard links that get included on every page. set adminEmail jknapka@earthlink.net set adminName "Joe Knapka" # Version we are interested in. set LXR_VERSION "2.4.0" # Architecture we are interested in. This is a regexp which # is used to check the file references returned from LXR when # they contain "arch-" or "asm-". set currentArch "\[-\]i386" #set currentArch "\[-\]m68k" # This should point to a working copy of the LXR "ident" # script that can be called from the command line, or the # url of an accessible LXR repository's "ident" CGI script. #set lxrIdent "/home/www/lxr/http/ident" #set lxrIdent "http://lxr.linux.no/ident" #set lxrIdent "D:/home/www/lxr/http/ident.cmd" #set lxrIdent "http://localhost/lxr/http/ident.cmd" #set lxrIdent "http://orado/lxr/http/ident" set lxrIdent "http://kneuro.net/cgi-bin/lxr/http/ident" # This should point to a working copy of the LXR "find" # script that can be called from the command line, or the # url of an accessible LXR repository's "find" CGI script. #set lxrFind "/home/www/lxr/http/find" #set lxrFind "http://lxr.linux.no/find" #set lxrFind "D:/home/www/lxr/http/find.cmd" #set lxrFind "http://localhost/lxr/http/find.cmd" #set lxrFind "http://orado/lxr/http/find" set lxrFind "http://kneuro.net/cgi-bin/lxr/http/find" # This page will be linked at the bottom of every page # processed by lxrreplace.tcl, if you use the FOOTER # procedure defined below in your source. set topUrl "Linux MM Outline" # Stylesheet to use. set styleSheet "vm.css" ############################################################# # Replacer functions. ############################################################# # Symbols and procedures we'll replace when encountered in # the text. set syms {} set prcs {} # A line "PROCS 0" or "PROCS 1" disables or enables # procedure-call replacement. set procs_enabled 1 proc PROCS {enable} { global procs_enabled set procs_enabled $enable } # Define a procedure replacement. First argument is # the procedure name. When it's encountered in the # text, the next $nargs words are provided to # the named procedure as arguments, and the procedure # name and all argument words are replaced by the # procedure output. proc dproc {prc nargs} { global prcs lappend prcs [list $prc $nargs] } # Define a symbol replacement. The first argument is # simply replaced by the second. If "cuttoff" is "yes", # further replacement of any symbol that matches the # symbol's first argument is disabled; otherwise such # replacements will be performed (which can lead to # infinite loops if you're not careful). proc dsym {sym def {cutoff "yes"}} { global syms set syms [linsert $syms -1 [list $sym $def $cutoff]] } # Replace dproc-defined procedure calls in $line. proc processLine {line} { global prcs procs_enabled set lline [split $line " "] if {[lindex $lline 0] == "PROCS"} { eval $line return "" } if {$procs_enabled == 0} { return $line } set nwords [llength $lline] set wword 0 set pline {} while {$wword<$nwords} { set proc_fired 0 set word [lindex $lline $wword] foreach prc $prcs { set prcname [lindex $prc 0] if {$prcname != $word} { continue } set nargs [lindex $prc 1] if {$nargs == "all"} { return [eval $line] } for {set arg 0} {$arg<$nargs} {incr arg} { incr wword set the_arg [lindex $lline $wword] lappend prcname $the_arg } set result [eval $prcname] set proc_fired 1 lappend pline $result break; } incr wword if {!$proc_fired} {lappend pline $word} } set line [join $pline " "] } # Replace dsym-defined symbols in $line. proc replaceLine {line} { set flag 0 if {[regexp LXRVER $line]} { set flag 1 } global syms set cutoff_list {} foreach def $syms { set sym [lindex $def 0] if {[regexp $sym $cutoff_list]} { continue } set val [lindex $def 1] set cutoff [lindex $def 2] if {$cutoff == "yes"} { append cutoff_list " " append cutoff_list $sym } regsub -all ((\[^'\])|^)$sym $line \\1$val line regsub -all "'$sym" $line $sym line } set line } # Extract an anchor from an LXR result line. All the silly # regsubs are to deal with variations in the way LXR scripts # produce output. This is called both from lxrLookup to # process LXR "ident" output, and from lxrFileLookup to # process LXR "find" output. proc extractAnchor {line {re .*}} { if {![regexp $re $line]} { return "" } global currentArch set anchor "" regexp "<\[Aa\] \[^>\]*>" $line anchor if {$anchor == ""} { regexp "<\[Aa\] \[^>\]*$" $line anchor if {$anchor != ""} { set anchor "$anchor\">" } } if {[regexp "arch-" $anchor]} { if {![regexp $currentArch $anchor]} { return "" } } if {[regexp "asm-" $anchor]} { if {![regexp $currentArch $anchor]} { return "" } } regsub \"source// $anchor \"LXR/ anchor regsub \"source $anchor \"LXR anchor #regsub {(\?v=[0-9.]*)} $anchor "" anchor if {![regexp {\?v=} $anchor]} { regsub {#L([0-9]+)">} $anchor {LXRVER#L\1" target="codewindow">} anchor } set anchor } # Try to find a cached anchor string for a function name. array set lxrCache {} proc checkLxrCache {func} { global lxrCache if {[info exists lxrCache($func)]} { return $lxrCache($func) } return "" } # Get the LXR HTML results for the given identifier. If the # global $lxrIdent begins with "http://" then do this by calling # up the given server; otherwise invoke the local script. proc getIdent {func} { global lxrIdent LXR_VERSION if {![regexp {^http://} $lxrIdent]} { if {$::WINDOWS == 1} { # What a pain... set chan [open TMP.CMD w] puts $chan "set QUERY_STRING=\i=${func}" puts $chan $lxrIdent close $chan set cmd [list C:/winnt/system32/cmd.exe \/C TMP.CMD] return [eval exec $cmd] } else { return [exec sh -c "QUERY_STRING='i=${func}\&v=${LXR_VERSION}' $lxrIdent"] } } # Nope, it requires a network call. set port "" set hostname "" set url "" set match "" regexp {^http://([A-Za-z0-9.]+)(:([0-9]+))?/(.*)$} $lxrIdent match hostname dummy port url if {$port == ""} { set port 80 } set chan [socket $hostname $port] #puts "GET /${url}?i=${func}&v=${LXR_VERSION}\n\n" puts $chan "GET /${url}?i=${func}&v=${LXR_VERSION} HTTP/1.0\n\n" flush $chan set results [read $chan] close $chan return $results } # Try to find a function, struct, typedef, or macro def # in the LXR database. Return an appropriate anchor string. proc lxrLookup {func {type func}} { global lxrIdent lxrCache # Maybe we already looked it up. set anchor [checkLxrCache $func] if {$anchor == "NO_ANCHOR_FOUND"} { return "" } if {$anchor != ""} { return $anchor } # Search the identifier database. global LXR_VERSION set results [getIdent $func] set rlines [split $results "\n"] set found 0 foreach line $rlines { if {$found} { set anchor [extractAnchor $line] if {$anchor != ""} { #set anchor "${anchor}${func}${tail}" set lxrCache($func) $anchor return $anchor } } if {[regexp {Defined as a } $line]} { # Look at all lines hencefort until a legitimate anchor is seen. set found 1 } } set lxrCache($func) NO_ANCHOR_FOUND return "" } # Replace function, struct, typedef, and macro references # with LXR links. proc lxrReplaceLine {line} { set index 0 set start 0 set end 0 # Mysterious regexp alert: # Functions are anything ending in ( set lxr1 {([_0-9A-Za-z]+[(])} # Variables and structures are, for all intents and purposes, # anything containing an underscore. set lxr2 {([0-9A-Za-z]*_[_0-9A-Za-z]+)} # Structs are anything ending in _struct. #set lxr2 {([_0-9A-Za-z]+_struct)} # Typedefs are anything ending in _t #set lxr3 {([_0-9A-Za-z]+_t)} # TEST TEST TEST: Just try indexing *everything*. # set lxr3 {[:space:][A-Za-z_1-9]+[:space:]} # Ok, that was a terrible idea... # Macros are anything with caps and _ that's over 5 # characters long. We also suck up any possible HTML # metachars at the front and back so we can test for # them later. set lxr4 {([<"]?[_A-Z]{5,}[">]?)} set lxrRegexps "$lxr1|$lxr2|$lxr4" # Search the line sequentially using regexp. while {[regexp -indices -start $index $lxrRegexps $line match]} { set start [lindex $match 0] set end [lindex $match 1] set index $end set prevchar [expr $start-1] # Found something that looks like a function. Look it up # in the LXR DB. set mstring [string range $line $start $end] # (Don't do anything if there's already an anchor here.) if {[regexp {[<">]} $mstring]} { continue } if {[string range $line $prevchar $prevchar]==">"} { # puts "Not linking $mstring due to existing anchor" continue } # Peel last char (if lparen). set tail "" if {[string index $mstring end] == "("} { set mstring [string range $mstring 0 [expr [string length $mstring] - 2]] set tail "(" } set anchor [lxrLookup $mstring] if {$anchor != ""} { # Found a good anchor. Replace the function text with # the link. set left [string range $line 0 [expr $start-1]] set right [string range $line [expr $end+1] end] set line "${left}${anchor}${mstring}${tail}${right}" set index [expr $start + [string length $anchor]] } } set line } # Get the HTML results from LXR for a given filename, either # by invoking a local script or calling up a WWW server. proc getLxrFile {name} { global lxrFind LXR_VERSION if {![regexp {^http://} $lxrFind]} { if {$::WINDOWS == 1} { # What a pain... set chan [open TMP.CMD w] puts $chan "set QUERY_STRING=\string=${name}\&v=${LXR_VERSION}" puts $chan $lxrFind close $chan set cmd [list C:/winnt/system32/cmd.exe \/C TMP.CMD] return [eval exec $cmd] } else { return [exec sh -c "QUERY_STRING='string=${name}\&v=${LXR_VERSION}' $lxrFind"] } } # Nope, it requires a network call. set port "" set hostname "" set url "" set match "" regexp {^http://([A-Za-z0-9.]+)(:([0-9]+))?/(.*)$} $lxrFind match hostname dummy port url if {$port == ""} { set port 80 } set chan [socket $hostname $port] puts $chan "GET /${url}?string=${name}&v=${LXR_VERSION} HTTP/1.0\n\n" flush $chan set results [read $chan] close $chan return $results } # Look up the named file in the LXR database, and return the path # relative to "linux/" if found. proc lxrFileLookup {name} { global lxrFind regsub {\.} $name {\.} newname if {[string range $newname 0 0] != "/"} { set newname "/$newname" } set results [getLxrFile $newname] set rlines [split $results "\n"] set found 0 foreach line $rlines { if {$found} { set anchor [extractAnchor $line $newname] if {$anchor != ""} { regexp {"LXR/*(.*)"} $anchor match fullname return $fullname } } if {[regexp {(Defined as a )|(Search for files )} $line]} { # Look at all lines hencefort until a legitimate anchor is seen. set found 1 } } } # Try to find a cached anchor string for a file name. array set lxrFileCache {} proc checkLxrFileCache {fname} { global lxrFileCache if {[info exists lxrFileCache($fname)]} { return $lxrFileCache($fname) } return "" } # Make an LXR anchor for the file named by $mstring, and # set the "current file" for "line" replacements. proc makeLxrFileAnchor {mstring} { # Have we got a cached anchor for this file? set basename [lindex [file split $mstring] end] set anchorList [checkLxrFileCache $basename] if {$anchorList != {}} { set fullname [lindex $anchorList 0] # Only used the cached value if the filename is bare # or the path matches. if {[regexp / $mstring] && $fullname != $mstring} { # Don't use cached value; it's for the wrong file. } else { set anchor [lindex $anchorList 1] SETFILE $fullname set anchor "$anchor${mstring}" return $anchor } } # No. If there's not at least one /, try to find it in # the LXR database. We change the anchor text to the full # pathname so that the author can see what we guessed. if {-1 == [string first "/" $mstring]} { #return "" set mstring [lxrFileLookup $mstring] if {$mstring == ""} { return "" } } set anchor "" global lxrFileCache set lxrFileCache($basename) [list $mstring $anchor] # Set the filename for "line" references, while # we're at it. SETFILE $mstring set anchor "$anchor${mstring}" return $anchor } # Replace file references with LXR links in $line. proc lxrFileReplaceLine {line} { set index 0 set start 0 set end 0 # Search the line sequentially using regexp. while {[regexp -indices -start $index \ {[^_0-9A_Za-z/]*([_0-9A_Za-z/]+\.[hcS])[[:space:],;:.)]} $line match0 match]} { set start [lindex $match 0] set end [lindex $match 1] set index $end # Found something that looks like a file. Create a link # to the LXR DB, if we can find the file there, or if it # looks like a linux/ -relative pathname. set mstring [string range $line $start $end] set anchor [makeLxrFileAnchor $mstring] if {$anchor == ""} { continue } # Found it. set left [string range $line 0 [expr $start-1]] set right [string range $line [expr $end+1] end] set line "${left}${anchor}${right}" set index [expr $start + [string length $anchor]] } set line } ############################################################# # Versioning and standard linking symbols. ############################################################# # Page we link to at the bottom of each document. dsym TOPNODE "

$topUrl" # Base URL of the LXR engine we'll use in the finished # documents. NOTE: you must also have a working copy # of LXR installed at $lxrIdent, defined above. #dsym LXR http://lxr.linux.no/source #dsym LXR http://localhost/lxr/http/source dsym LXR http://kneuro.net/cgi-bin/lxr/http/source # Version string to replace into LXR anchor text. dsym LXRVER "\?v=$LXR_VERSION" no # Include string for LXR anchors. dsym LXRINC LXR/include/linux no ############################################################# # Definitions for procedure calls embedded in the source. ############################################################# # Create an anchor string. Used in some of the embedded # procs and symbols. proc an {link txt} { return "${txt}" } # Set a filename for "line nnn" phrases to refer to. # This can be called in the source as "SETFILE name"; # Also, any mention of a file in the text causes this # to be called. set filename "none" proc SETFILE {fname} { global filename set filename $fname return "" } dproc SETFILE 1 # When a phrase "line nnn" is encountered in the text, # this is called to replace it with a link into the LXR # pages for whatever the current file is. proc line {n {rstr line}} { global filename set tail "" set match "" regexp {(^[0-9]+)([^0-9]{0,1})$} $n match n tail if {$filename != "none"} { # Check that the argument is numeric. set rc [catch {expr $n + 1}] if {$rc} { return "$rstr $n" } else { } set link "LXR/${filename}LXRVER\#L${n}" set val [an $link "$rstr $n"] append val $tail set val } else { return "$rstr $n" } } dproc line 1 # Handle line references at the beginning of sentences. proc Line {n} { return [line $n "Line"] } dproc Line 1 # Make a standard HTML header. proc HEADER {txt} { global styleSheet set hdr "\n$txt\n" if {$styleSheet != ""} { #append hdr "\n" } append hdr "\n\n" append hdr "

$txt

" #append hdr "\n


$txt


" set hdr } dproc HEADER all # Set up standard navigation links (prev, up, next). If any # of these is "NONE" it will be omitted. proc NAVLINKS {prev prevtxt up uptxt next nextxt} { set navtxt "" #if {$prev != "NONE"} { # append navtxt "" #} #if {$up != "NONE"} { # append navtxt "" #} #if {$next != "NONE"} { # append navtxt "" #} set haveLinks 0 append navtxt "" if {$prev != "NONE"} { incr haveLinks append navtxt "" } if {$up != "NONE"} { incr haveLinks append navtxt "" } if {$next != "NONE"} { incr haveLinks append navtxt "" } append navtxt "
PreviousUpNext

${prevtxt}

${uptxt}

${nextxt}

" if {$haveLinks != 0} { append navtxt "

\n" } set navtxt } # Make a standard HTML footer. proc FOOTER {args} { global adminName adminEmail puts "FOOTER $args" set prev NONE set prevText "" set up NONE set upText "" set next NONE set nextText "" set pidx [lsearch $args "PR"] set uidx [lsearch $args "UP"] set nidx [lsearch $args "NX"] if {$pidx != -1} { set prev [lindex $args [expr $pidx+1]] set prevText [lindex $args [expr $pidx+2]] } if {$uidx != -1} { set up [lindex $args [expr $uidx+1]] set upText [lindex $args [expr $uidx+2]] } if {$nidx != -1} { set next [lindex $args [expr $nidx+1]] set nextText [lindex $args [expr $nidx+2]] } set ftr "

\n" puts "NAVLINKS $prev $prevText $up $upText $next $nextText" append ftr "[NAVLINKS $prev $prevText $up $upText $next $nextText]\n" append ftr "Questions and comments to " append ftr "${adminName}\n

\n" append ftr "The 'LXR links in this page were produced by " append ftr {lxrreplace.tcl} append ftr ", which is available for free." append ftr {

Credits} append ftr "" set ftr } dproc FOOTER all ############################################################# # Symbols the LXR replacer won't catch (and isn't worth the # trouble to make it do so). Instances of the first argument # to "dsym" will be replaced in the text with the second # argument. ############################################################# dsym {page struct} [an "LXRINC/mm.hLXRVER#L126" "page struct"] dsym {struct page} [an "LXRINC/mm.hLXRVER#L126" "page struct"] ############################################################# # Symbols to make code look nicer. ############################################################# dsym {

  • } {

  • } dsym {} {

    } dsym {} {

    } dsym {

    } {

    } dsym {

    } {

    } dsym {
    } {

    } ############################################################# # Main loop over source file lines. ############################################################# set file [lindex $argv 0] set outfile "[file rootname $file].html" set ichan [open $file r] set ochan [open $outfile w] #set nlines [exec wc -l $file] #set nlines [lindex $nlines 0] #set n 0 #set verbose 0 while {![eof $ichan]} { #incr n #if {$verbose && [expr $n % 10] == 0} { # puts -nonewline "[expr $n*100/$nlines]% \r" ; flush stdout #} set line [gets $ichan] set line [processLine $line] set line [lxrReplaceLine $line] set line [lxrFileReplaceLine $line] set line [replaceLine $line] puts $ochan $line } close $ichan close $ochan