Kroc 09 Fev 2005 : Pour faire suite à la demande d'Andres Kupries, voici un petit outil pour récupérer les scripts des pages de ce wiki. L'utilisation est très simple :
wikireaper.tcl page ?page ...?"
Voilà qui devrait éviter bien des copier/coller hasardeux.
#!/usr/bin/env tclsh
package require Tcl 8.3
if {[llength $argv] == 0} {
puts stderr "usage: wiki-reaper page ?page ...?"
exit 1
}
if {![catch { package require nstcl-html }] &&
![catch { package require nstcl-http }]} {
namespace import nstcl::*
} else {
package require http
proc ns_geturl {url} {
set conn [http::geturl $url]
set html [http::data $conn]
http::cleanup $conn
return $html
}
proc ns_striphtml {-tags_only html} {
regsub -all -- {<[^>]+>} $html "" html
return $html ;# corrected a typo here
}
proc ns_urlencode {string} {
set allowed_chars {[a-zA-Z0-9]}
set encoded_string ""
foreach char [split $string ""] {
if {[string match $allowed_chars $char]} {
append encoded_string $char
} else {
scan $char %c ascii
append encoded_string %[format %02x $ascii]
}
}
return $encoded_string
}
}
proc output {data} {
# we don't want to throw an error if stdout has been closed
catch { puts $data }
}
proc reap {page} {
set url http://wfr.tcl.tk/[ns_urlencode $page]
set now [clock format [clock seconds] -format "%e %b %Y, %H:%M" -gmt 1]
set html [ns_geturl $url]
# can't imagine why these characters would be in here, but just to be safe
set html [string map [list \x00 "" \x0d ""] $html]
set html [string map [list <pre> \x00 </pre> \x0d] $html]
if {![regexp -nocase {<title>([^<]*)</title>} $html => title]} {
set title "(no title!?)"
}
if {![regexp -nocase {<i>Updated on ([^G]+ GMT)} $html => updated]} {
set updated "???"
}
output "#####"
output "#"
output "# \"$title\""
output "# $url"
output "#"
output "# Tcl code harvested on: $now GMT"
output "# Wiki page last updated: $updated"
output "#"
output "#####"
output \n
set html [ns_striphtml -tags_only $html]
foreach chunk [regexp -inline -all {\x00[^\x0d]+\x0d} $html] {
set chunk [string range $chunk 1 end-1]
set chunk [string map [list """ \x22 \
"&" & \
"<" < \
">" >] $chunk]
foreach line [split $chunk \n] {
if {[string index $line 0] == " "} {
set line [string range $line 1 end]
}
output $line
}
}
output \n
output "# EOF"
output \n
}
foreach page $argv {
reap $page
}Catégorie wiki tcl francophone