WikiReaper

 

Kroc 09 Fev 2005 : Pour faire suite à la demande d'Andres Kupries, voici un petit outil pour récupérer les scripts des pages de ce wiki. L'utilisation est très simple :

 wikireaper.tcl page ?page ...?"

Voilà qui devrait éviter bien des copier/coller hasardeux.

    #!/usr/bin/env tclsh

    package require Tcl 8.3

    if {[llength $argv] == 0} {
        puts stderr "usage: wiki-reaper page ?page ...?"
        exit 1
    }

    if {![catch { package require nstcl-html }] &&
        ![catch { package require nstcl-http }]} {
        namespace import nstcl::*
    } else {
        package require http

        proc ns_geturl {url} {
            set conn [http::geturl $url]
            set html [http::data $conn]
            http::cleanup $conn
            return $html
        }

        proc ns_striphtml {-tags_only html} {
            regsub -all -- {<[^>]+>} $html "" html
            return $html ;# corrected a typo here
        }

        proc ns_urlencode {string} {
            set allowed_chars  {[a-zA-Z0-9]}
            set encoded_string ""

            foreach char [split $string ""] {
                if {[string match $allowed_chars $char]} {
                    append encoded_string $char
                } else {
                    scan $char %c ascii
                    append encoded_string %[format %02x $ascii]
                }
            }

            return $encoded_string
        }
    }

    proc output {data} {
        # we don't want to throw an error if stdout has been closed
        catch { puts $data }
    }

    proc reap {page} {
        set url  http://wfr.tcl.tk/[ns_urlencode $page]
        set now  [clock format [clock seconds] -format "%e %b %Y, %H:%M" -gmt 1]
        set html [ns_geturl $url]

        # can't imagine why these characters would be in here, but just to be safe
        set html [string map [list \x00 "" \x0d ""] $html]
        set html [string map [list <pre> \x00 </pre> \x0d] $html]

        if {![regexp -nocase {<title>([^<]*)</title>} $html => title]} {
            set title "(no title!?)"
        }

        if {![regexp -nocase {<i>Updated on ([^G]+ GMT)} $html => updated]} {
            set updated "???"
        }

        output "#####"
        output "#"
        output "# \"$title\""
        output "# $url"
        output "#"
        output "# Tcl code harvested on:  $now GMT"
        output "# Wiki page last updated: $updated"
        output "#"
        output "#####"
        output \n

        set html [ns_striphtml -tags_only $html]

        foreach chunk [regexp -inline -all {\x00[^\x0d]+\x0d} $html] {
            set chunk [string range $chunk 1 end-1]
            set chunk [string map [list "&quot;" \x22 \
                    "&amp;"  &    \
                    "&lt;"   <    \
                    "&gt;"   >] $chunk]

            foreach line [split $chunk \n] {
                if {[string index $line 0] == " "} {
                    set line [string range $line 1 end]
                }

                output $line
            }
        }

        output \n
        output "# EOF"
        output \n
    }

    foreach page $argv {
        reap $page
    }

Catégorie wiki tcl francophone