Dual-license your content for inclusion in The Perl 5 Wiki using this HOWTO, or join us for a chat on irc.freenode.net#PerlNet.

User:CpanBot

From PerlNet

Jump to: navigation, search

CpanBot is operated by Paul Fenwick. Its purpose is to adjust links to CPAN. Presently it performs only a single task: finding links of the form

[[cpan:Foo::Bar]]

and changing them to the more beautiful and correct:

[[cpan:Foo-Bar|Foo::Bar]]

The bot is presently started by hand.

Source code

#!/usr/bin/perl -w
use strict;

# CpanBot.  Automatically linkify and wikify CPAN links.
# Paul Fenwick <pjf@perl.net.au>, March 2006.
#
# WARNING: This code is extremely ugly.

# Wiki endpoints and bot credentials.  WIKI_BASE is declared on its own
# so the URLs derived from it can use it at compile time.
# NOTE(review): 'SECRET' looks like a placeholder for the real password
# -- confirm before running.
use constant WIKI_BASE => "http://perl.net.au/wiki";
use constant {
        LOGIN_URL      => WIKI_BASE . "/Special:Userlogin",
        DIRECTORY_URL  => WIKI_BASE . "/Special:Allpages",
        LOGIN_NAME     => 'CpanBot',
        LOGIN_PASSWORD => 'SECRET',
};

use WWW::Mechanize;

# Create the user-agent and log in with the bot account by filling in
# the first form on the login page.

my $mech = WWW::Mechanize->new( agent => "CpanBot" );
$mech->get(LOGIN_URL);
$mech->form_number(1);
$mech->field("wpName",    LOGIN_NAME);
$mech->field("wpPassword",LOGIN_PASSWORD);
$mech->click("wpLoginattempt");

# The wiki reports success in the page body; anything else is fatal.
die "Login failed!"
        unless $mech->content =~ /Login successful/;

$mech->get(DIRECTORY_URL);

# Collect the page links from the directory listing.  Links whose
# page name starts with Special:, User:, PerlNet: or any "..._talk:"
# prefix are skipped, as are links where "/wiki/" is followed
# directly by a query string.

my @links = $mech->find_all_links(
        url_regex => qr{^/wiki/(?!(?:Special|User|PerlNet|[^:]+_talk):)[^?]},
);

# Visit each collected page, open its edit form, and rewrite any
# [[cpan:Foo::Bar]] style links found in the wiki text.

for my $page (@links) {
        print $page->url, "\n";
        $mech->get($page);

        my $edit_response = $mech->follow_link(text => "Edit");
        die "Cannot find edit link" unless $edit_response;
        $mech->form_name("editform");

        # Raw wiki markup of the page being edited.
        my $wikitext = $mech->value('wpTextbox1');

        # Set to 1 by linkify() (via the reference passed below) as
        # soon as at least one link has been rewritten.
        my $changed = 0;

        # Match the inside of a [[cpan:...]] link whose target has at
        # least one "::", and hand the target ($1) and the optional
        # "|description" ($2) to linkify() for the replacement text.
        # The surrounding brackets are matched by look-around only,
        # so they are left untouched.

        $wikitext =~ s{
                (?<=\[\[                        # Look-behind.  Find open link
                cpan:)                          # and CPAN tag
                (                               # $1 - Link target
                        (?:
                                [^:|\]]+        # Non :|] characters
                        ::                      # A double colon
                        )+
                        [^|\]]+                 # Final part of module name.
                )
                (                               # $2 - Link description
                        \|
                        [^\]]+                  # Possibly the descr.
                )?
                (?=\]\])                        # Close tag
        }{linkify($1,$2,\$changed)}exg;

        # Nothing to fix on this page; move on to the next one.
        next unless $changed;

        print "$wikitext\n";

        $mech->field("wpTextbox1", $wikitext);
        $mech->field("wpSummary","Automatic cpan beautification.  See [[User:CpanBot]] for details");
        $mech->click("wpSave");

        # The bot is still in testing, so stop after the first saved
        # edit.  That keeps any damage from a misbehaving run small
        # and easy to revert.
        exit;
}

# linkify($link, $desc, $edit_flag)
#
# Builds the replacement text for the inside of a [[cpan:...]] tag,
# in the form:
#
#       Foo-Bar|Foo::Bar
#
#   $link      - module name as written in the page, e.g. "Foo::Bar".
#   $desc      - existing link description including its leading "|",
#                or undef/empty when the link had none.
#   $edit_flag - reference to a scalar; it is set to 1 to tell the
#                caller that an edit was made.
#
# Returns the target with every "::" replaced by "-", followed by the
# description.  When no description was supplied, "|" plus the original
# module name is used.  Progress is printed to the selected output handle.

sub linkify {
        my ($link, $desc, $edit_flag) = @_;

        $$edit_flag = 1;

        # The description group in the caller's regex is optional, so
        # $desc arrives as undef for plain [[cpan:Foo::Bar]] links.
        # Normalise it first so the trace line below does not emit an
        # "uninitialized value" warning under -w.
        $desc = '' unless defined $desc;

        print "Handed $link : $desc\n";

        # Default the description to the original module name; this
        # must happen before $link is rewritten.
        $desc ||= "|$link";
        $link =~ s/::/-/g;

        print "Rewritten to $link$desc\n";

        return "$link$desc";
}

Personal tools