Dual-license your content for inclusion in The Perl 5 Wiki using this HOWTO, or join us for a chat on irc.freenode.net#PerlNet.
User:CpanBot
From PerlNet
CpanBot is operated by Paul Fenwick. Its purpose is to adjust links to CPAN. Presently it performs only a single task, finding links in the form:
    [[cpan:Foo::Bar]]
and changing them to the more beautiful and correct:
    [[cpan:Foo-Bar|Foo::Bar]]
The bot is presently started by hand.
[edit]
Source code
#!/usr/bin/perl -w
use strict;
# CpanBot. Automatically linkify and wikify CPAN links.
# Paul Fenwick <pjf@perl.net.au>, March 2006.
#
# WARNING: This code is extremely ugly.
# Site configuration. Every URL the bot visits is derived from WIKI_BASE.
use constant WIKI_BASE => "http://perl.net.au/wiki";
use constant LOGIN_URL => WIKI_BASE."/Special:Userlogin";
use constant LOGIN_NAME => 'CpanBot';
use constant LOGIN_PASSWORD => 'SECRET';
use constant DIRECTORY_URL => WIKI_BASE."/Special:Allpages";
use WWW::Mechanize;

# Make our user-agent and login.
my $mech = WWW::Mechanize->new( agent => "CpanBot" );

# Whether get() dies by itself on an HTTP error depends on the
# WWW::Mechanize autocheck setting, so check explicitly. Otherwise
# a failed fetch would fall through to form handling on an error page.
$mech->get(LOGIN_URL);
$mech->success
    or die "Cannot fetch login page " . LOGIN_URL . ": "
         . $mech->response->status_line;

# The login form is expected to be the first form on the page.
$mech->form_number(1)
    or die "No login form found at " . LOGIN_URL;
$mech->field("wpName", LOGIN_NAME);
$mech->field("wpPassword",LOGIN_PASSWORD);
$mech->click("wpLoginattempt");

$mech->content =~ /Login successful/
    or die "Login failed!";

$mech->get(DIRECTORY_URL);
$mech->success
    or die "Cannot fetch page directory " . DIRECTORY_URL . ": "
         . $mech->response->status_line;

# Now find all the main namespace links. We're going to crawl them
# all and look for things to fix. The negative look-ahead skips the
# Special:, User:, PerlNet: and *_talk: namespaces, and the trailing
# [^?] requires at least one non-"?" character after "/wiki/".
my @links = $mech->find_all_links(
    url_regex => qr{^/wiki/(?!(?:Special|User|PerlNet|[^:]+_talk):)[^?]},
);

# An empty list means the bot would exit without doing anything;
# say so rather than finishing silently.
@links
    or warn "No page links found at " . DIRECTORY_URL . "\n";
# Walk through each page in our wiki, and examine
# them for things the bot can edit.
#
# For every link: fetch the page, open its edit form, rewrite any
# [[cpan:Foo::Bar]] style links in the wiki text, and save if anything
# changed. Every step is checked, so a failed fetch or save is reported
# as such rather than surfacing later as a misleading error or passing
# for a successful edit.
foreach my $link (@links) {
    my $url = $link->url;
    print $url,"\n";

    $mech->get($link);
    $mech->success
        or die "Cannot fetch $url: " . $mech->response->status_line;

    $mech->follow_link(text => "Edit")
        or die "Cannot find edit link";

    $mech->form_name("editform")
        or die "Cannot find edit form for $url";

    # Page contents in wiki format.
    my $content = $mech->value('wpTextbox1');

    # Simple flag for recording if we've made
    # changes. linkify() sets it through the reference
    # we pass in from the replacement expression below.
    my $edited = 0;

    # Yes, it's the world's fifth most ugliest
    # regular expression. But it matches the
    # tags we need.
    #
    # Only targets containing "::" match, so links that are
    # already in Foo-Bar|Foo::Bar form are left untouched.
    $content =~ s{
        (?<=\[\[ # Look-behind. Find open link
        cpan:) # and CPAN tag
        ( # $1 - Link target
        (?:
        [^:|\]]+ # Non :|] characters
        :: # A double colon
        )+
        [^|\]]+ # Final part of module name.
        )
        ( # $2 - Link description
        \|
        [^\]]+ # Possibly the descr.
        )?
        (?=\]\]) # Close tag
    }{linkify($1,$2,\$edited)}exg;

    if ($edited) {
        print "$content\n";
        $mech->field("wpTextbox1", $content);
        $mech->field("wpSummary","Automatic cpan beautification. See [[User:CpanBot]] for details");
        $mech->click("wpSave");

        # Don't report an edit that the server rejected. Only 4xx/5xx
        # responses count as failure here, so a redirect after saving
        # is still treated as success.
        $mech->response->is_error
            and die "Saving $url failed: " . $mech->response->status_line;

        # Since the bot is still in testing, we only allow
        # at most one edit at a time. This means it's
        # easy to clean up if it goes crazy.
        exit;
    }
}
# linkify($link, $desc, $edit_flag)
#
# Linkify simply produces a link in the form of:
#
# Foo-Bar|Foo::Bar
#
# suitable for including in a [[cpan:]] tag.
#
#   $link      - link target as written in the page, e.g. "Foo::Bar".
#   $desc      - optional description, including its leading "|".
#                When it is missing (undef or otherwise false), the
#                original target is used, giving "|Foo::Bar".
#   $edit_flag - reference to a scalar, set to 1 on every call so the
#                caller knows that something was rewritten.
#
# Returns the rewritten "target|description" string. Two progress
# lines are printed to STDOUT on each call.
sub linkify {
    my ($link, $desc, $edit_flag) = @_;
    $$edit_flag = 1;

    # $desc is undef whenever the link had no description part, which
    # is the common case. Interpolating it directly would raise an
    # "uninitialized value" warning under -w, so print an empty
    # string instead; the output text is the same.
    my $handed_desc = defined $desc ? $desc : '';
    print "Handed $link : $handed_desc\n";

    $desc ||= "|$link";
    $link =~ s/::/-/g;
    print "Rewritten to $link$desc\n";
    return "$link$desc";
}

