livedoorフレンドパーク de RSS
MAX_PAGINGは、ページングしている何ページ目までをRSSに取り込むかを指定します。 あとは$usernameと$passwordを自分のに変えてcronで定期的に実行してファイルに吐き出せばよいかと思います。
#!/usr/local/bin/perl
use strict;
use WWW::Mechanize;
use Jcode;
use XML::RSS;
use Time::Piece;
use constant MAX_PAGING => 10;
my $username = 'your_userid';
my $password = 'your_passwd';
my $mech = WWW::Mechanize->new;
$mech->get('http://member.livedoor.com/login/?.next=http%3A%2F%2Ffp.livedoor.com&.sv=sns');
# do login
my $r = $mech->submit_form(
form_number => 1,
fields => {
'.next' => 'http://fp.livedoor.com',
'.sv' => 'sns',
livedoor_id => $username,
password => $password,
},
);
# prepare rss instance
my $rss = XML::RSS->new({ version => '1.0' });
my $t = localtime;
$rss->channel(
title => "fp.livedoor.com",
link => "http://fp.livedoor.com/",
description => "livedoor friend park",
dc => {
date => $t->datetime,
subject => "friend park dialy",
creator => "clouder",
language => "ja"
},
);
for my $i (1..MAX_PAGING) {
# load friend blog
$mech->get("http://fp.livedoor.com/home/friend_blog/?p=$i");
# parse dialy list
parse_dialy($rss, $mech->content);
}
print $rss->as_string;
sub friends_lists {
$mech->get('http://mixi.jp/list_friend.pl');
return $mech->find_all_links(url_regex => qr/show_friend\.pl/);
}
sub parse_dialy {
my($rss, $html) = @_;
my $re = _dialy_pattern();
while ($html =~ m#$re#g) {
# name date time title content
my $datetime = jcode($1)->h2z->euc;
my $title = jcode($3)->h2z->euc;
my $link = sprintf('http://fp.livedoor.com%s', $2);
my $name = jcode($5)->h2z->euc;
$datetime =~ s/^(\d+)?[^\d]+(\d+)?[^\d]+(\d+)?[^\s]+ (\d+):(\d+)$/$1-$2-$3 $4:$5:00/;
$rss->add_item(
title => sprintf('%s (%s)', encode_xml_valid_entities(jcode(remove_tag($title))->utf8), jcode($name)->utf8),
link => $link,
dc => {
date => $datetime,
subject => jcode($title)->utf8,
creator => jcode($name)->utf8,
},
);
}
}
sub _dialy_pattern {
return <<'RE';
<td bgcolor="#ffffff" style="border-bottom:1px solid #CCCCCC;background:#EFEFEF;" nowrap><small>([^<]+)</small></td>
<td bgcolor="#ffffff" style="border-bottom:1px solid #CCCCCC;background:#EFEFEF;"><small>[^<]+</small></td>
<td bgcolor="#ffffff" style="border-bottom:1px solid #CCCCCC;background:#EFEFEF;" width="100%"><small>
<a href="([^"]+)">([^<]+)</a> by
<a href="([^"]+)">([^<]+)</a>
RE
}
sub remove_tag {
my $str = shift;
$str =~ s/<.*?>//g;
return $str;
}
sub encode_xml_valid_entities {
my $input = shift;
return HTML::Entities::encode_entities($input, '<>&"');
}