获取页面上所有图片地址的 Perl 脚本

时间：2008-11-20 来源：xiaowei

#!/usr/bin/perl
# get_img_url.pl - print the absolute URL of every image referenced by an
# <img> tag on a web page, one URL per line.
#
# Usage:
#   chmod 755 get_img_url.pl
#   ./get_img_url.pl -u URL
#
# To list only the images that belong to one domain, uncomment the two
# filter lines in the output loop at the bottom of the script and replace
# yourdomain.cn with the domain you want to match.

use strict;
use warnings;

use Getopt::Std;
use HTML::LinkExtor;
use LWP::UserAgent;
use URI::URL;

my %options;
Getopt::Std::getopts('u:', \%options);
my $url = $options{u};

# Without a URL there is nothing to fetch; fail early with a usage hint
# instead of issuing a request for an undefined address.
die "Usage: $0 -u URL\n" unless defined $url && length $url;

my $ua = LWP::UserAgent->new;

# Image links collected by callback() while the document is being parsed.
my @imgs = ();

# Link callback handed to HTML::LinkExtor.
#   $tag  - lower-cased name of the element that carries link attributes
#   %attr - the link attributes of that element (name => value)
# Appends the image sources of <img> elements to @imgs; other tags are
# ignored.
sub callback {
    my ($tag, %attr) = @_;
    return if $tag ne 'img';    # we only look closer at <img ...>

    # HTML::LinkExtor also reports longdesc and usemap for <img>; those are
    # links, but not image addresses, so only src/lowsrc are kept.
    push @imgs, grep { defined } @attr{qw(src lowsrc)};
    return;
}

# Make the parser. Unfortunately, we don't know the base yet
# (it might be different from $url).
my $parser = HTML::LinkExtor->new(\&callback);

# Request the document and parse it as it arrives.
my $res = $ua->request(
    HTTP::Request->new(GET => $url),
    sub { $parser->parse($_[0]) },
);
$parser->eof;    # flush whatever the parser is still buffering

die "GET $url failed: " . $res->status_line . "\n"
    unless $res->is_success;

# Expand all image URLs to absolute ones.
my $base = $res->base;
@imgs = map { url($_, $base)->abs } @imgs;

# Print them out.
foreach my $link (@imgs) {
    # if ($link =~ m/yourdomain\.cn/i) {
    print "$link\n";
    # }
}

相关阅读更多 +