获取页面上所有图片地址的perl脚本
时间:2008-11-20 来源:xiaowei
#cat get_img_url.pl
#!/usr/bin/perl
#
# get_img_url.pl - print the absolute URL of every image link found on a page.
#
# Usage:
#   chmod 755 get_img_url.pl
#   ./get_img_url.pl -u URL
#
# To list only the images served from one domain, uncomment the two
# commented-out lines inside the output loop at the bottom and change
# yourdomain.cn to the domain you want to match.

use strict;
use warnings;

use Getopt::Std;
use HTML::LinkExtor;
use HTTP::Request;
use LWP::UserAgent;    # loaded explicitly: nothing else here imports it
use URI::URL;

# -u URL is mandatory; bail out with a usage line instead of requesting undef.
my %options;
Getopt::Std::getopts('u:', \%options)
    or die "Usage: $0 -u URL\n";
my $url = $options{u};
die "Usage: $0 -u URL\n" unless defined $url && length $url;

my $ua = LWP::UserAgent->new;

# Collected image links (raw attribute values until they are made absolute).
my @imgs = ();

# Parser callback: receives a tag name followed by attribute name/value
# pairs and records the values for <img> tags in @imgs.
# Every value in %attr is kept, so any attribute the parser reports for an
# <img> tag besides src is collected too.
sub callback {
    my ($tag, %attr) = @_;
    return if $tag ne 'img';    # we only look closer at <img ...>
    push @imgs, values %attr;
    return;
}

# Make the parser. Unfortunately, we don't know the base yet
# (it might be different from $url).
my $p = HTML::LinkExtor->new(\&callback);

# Request the document and parse it as it arrives.
my $res = $ua->request(
    HTTP::Request->new(GET => $url),
    sub { $p->parse($_[0]) },
);
$p->eof;    # flush anything the parser is still buffering

# A failed request would otherwise just produce empty output.
die "Failed to fetch $url: " . $res->status_line . "\n"
    unless $res->is_success;

# Expand all image URLs to absolute ones.
my $base = $res->base;
@imgs = map { url($_, $base)->abs } @imgs;

# Print them out.
foreach my $link (@imgs) {
    # if ($link =~ m/yourdomain\.cn/i) {
    print "$link\n";
    # }
}
# get_img_url.pl - print the absolute URL of every image link found on a page.
#
# NOTE(review): this listing repeats the get_img_url.pl script that appears
# earlier in this file; it looks like a page-extraction duplicate - confirm
# and keep only one copy.
#
# Usage:
#   chmod 755 get_img_url.pl
#   ./get_img_url.pl -u URL
#
# To list only the images served from one domain, uncomment the two
# commented-out lines inside the output loop at the bottom and change
# yourdomain.cn to the domain you want to match.

use strict;
use warnings;

use Getopt::Std;
use HTML::LinkExtor;   # needed for HTML::LinkExtor->new below
use HTTP::Request;
use LWP::UserAgent;    # loaded explicitly: nothing else here imports it
use URI::URL;

# -u URL is mandatory; bail out with a usage line instead of requesting undef.
my %options;
Getopt::Std::getopts('u:', \%options)
    or die "Usage: $0 -u URL\n";
my $url = $options{u};
die "Usage: $0 -u URL\n" unless defined $url && length $url;

my $ua = LWP::UserAgent->new;

# Collected image links (raw attribute values until they are made absolute).
my @imgs = ();

# Parser callback: receives a tag name followed by attribute name/value
# pairs and records the values for <img> tags in @imgs.
# Every value in %attr is kept, so any attribute the parser reports for an
# <img> tag besides src is collected too.
sub callback {
    my ($tag, %attr) = @_;
    return if $tag ne 'img';    # we only look closer at <img ...>
    push @imgs, values %attr;
    return;
}

# Make the parser. Unfortunately, we don't know the base yet
# (it might be different from $url).
my $p = HTML::LinkExtor->new(\&callback);

# Request the document and parse it as it arrives.
my $res = $ua->request(
    HTTP::Request->new(GET => $url),
    sub { $p->parse($_[0]) },
);
$p->eof;    # flush anything the parser is still buffering

# A failed request would otherwise just produce empty output.
die "Failed to fetch $url: " . $res->status_line . "\n"
    unless $res->is_success;

# Expand all image URLs to absolute ones.
my $base = $res->base;
@imgs = map { url($_, $base)->abs } @imgs;

# Print them out.
foreach my $link (@imgs) {
    # if ($link =~ m/yourdomain\.cn/i) {
    print "$link\n";
    # }
}
相关阅读 更多 +