summaryrefslogtreecommitdiff
path: root/lib/MooseX
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--lib/MooseX/UserAgent.pm56
-rw-r--r--lib/MooseX/UserAgent/Async.pm2
-rw-r--r--lib/MooseX/UserAgent/Cache.pm2
-rw-r--r--lib/MooseX/UserAgent/Config.pm64
-rw-r--r--lib/MooseX/UserAgent/Content.pm2
5 files changed, 105 insertions, 21 deletions
diff --git a/lib/MooseX/UserAgent.pm b/lib/MooseX/UserAgent.pm
index edcd5dd..d43312a 100644
--- a/lib/MooseX/UserAgent.pm
+++ b/lib/MooseX/UserAgent.pm
@@ -2,27 +2,30 @@ package MooseX::UserAgent;
our $VERSION = '0.2.0';
-use Moose::Role;
-with qw/MooseX::UserAgent::Config MooseX::UserAgent::Content
- MooseX::UserAgent::Cache/;
-
use URI;
use HTTP::Request;
use HTTP::Response;
use LWP::UserAgent;
+use Moose::Role;
+with qw/
+ MooseX::UserAgent::Config
+ MooseX::UserAgent::Content
+ MooseX::UserAgent::Cache
+ /;
+
sub fetch {
my ( $self, $url ) = @_;
- my $req = HTTP::Request->new( GET => URI->new( $url ) );
+ my $req = HTTP::Request->new( GET => URI->new($url) );
$req->header( 'Accept-Encoding', 'gzip' );
my $last_modified = $self->get_ua_cache($url);
$req->header( 'If-Modified-Since' => $last_modified )
if $last_modified;
- my $res = $self->agent->request( $req );
- $self->store_ua_cache($url, $res);
+ my $res = $self->agent->request($req);
+ $self->store_ua_cache( $url, $res );
$res;
}
@@ -69,18 +72,40 @@ This is a role which provides a useragent to a Moose Class.
The role will do the caching for you if you need it, using Cache::*Cache
modules. By default it uses Cache::FileCache, but you can use any Cache
-modules you want.
+modules you want:
+
+ my $cache = new Cache::MemoryCache(
+ {
+ 'namespace' => 'mymemorycacheforbot',
+ 'default_expires_in' => 600
+ }
+ );
+
+ my $class = $MyClassUsingUA->new(
+ useragent_conf => {
+ cache => {
+ use_cache => 1,
+ namespace => 'testua',
+ }
+ },
+ ua_cache => $cache,
+ );
=head2 METHODS
+=head3 useragent_conf
+
+This is an attribute you need to add to your Class. It's a HashRef that
+contains all the required configuration for the useragent.
+
=over 4
=item B<agent>
-The default useragent is a LWPx::ParanoidAgent object. In the
-configuration, the name, mail of the useragent have to be defined. The
-default size of a page manipulated can't excess 3 000 000 octets and the
-timeout is set to 30 seconds.
+The default useragent is a LWP::UserAgent object. In the configuration,
+the name and mail of the useragent have to be defined. The default size of
+a page manipulated can't exceed 3 000 000 octets and the timeout is set to
+30 seconds.
=item B<fetch>
@@ -88,10 +113,7 @@ This method will fetch a given URL. This method handle only the http
protocol.
If there is a cache configuration, the url will be checked in the cache,
-and if there is a match, the content will be returned.
-
-In the case of scraping search engines, a delay may be given, so we will
-not hammer the server.
+and if there is a match, a 304 HTTP code will be returned.
=item B<get_content>
@@ -103,7 +125,7 @@ This method will return a content in utf8.
=head1 AUTHOR
-franck cuny C<< <franck@lumberjaph.net> >>
+franck cuny C<< <franck.cuny@rtgi.fr> >>
=head1 LICENCE AND COPYRIGHT
diff --git a/lib/MooseX/UserAgent/Async.pm b/lib/MooseX/UserAgent/Async.pm
index 3c9a09d..186a183 100644
--- a/lib/MooseX/UserAgent/Async.pm
+++ b/lib/MooseX/UserAgent/Async.pm
@@ -63,7 +63,7 @@ RTGI::Role::UserAgent::Async - Fetch an url using AnyEvent::HTTP
=head1 AUTHOR
-franck cuny C<< <franck@lumberjaph.net> >>
+franck cuny C<< <franck.cuny@rtgi.fr> >>
=head1 LICENCE AND COPYRIGHT
diff --git a/lib/MooseX/UserAgent/Cache.pm b/lib/MooseX/UserAgent/Cache.pm
index de9b24d..fa235d7 100644
--- a/lib/MooseX/UserAgent/Cache.pm
+++ b/lib/MooseX/UserAgent/Cache.pm
@@ -56,7 +56,7 @@ RTGI::Role::UserAgent::Cache
=head1 AUTHOR
-franck cuny C<< <franck@lumberjaph.net> >>
+franck cuny C<< <franck.cuny@rtgi.fr> >>
=head1 LICENCE AND COPYRIGHT
diff --git a/lib/MooseX/UserAgent/Config.pm b/lib/MooseX/UserAgent/Config.pm
index 40b0720..d5d6730 100644
--- a/lib/MooseX/UserAgent/Config.pm
+++ b/lib/MooseX/UserAgent/Config.pm
@@ -10,6 +10,8 @@ has 'agent' => (
my $self = shift;
my $ua = LWP::UserAgent->new;
+ if (!$self->can('useragent_conf')) {
+ }
my $conf = $self->useragent_conf;
$ua->agent( $conf->{name} ) if $conf->{name};
$ua->from( $conf->{mail} ) if $conf->{mail};
@@ -27,13 +29,73 @@ __END__
RTGI::Role::UserAgent::Config
+=head1 SYNOPSIS
+
+ has useragent_conf => (
+ isa => 'HashRef',
+ default => sub {
+ {
+ name => 'myownbot',
+ mail => 'mail@bot.com',
+ timeout => 60,
+ max_size => 50000,
+ cache => {
+ use_cache => 1,
+ namespace => 'mybotua',
+ root => '/tmp',
+ }
+ };
+ }
+ );
+
=head1 DESCRIPTION
+=over 4
+
+=item B<name>
+
+UserAgent string used by the HTTP client. Default is to use the LWP or
+AnyEvent::HTTP string.
+
+=item B<mail>
+
+Mail string used by the HTTP client (only for LWP). Default is to use the
+LWP string.
+
+=item B<max_size>
+
+Max size that will be fetched by the useragent, in octets (only for LWP).
+Default is set to 3 000 000.
+
+=item B<timeout>
+
+Time out. Default is set to 30.
+
+=item B<cache>
+
+=over 2
+
+=item B<use_cache>
+
+If you need caching, set to 1. Default is no cache.
+
+=item B<root>
+
+Where to store the cache.
+
+=item B<default_expires_in>
+
+=item B<namespace>
+
+=back
+
+=back
+
=head1 BUGS AND LIMITATIONS
=head1 AUTHOR
-franck cuny C<< <franck@lumberjaph.net> >>
+franck cuny C<< <franck.cuny@rtgi.fr> >>
=head1 LICENCE AND COPYRIGHT
diff --git a/lib/MooseX/UserAgent/Content.pm b/lib/MooseX/UserAgent/Content.pm
index 024531f..1b3f5ee 100644
--- a/lib/MooseX/UserAgent/Content.pm
+++ b/lib/MooseX/UserAgent/Content.pm
@@ -37,7 +37,7 @@ RTGI::Role::UserAgent::Content
=head1 AUTHOR
-franck cuny C<< <franck@lumberjaph.net> >>
+franck cuny C<< <franck.cuny@rtgi.fr> >>
=head1 LICENCE AND COPYRIGHT