diff options
| author | franck cuny <franck@lumberjaph.net> | 2010-01-23 19:36:24 +0100 |
|---|---|---|
| committer | franck cuny <franck@lumberjaph.net> | 2010-01-23 19:36:24 +0100 |
| commit | a7cc690ced15e1a0191d27034006bfb17a0deeb5 (patch) | |
| tree | 6cef1a2e07727e8cd5249764f461222073e8211a /lib | |
| download | github-explorer-a7cc690ced15e1a0191d27034006bfb17a0deeb5.tar.gz | |
basic github crawler using api
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/githubexplorer.pm | 51 | ||||
| -rw-r--r-- | lib/githubexplorer/Gexf.pm | 27 | ||||
| -rw-r--r-- | lib/githubexplorer/Profile.pm | 59 | ||||
| -rw-r--r-- | lib/githubexplorer/Repositorie.pm | 52 | ||||
| -rw-r--r-- | lib/githubexplorer/Schema.pm | 7 | ||||
| -rw-r--r-- | lib/githubexplorer/Schema/Result/Follow.pm | 18 | ||||
| -rw-r--r-- | lib/githubexplorer/Schema/Result/Profiles.pm | 28 | ||||
| -rw-r--r-- | lib/githubexplorer/Schema/Result/Repositories.pm | 23 |
8 files changed, 265 insertions, 0 deletions
# ======================================================================
# The sections below are eight SEPARATE module files. Each banner names
# the path the section belongs to; they are shown together only because
# they were introduced together.
# ======================================================================

# ----------------------------------------------------------------------
# lib/githubexplorer.pm  (new file, mode 100644)
# ----------------------------------------------------------------------
package githubexplorer;
use 5.010;

# NOTE(review): developer-specific absolute path, presumably a local
# checkout of Net::GitHub -- confirm, and prefer PERL5LIB or a declared
# dependency before running this anywhere else.
use lib ('/home/franck/code/git/net-github/lib');
use YAML::Syck;
use Moose;
use githubexplorer::Schema;

# Crawling behaviour lives in the two roles: fetch_profile comes from
# ::Profile and fetch_repo from ::Repositorie.
with qw/githubexplorer::Profile githubexplorer::Repositorie/;

# Logins the crawl starts from.
has seed => ( isa => 'ArrayRef', is => 'ro', required => 1 );

# Credentials handed to the Net::GitHub::V2 clients.
has api_login => ( isa => 'Str', is => 'ro', required => 1 );
has api_token => ( isa => 'Str', is => 'ro', required => 1 );

# Argument list passed verbatim to githubexplorer::Schema->connect.
has connect_info => ( isa => 'ArrayRef', is => 'ro', required => 1 );

# When true, fetch_profile also fetches the repositories of each newly
# created profile.
has with_repo => ( isa => 'Bool', is => 'ro', default => 0 );

# Database handle; populated on demand by _connect.
has schema => (
    isa       => 'githubexplorer::Schema',
    is        => 'rw',
    predicate => 'has_schema'
);

# Create the database tables described by the schema classes.
sub deploy {
    my ($self) = @_;
    $self->_connect() unless $self->has_schema;
    return $self->schema->deploy;
}

# Connect to the database using connect_info and store the schema.
sub _connect {
    my ($self) = @_;
    return $self->schema(
        githubexplorer::Schema->connect( @{ $self->connect_info } ) );
}

# Fetch every seed profile, starting at crawl depth $depth (default 1).
sub harvest_profiles {
    my ( $self, $depth ) = @_;
    $self->_connect() unless $self->has_schema;
    $depth //= 1;
    foreach my $login ( @{ $self->seed } ) {
        $self->fetch_profile( $login, $depth );
    }
    return;
}

# Call fetch_repo once for every stored profile.
#
# NOTE(review): fetch_repo takes ( $profile, $repo_name ) but no repository
# name is passed here, so $repo_name is undefined inside fetch_repo. This
# looks unfinished; the intended source of repository names needs to be
# confirmed before this method can work as its name suggests.
sub harvest_repo {
    my ($self) = @_;
    $self->_connect() unless $self->has_schema;
    my $profiles = $self->schema->resultset('Profiles')->search();
    while ( my $profile = $profiles->next ) {
        $self->fetch_repo($profile);
    }
    return;
}

no Moose;
__PACKAGE__->meta->make_immutable;

1;

# ----------------------------------------------------------------------
# lib/githubexplorer/Gexf.pm  (new file, mode 100644)
# ----------------------------------------------------------------------
package githubexplorer::Gexf;

use Moose;
use XML::Simple;

# Skeleton of a GEXF document held as a nested hash. Every instance gets
# its own fresh copy because the default is built by a code reference.
has graph => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub {
        return {
            gexf => {
                version => "1.0",
                meta    => { creator => ['rtgi'] },
                graph   => {
                    type       => 'static',
                    attributes => {
                        class     => 'node',
                        type      => 'static',
                        attribute => [ { id => 0, type => 'string' } ]
                    }
                }
            }
        };
    }
);

no Moose;
__PACKAGE__->meta->make_immutable;

1;

# ----------------------------------------------------------------------
# lib/githubexplorer/Profile.pm  (new file, mode 100644)
# ----------------------------------------------------------------------
package githubexplorer::Profile;
use 5.010;
use Moose::Role;
use Net::GitHub::V2::Users;

# Followers are only followed while the next depth stays at or below this.
my $MAX_DEPTH = 3;

# Make sure a profile row exists for $login, then recurse into the
# followers of that login and record one Follow row per follower.
#
#   $login - GitHub login to fetch
#   $depth - crawl depth of this login (defaults to 1)
#
# Returns the Profiles row for $login.
sub fetch_profile {
    my ( $self, $login, $depth ) = @_;
    $depth //= 1;

    my $profile = $self->_profile_exists($login);

    say "fetch profile for $login ($depth)...";
    sleep(1);    # pause between API interactions
    my $github = Net::GitHub::V2::Users->new(
        owner => $login,
        login => $self->api_login,
        token => $self->api_token,
    );
    sleep(2);

    if ( !$profile ) {
        $profile = $self->_create_profile( $login, $github->show, $depth );
        if ( $self->with_repo ) {
            foreach my $repo ( @{ $github->list // [] } ) {
                $self->fetch_repo( $profile, $repo->{name} );
            }
        }
        sleep(1);
    }

    # Check the cut-off before asking for followers, so no request is made
    # for a list that would be thrown away.
    my $next_depth = $depth + 1;
    return $profile if $next_depth > $MAX_DEPTH;

    my $followers = $github->followers() // [];
    foreach my $follower_login (@$followers) {
        my $follower = $self->fetch_profile( $follower_login, $next_depth );
        next unless $follower;

        # find_or_create: the edge may already be stored, and
        # (id_follower, id_following) is the table's primary key, so a
        # plain create would die on the duplicate.
        $self->schema->resultset('Follow')->find_or_create(
            { id_following => $profile->id, id_follower => $follower->id } );
    }
    return $profile;
}

# Return the stored Profiles row for $login, or undef when there is none.
sub _profile_exists {
    my ( $self, $login ) = @_;
    my $profile
        = $self->schema->resultset('Profiles')->find( { login => $login } );
    return $profile;
}

# Insert a Profiles row built from $profile plus the crawl depth.
#
#   $user_name - login being created (currently unused)
#   $profile   - hash reference of column values; presumably the user
#                record returned by the GitHub API -- verify that its keys
#                match the Profiles columns
#   $depth     - crawl depth stored with the row
#
# Returns the newly created row.
sub _create_profile {
    my ( $self, $user_name, $profile, $depth ) = @_;

    # Work on a copy so the caller's hash is left untouched.
    my %columns = ( %$profile, depth => $depth );

    my $profile_rs = $self->schema->resultset('Profiles')->create( \%columns );
    say $profile_rs->login."'s profile created";
    return $profile_rs;
}

no Moose::Role;

1;

# ----------------------------------------------------------------------
# lib/githubexplorer/Repositorie.pm  (new file, mode 100644)
# ----------------------------------------------------------------------
package githubexplorer::Repositorie;
use 5.010;
use Moose::Role;
use Net::GitHub::V2::Repositories;

# Store the repository $repo_name owned by $profile, but only when it is
# not already stored and one of its languages matches /perl/i.
#
# Returns the Repositories row when one is stored, nothing otherwise.
sub fetch_repo {
    my ( $self, $profile, $repo_name ) = @_;

    return if $self->_repo_exists( $profile, $repo_name );

    say "check ".$profile->login."'s $repo_name";
    sleep(1);    # pause between API interactions
    my $github = Net::GitHub::V2::Repositories->new(
        owner => $profile->login,
        repo  => $repo_name,
        login => $self->api_login,
        token => $self->api_token,
    );
    my $langs = [ keys %{ $github->languages() // {} } ];
    sleep(1);
    return unless grep {/perl/i} @$langs;

    my $repo_desc = $github->show();
    $repo_desc->{languages} = $langs;
    my $repo = $self->_create_repo( $profile, $repo_desc );
    sleep(1);
    return $repo;
}

# Return the stored Repositories row for ( $repo_name, $profile ), or
# undef when there is none, so the result can be used as a boolean.
sub _repo_exists {
    my ( $self, $profile, $repo_name ) = @_;
    my $repo = $self->schema->resultset('Repositories')
        ->find( { name => $repo_name, id_profile => $profile->id } );
    return $repo;
}

# Return the Repositories row for $repo_desc->{name} under $profile,
# creating it from the description when it does not exist yet.
sub _create_repo {
    my ( $self, $profile, $repo_desc ) = @_;

    my $repos   = $self->schema->resultset('Repositories');
    my $repo_rs = $repos->find(
        { id_profile => $profile->id, name => $repo_desc->{name} } );
    return $repo_rs if $repo_rs;

    # Only these fields of the description are persisted.
    my @stored_fields = qw/description name homepage url watchers forks/;
    my %repo_insert   = (
        id_profile => $profile->id,
        map { $_ => $repo_desc->{$_} } @stored_fields,
    );
    return $repos->create( \%repo_insert );
}

no Moose::Role;

1;

# ----------------------------------------------------------------------
# lib/githubexplorer/Schema.pm  (new file, mode 100644)
# ----------------------------------------------------------------------
package githubexplorer::Schema;

use strict;
use warnings;

use base qw/DBIx::Class::Schema/;

# Load the result classes found under this package's namespace.
__PACKAGE__->load_namespaces();

1;

# ----------------------------------------------------------------------
# lib/githubexplorer/Schema/Result/Follow.pm  (new file, mode 100644)
# ----------------------------------------------------------------------
package githubexplorer::Schema::Result::Follow;

use strict;
use warnings;

use base qw/DBIx::Class/;

__PACKAGE__->load_components(qw/Core/);
__PACKAGE__->table('follow');

# One row per "follower follows following" edge between two profiles.
__PACKAGE__->add_columns(
    id_follower  => { data_type => 'int', },
    id_following => { data_type => 'int' },
);

# The pair is the key, so each edge can be stored only once.
__PACKAGE__->set_primary_key(qw/id_follower id_following/);
__PACKAGE__->belongs_to( 'id_follower',
    'githubexplorer::Schema::Result::Profiles' );
__PACKAGE__->belongs_to( 'id_following',
    'githubexplorer::Schema::Result::Profiles' );

1;

# ----------------------------------------------------------------------
# lib/githubexplorer/Schema/Result/Profiles.pm  (new file, mode 100644)
# ----------------------------------------------------------------------
package githubexplorer::Schema::Result::Profiles;

use strict;
use warnings;

use base qw/DBIx::Class/;

__PACKAGE__->load_components(qw/Core/);
__PACKAGE__->table('profiles');

# NOTE(review): id is not auto-increment, so the value presumably comes
# from the fetched profile data -- confirm.
__PACKAGE__->add_columns(
    id                => { data_type => 'integer', },
    login             => { data_type => 'varchar' },
    blog              => { data_type => 'varchar', is_nullable => 1 },
    company           => { data_type => 'varchar', is_nullable => 1 },
    created_at        => { data_type => 'timestamp' },
    email             => { data_type => 'varchar', is_nullable => 1 },
    followers_count   => { data_type => 'int' },
    following_count   => { data_type => 'int' },
    gravatar_id       => { data_type => 'varchar', is_nullable => 1 },
    location          => { data_type => 'varchar', is_nullable => 1 },
    name              => { data_type => 'varchar', is_nullable => 1 },
    public_gist_count => { data_type => 'int' },
    public_repo_count => { data_type => 'int' },

    # Crawl depth at which the profile was first stored; it takes values
    # above 1, so it must be an integer column rather than a boolean.
    depth => { data_type => 'int' },
);

__PACKAGE__->set_primary_key('id');
__PACKAGE__->has_many( 'get_repos',
    'githubexplorer::Schema::Result::Repositories', 'id_profile' );

1;

# ----------------------------------------------------------------------
# lib/githubexplorer/Schema/Result/Repositories.pm  (new file, mode 100644)
# ----------------------------------------------------------------------
package githubexplorer::Schema::Result::Repositories;

use strict;
use warnings;

use base qw/DBIx::Class/;

__PACKAGE__->load_components(qw/Core/);
__PACKAGE__->table('repositories');
__PACKAGE__->add_columns(
    id          => { data_type => 'integer', is_auto_increment => 1 },
    description => { data_type => 'text',    is_nullable       => 1 },
    name        => { data_type => 'varchar' },
    homepage    => { data_type => 'varchar', is_nullable => 1 },
    url         => { data_type => 'varchar', is_nullable => 1 },
    watchers    => { data_type => 'int' },
    forks       => { data_type => 'int' },
    id_profile  => { data_type => 'int', is_foreign_key => 1 },
);

__PACKAGE__->set_primary_key('id');
__PACKAGE__->belongs_to( 'id_profile',
    'githubexplorer::Schema::Result::Profiles' );

# A profile cannot own two repositories with the same name.
__PACKAGE__->add_unique_constraint( [qw/name id_profile/] );

1;
