summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--crawl.pl25
-rw-r--r--lib/githubexplorer.pm51
-rw-r--r--lib/githubexplorer/Gexf.pm27
-rw-r--r--lib/githubexplorer/Profile.pm59
-rw-r--r--lib/githubexplorer/Repositorie.pm52
-rw-r--r--lib/githubexplorer/Schema.pm7
-rw-r--r--lib/githubexplorer/Schema/Result/Follow.pm18
-rw-r--r--lib/githubexplorer/Schema/Result/Profiles.pm28
-rw-r--r--lib/githubexplorer/Schema/Result/Repositories.pm23
9 files changed, 290 insertions, 0 deletions
diff --git a/crawl.pl b/crawl.pl
new file mode 100644
index 0000000..fa7ae4e
--- /dev/null
+++ b/crawl.pl
@@ -0,0 +1,25 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use lib ('lib');
+use githubexplorer;
+use Getopt::Long;
+
+# Command-line switches:
+#   --deploy    create the database tables before crawling
+#   --profiles  accepted, but not consulted anywhere in this script
+#   --repo      passed to the crawler as with_repo
+# GetOptions returns false on an unknown or malformed switch; stop there
+# instead of crawling with options the user did not intend.
+GetOptions(
+    'deploy'   => \my $deploy,
+    'profiles' => \my $profiles,
+    'repo'     => \my $repo,
+) or die "usage: $0 [--deploy] [--profiles] [--repo]\n";
+
+# The GitHub credentials come from the environment. Check them here so a
+# missing variable gives a clear message rather than a type-constraint
+# failure from the constructor below.
+foreach my $var (qw/GITHUB_APIKEY GITHUB_LOGIN/) {
+    die "environment variable $var must be set\n"
+        unless defined $ENV{$var};
+}
+
+my $gh = githubexplorer->new(
+    seed         => [qw/franckcuny/],
+    api_token    => $ENV{'GITHUB_APIKEY'},
+    api_login    => $ENV{'GITHUB_LOGIN'},
+    with_repo    => $repo,
+    connect_info =>
+        [ 'dbi:SQLite:dbname=test.sqlite', '', '', { AutoCommit => 1 } ],
+);
+
+$gh->deploy if $deploy;
+$gh->harvest_profiles;
+
diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm
new file mode 100644
index 0000000..fdd609a
--- /dev/null
+++ b/lib/githubexplorer.pm
@@ -0,0 +1,51 @@
+package githubexplorer;
+use 5.010;
+use lib ('/home/franck/code/git/net-github/lib');
+use YAML::Syck;
+use Moose;
+use githubexplorer::Schema;
+
+# Pull in the profile- and repository-crawling methods.
+with 'githubexplorer::Profile', 'githubexplorer::Repositorie';
+
+# Logins that harvest_profiles starts crawling from.
+has seed => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+
+# Credentials handed to the Net::GitHub clients as login / token.
+has api_login => ( is => 'ro', isa => 'Str', required => 1 );
+has api_token => ( is => 'ro', isa => 'Str', required => 1 );
+
+# Argument list passed to githubexplorer::Schema->connect.
+has connect_info => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+
+# When true, fetch_profile also fetches the repositories of each profile
+# it newly creates.
+has with_repo => ( is => 'ro', isa => 'Bool', default => 0 );
+
+# Schema object, set by _connect; has_schema tells whether it is set.
+has schema => (
+    is        => 'rw',
+    isa       => 'githubexplorer::Schema',
+    predicate => 'has_schema',
+);
+
+# Deploy the schema to the database, connecting first when no schema
+# object has been set yet.
+sub deploy {
+    my $self = shift;
+    if ( !$self->has_schema ) {
+        $self->_connect();
+    }
+    $self->schema->deploy;
+}
+
+# Connect githubexplorer::Schema using connect_info and store the result
+# in the schema attribute.
+sub _connect {
+    my ($self) = @_;
+    my @connect_args = @{ $self->connect_info };
+    $self->schema( githubexplorer::Schema->connect(@connect_args) );
+}
+
+# Crawl every seed login with fetch_profile.
+#
+# Arguments:
+#   $depth - starting depth passed to fetch_profile; defaults to 1 when
+#            undefined
+sub harvest_profiles {
+    my $self  = shift;
+    my $depth = shift;
+    $depth = 1 unless defined $depth;
+    if ( !$self->has_schema ) {
+        $self->_connect();
+    }
+    for my $seed_login ( @{ $self->seed } ) {
+        $self->fetch_profile( $seed_login, $depth );
+    }
+}
+
+# Walk every row of the Profiles resultset and call fetch_repo on it.
+sub harvest_repo {
+    my $self = shift;
+    if ( !$self->has_schema ) {
+        $self->_connect;
+    }
+    my $stored_profiles = $self->schema->resultset('Profiles')->search();
+    while ( my $row = $stored_profiles->next ) {
+        # NOTE(review): fetch_repo in githubexplorer::Repositorie unpacks a
+        # second $repo_name argument; none is passed here -- confirm intent.
+        $self->fetch_repo($row);
+    }
+}
+
+1;
diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm
new file mode 100644
index 0000000..a82a741
--- /dev/null
+++ b/lib/githubexplorer/Gexf.pm
@@ -0,0 +1,27 @@
+package githubexplorer::Gexf;
+
+use Moose;
+use XML::Simple;
+
+# Skeleton of the GEXF document as a nested hash. Each object gets its
+# own fresh copy because the default is built inside a code reference.
+has graph => (
+    isa     => 'HashRef',
+    is      => 'rw',
+    default => sub {
+        return {
+            gexf => {
+                version => "1.0",
+                meta    => { creator => ['rtgi'] },
+                graph   => {
+                    type       => 'static',
+                    attributes => {
+                        class     => 'node',
+                        type      => 'static',
+                        attribute => [ { id => 0, type => 'string' } ],
+                    },
+                },
+            },
+        };
+    },
+);
+
+1;
diff --git a/lib/githubexplorer/Profile.pm b/lib/githubexplorer/Profile.pm
new file mode 100644
index 0000000..f580f79
--- /dev/null
+++ b/lib/githubexplorer/Profile.pm
@@ -0,0 +1,59 @@
+package githubexplorer::Profile;
+use 5.010;
+use Moose::Role;
+use Net::GitHub::V2::Users;
+
+# Fetch a GitHub profile and, recursively, the profiles of its followers,
+# recording one Follow row per (followed, follower) pair.
+#
+# Arguments:
+#   $login - GitHub login to fetch
+#   $depth - depth of this login in the crawl; followers are only crawled
+#            while $depth + 1 does not exceed 3
+#
+# Returns the Profiles row for $login.
+sub fetch_profile {
+    my ( $self, $login, $depth ) = @_;
+
+    my $profile = $self->_profile_exists($login);
+
+    say "fetch profile for $login ($depth)...";
+    sleep(1);
+    my $github = Net::GitHub::V2::Users->new(
+        owner => $login,
+        login => $self->api_login,
+        token => $self->api_token,
+    );
+    sleep(2);
+
+    if ( !$profile ) {
+        $profile = $self->_create_profile( $login, $github->show, $depth );
+        if ( $self->with_repo ) {
+            # NOTE(review): list is called on the Users client here --
+            # confirm that this client actually provides it.
+            foreach my $repo ( @{ $github->list } ) {
+                $self->fetch_repo( $profile, $repo->{name} );
+            }
+        }
+        sleep(1);
+    }
+
+    # Check the depth limit before asking for the follower list: past the
+    # limit the list would be thrown away, so the request is skipped.
+    my $next_depth = $depth + 1;
+    return $profile if $next_depth > 3;
+
+    my $followers = $github->followers();
+    if ( ref($followers) ne 'ARRAY' ) {
+        # Anything other than an array reference cannot be iterated.
+        warn "no follower list for $login\n";
+        return $profile;
+    }
+
+    foreach my $f (@$followers) {
+        my $p = $self->fetch_profile( $f, $next_depth );
+        next unless $p;
+
+        # The pair is the table's primary key and the same edge can be
+        # reached more than once (or on a later run), so only insert it
+        # when it is not already stored.
+        $self->schema->resultset('Follow')->find_or_create(
+            { id_following => $profile->id, id_follower => $p->id } );
+    }
+    return $profile;
+}
+
+# Look up a stored profile by login.
+#
+# Returns whatever the Profiles resultset's find() yields for that login.
+sub _profile_exists {
+    my $self  = shift;
+    my $login = shift;
+    my $profiles = $self->schema->resultset('Profiles');
+    my $found    = $profiles->find( { login => $login } );
+    return $found;
+}
+
+# Store a profile hash as a new Profiles row.
+#
+# Arguments:
+#   $user_name - not used by this method
+#   $profile   - hash reference of column values; its depth key is set
+#                in place before the insert
+#   $depth     - crawl depth recorded with the row
+#
+# Returns the created row.
+sub _create_profile {
+    my ( $self, $user_name, $profile, $depth ) = @_;
+
+    $profile->{depth} = $depth;
+
+    my $row = $self->schema->resultset('Profiles')->create($profile);
+    say sprintf( "%s's profile created", $row->login );
+    return $row;
+}
+
+1;
diff --git a/lib/githubexplorer/Repositorie.pm b/lib/githubexplorer/Repositorie.pm
new file mode 100644
index 0000000..907a3b8
--- /dev/null
+++ b/lib/githubexplorer/Repositorie.pm
@@ -0,0 +1,52 @@
+package githubexplorer::Repositorie;
+use 5.010;
+use Moose::Role;
+use Net::GitHub::V2::Repositories;
+
+# Inspect one repository of a profile and store it when one of its
+# language names matches /perl/i.
+#
+# Arguments:
+#   $profile   - Profiles row owning the repository
+#   $repo_name - name of the repository
+#
+# Returns early when _repo_exists reports a true value, and again when
+# no language name matches.
+sub fetch_repo {
+    my ( $self, $profile, $repo_name ) = @_;
+
+    if ( $self->_repo_exists( $profile, $repo_name ) ) {
+        return;
+    }
+
+    my $owner = $profile->login;
+    say "check " . $owner . "'s $repo_name";
+    sleep(1);
+    my $client = Net::GitHub::V2::Repositories->new(
+        owner => $owner,
+        repo  => $repo_name,
+        login => $self->api_login,
+        token => $self->api_token,
+    );
+    my @languages = keys %{ $client->languages() };
+    sleep(1);
+
+    my $perl_hits = grep { /perl/i } @languages;
+    return unless $perl_hits;
+
+    my $description = $client->show();
+    $description->{languages} = \@languages;
+    $self->_create_repo( $profile, $description );
+    sleep(1);
+}
+
+# Tell whether a repository with this name is already stored for the
+# given profile.
+#
+# Arguments:
+#   $profile   - Profiles row owning the repository
+#   $repo_name - repository name to look for
+#
+# Returns 1 when a matching Repositories row exists, 0 otherwise.
+sub _repo_exists {
+    my ( $self, $profile, $repo_name ) = @_;
+    my $found = $self->schema->resultset('Repositories')
+        ->find( { name => $repo_name, id_profile => $profile->id } );
+
+    # Return an explicit boolean: a bare "return if <lookup>" hands the
+    # caller a false value whether or not a row was found.
+    return $found ? 1 : 0;
+}
+
+# Return the Repositories row for this profile and repository name,
+# inserting it first when no such row is found.
+#
+# Arguments:
+#   $profile   - Profiles row owning the repository
+#   $repo_desc - hash reference describing the repository; only the
+#                listed keys are copied into the new row
+sub _create_repo {
+    my ( $self, $profile, $repo_desc ) = @_;
+
+    my $repos    = $self->schema->resultset('Repositories');
+    my $existing = $repos->find(
+        { id_profile => $profile->id, name => $repo_desc->{name} } );
+    return $existing if $existing;
+
+    my @copied_keys = qw/description name homepage url watchers forks/;
+    my %new_row     = ( id_profile => $profile->id );
+    @new_row{@copied_keys} = @{$repo_desc}{@copied_keys};
+
+    my $created = $repos->create( \%new_row );
+    return $created;
+}
+
+1;
diff --git a/lib/githubexplorer/Schema.pm b/lib/githubexplorer/Schema.pm
new file mode 100644
index 0000000..306480c
--- /dev/null
+++ b/lib/githubexplorer/Schema.pm
@@ -0,0 +1,7 @@
+package githubexplorer::Schema;
+
+use strict;
+use warnings;
+use base qw/DBIx::Class::Schema/;
+
+# Register the result classes found under this schema's namespace.
+__PACKAGE__->load_namespaces();
+
+1;
diff --git a/lib/githubexplorer/Schema/Result/Follow.pm b/lib/githubexplorer/Schema/Result/Follow.pm
new file mode 100644
index 0000000..735980b
--- /dev/null
+++ b/lib/githubexplorer/Schema/Result/Follow.pm
@@ -0,0 +1,18 @@
+package githubexplorer::Schema::Result::Follow;
+
+use strict;
+use warnings;
+use base qw/DBIx::Class/;
+
+# One row per "follower follows following" edge between two profiles.
+__PACKAGE__->load_components(qw/Core/);
+__PACKAGE__->table('follow');
+
+__PACKAGE__->add_columns(
+    id_follower  => { data_type => 'int' },
+    id_following => { data_type => 'int' },
+);
+
+# The pair of columns is the key, so each edge can be stored only once.
+__PACKAGE__->set_primary_key(qw/id_follower id_following/);
+
+# Both columns refer to rows of the Profiles result class.
+__PACKAGE__->belongs_to( 'id_follower',
+    'githubexplorer::Schema::Result::Profiles' );
+__PACKAGE__->belongs_to( 'id_following',
+    'githubexplorer::Schema::Result::Profiles' );
+
+1;
diff --git a/lib/githubexplorer/Schema/Result/Profiles.pm b/lib/githubexplorer/Schema/Result/Profiles.pm
new file mode 100644
index 0000000..001057e
--- /dev/null
+++ b/lib/githubexplorer/Schema/Result/Profiles.pm
@@ -0,0 +1,28 @@
+package githubexplorer::Schema::Result::Profiles;
+
+use strict;
+use warnings;
+use base qw/DBIx::Class/;
+
+# One row per crawled GitHub profile.
+__PACKAGE__->load_components(qw/Core/);
+__PACKAGE__->table('profiles');
+__PACKAGE__->add_columns(
+    id                => { data_type => 'integer' },
+    login             => { data_type => 'varchar' },
+    blog              => { data_type => 'varchar', is_nullable => 1 },
+    company           => { data_type => 'varchar', is_nullable => 1 },
+    created_at        => { data_type => 'timestamp' },
+    email             => { data_type => 'varchar', is_nullable => 1 },
+    followers_count   => { data_type => 'int' },
+    following_count   => { data_type => 'int' },
+    gravatar_id       => { data_type => 'varchar', is_nullable => 1 },
+    location          => { data_type => 'varchar', is_nullable => 1 },
+    name              => { data_type => 'varchar', is_nullable => 1 },
+    public_gist_count => { data_type => 'int' },
+    public_repo_count => { data_type => 'int' },
+
+    # Crawl depth at which the profile was stored. The crawler writes
+    # integers above 1 here, so the column is an integer, not a boolean.
+    depth => { data_type => 'int' },
+);
+
+__PACKAGE__->set_primary_key('id');
+
+# Repositories rows whose id_profile points at this profile.
+__PACKAGE__->has_many( 'get_repos',
+    'githubexplorer::Schema::Result::Repositories', 'id_profile' );
+
+1;
diff --git a/lib/githubexplorer/Schema/Result/Repositories.pm b/lib/githubexplorer/Schema/Result/Repositories.pm
new file mode 100644
index 0000000..641305f
--- /dev/null
+++ b/lib/githubexplorer/Schema/Result/Repositories.pm
@@ -0,0 +1,23 @@
+package githubexplorer::Schema::Result::Repositories;
+
+use strict;
+use warnings;
+use base qw/DBIx::Class/;
+
+# One row per stored repository, linked to its owning profile.
+__PACKAGE__->load_components(qw/Core/);
+__PACKAGE__->table('repositories');
+__PACKAGE__->add_columns(
+    id          => { data_type => 'integer', is_auto_increment => 1 },
+    description => { data_type => 'text',    is_nullable       => 1 },
+    name        => { data_type => 'varchar' },
+    homepage    => { data_type => 'varchar', is_nullable       => 1 },
+    url         => { data_type => 'varchar', is_nullable       => 1 },
+    watchers    => { data_type => 'int' },
+    forks       => { data_type => 'int' },
+    id_profile  => { data_type => 'int',     is_foreign_key    => 1 },
+);
+
+__PACKAGE__->set_primary_key('id');
+__PACKAGE__->belongs_to( 'id_profile',
+    'githubexplorer::Schema::Result::Profiles' );
+
+# A profile can hold a given repository name only once.
+__PACKAGE__->add_unique_constraint( [qw/name id_profile/] );
+
+1;