summary | refs | log | tree | commit | diff
path: root/lib/GitHub
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- lib/GitHub/Collector.pm | 6
-rw-r--r-- lib/GitHub/Collector/Command/country.pm | 70
-rw-r--r-- lib/GitHub/Collector/Command/edges.pm | 67
-rw-r--r-- lib/GitHub/Collector/Command/graph.pm | 47
-rw-r--r-- lib/GitHub/Collector/Command/indegree.pm | 29
-rw-r--r-- lib/GitHub/Collector/Command/lang.pm | 72
-rw-r--r-- lib/GitHub/Collector/Command/profile.pm | 90
-rw-r--r-- lib/GitHub/Collector/Command/repository.pm | 44
-rw-r--r-- lib/GitHub/Collector/Command/stats.pm | 94
-rw-r--r-- lib/GitHub/Collector/Role/Context.pm | 13
-rw-r--r-- lib/GitHub/Collector/Role/Graph/Edges.pm | 13
-rw-r--r-- lib/GitHub/Collector/Role/Graph/Gexf.pm | 47
-rw-r--r-- lib/GitHub/Collector/Role/Graph/Neighbors.pm | 128
-rw-r--r-- lib/GitHub/Collector/Role/Graph/Nodes.pm | 13
-rw-r--r-- lib/GitHub/Collector/Role/Graph/Query.pm | 45
-rw-r--r-- lib/GitHub/Collector/Role/Graph/Search.pm | 19
-rw-r--r-- lib/GitHub/Collector/Role/Languages.pm | 25
-rw-r--r-- lib/GitHub/Collector/Role/Logger.pm | 27
-rw-r--r-- lib/GitHub/Collector/Role/MongoDB.pm | 41
-rw-r--r-- lib/GitHub/Collector/Role/Pause.pm | 18
-rw-r--r-- lib/GitHub/Collector/Role/Profile.pm | 84
-rw-r--r-- lib/GitHub/Collector/Role/Relation.pm | 70
-rw-r--r-- lib/GitHub/Collector/Role/Repository.pm | 173
-rw-r--r-- lib/GitHub/Collector/Role/SPORE.pm | 27
24 files changed, 1262 insertions, 0 deletions
diff --git a/lib/GitHub/Collector.pm b/lib/GitHub/Collector.pm
new file mode 100644
index 0000000..56be19d
--- /dev/null
+++ b/lib/GitHub/Collector.pm
@@ -0,0 +1,6 @@
+# Application class: a MooseX::App::Cmd subclass with no behaviour of its
+# own. The individual commands live in the GitHub::Collector::Command::*
+# packages.
+package GitHub::Collector;
+
+use Moose;
+extends qw(MooseX::App::Cmd);
+
+1;
diff --git a/lib/GitHub/Collector/Command/country.pm b/lib/GitHub/Collector/Command/country.pm
new file mode 100644
index 0000000..f7f5107
--- /dev/null
+++ b/lib/GitHub/Collector/Command/country.pm
@@ -0,0 +1,70 @@
+package GitHub::Collector::Command::country;
+
+use Moose;
+use boolean;
+
+extends qw(MooseX::App::Cmd::Command);
+
+# Settings for the Geo API client, required at construction time.
+# The attribute name is handed to Net::HTTP::Spore::Role as the config
+# source of the 'geo' client, and its 'api_username' entry is sent with
+# every search request issued by _tag_profile_by_country.
+has geo_conf => (
+ is => 'rw',
+ isa => 'HashRef',
+ required => 1,
+ documentation => 'SPORE configuration for Geo API',
+);
+
+with
+ 'GitHub::Collector::Role::Logger',
+ 'GitHub::Collector::Role::Context',
+ 'GitHub::Collector::Role::MongoDB',
+ 'Net::HTTP::Spore::Role' =>
+ { spore_clients => [ { name => 'geo', config => 'geo_conf' } ] };
+
+# Command entry point: resolve a country for every profile whose
+# country_done flag is still false.
+sub execute {
+    my ($self) = @_;
+
+    $self->log("start to tag user using country");
+
+    my $cursor = $self->db_profiles->find( { country_done => false } );
+    while ( my $doc = $cursor->next ) {
+        $self->_tag_profile_by_country($doc);
+    }
+
+    $self->log("done tagging users");
+}
+
+# Look up the free-text location of one profile through the geo client and
+# store the first matching country name, or boolean false when the profile
+# has no location or the lookup yields nothing.
+#
+# Dies with "no more requests" when the service answers with status
+# value 19.
+sub _tag_profile_by_country {
+    my ( $self, $profile ) = @_;
+
+    my $login = $profile->{login};
+
+    # Nothing to search for: mark the profile as processed right away.
+    unless ( defined $profile->{location} ) {
+        $self->_update_country( $login, false );
+        return;
+    }
+
+    my $location = $profile->{location};
+    $self->log( "searching for " . $login . " based in " . $location );
+
+    my $response = $self->geo->search(
+        q        => $location,
+        username => $self->geo_conf->{api_username},
+    );
+    my $res = $response->body;
+
+    my $status = $res->{status};
+    die "no more requests" if $status && $status->{value} == 19;
+
+    my $country = $res->{geonames}->[0]->{countryName};
+    $self->_update_country( $login, $country ? $country : false );
+}
+
+# Store $country on the profile identified by $login and flag the profile
+# as processed (country_done => true).
+sub _update_country {
+    my ( $self, $login, $country ) = @_;
+
+    my $criteria = { login => $login };
+    my $changes  = { '$set' => { country => $country, country_done => true } };
+
+    $self->db_profiles->update( $criteria, $changes );
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/edges.pm b/lib/GitHub/Collector/Command/edges.pm
new file mode 100644
index 0000000..4ffe0c8
--- /dev/null
+++ b/lib/GitHub/Collector/Command/edges.pm
@@ -0,0 +1,67 @@
+package GitHub::Collector::Command::edges;
+
+use Moose;
+use boolean;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+ GitHub::Collector::Role::Context
+ GitHub::Collector::Role::Logger
+ GitHub::Collector::Role::MongoDB
+);
+
+# Command entry point: build weighted edges for every profile whose
+# edges_done flag is false and that has no outgoing edge stored yet.
+sub execute {
+    my ($self) = @_;
+
+    $self->log("start to merge contributions");
+
+    my $cursor = $self->db_profiles->find( { edges_done => false } );
+    while ( my $doc = $cursor->next ) {
+        my $login = $doc->{login};
+        next if $self->_is_done($login);
+
+        $self->log( "merge contributions for " . $login );
+        $self->_contributions($login);
+    }
+
+    $self->log("done merging contributions");
+}
+
+# Number of stored edges whose source is $login; non-zero means the
+# profile was already processed.
+sub _is_done {
+    my ( $self, $login ) = @_;
+
+    my $existing = $self->db_edges->find( { source => $login } );
+    return $existing->count;
+}
+
+# Turn the contribution records of $login into weighted edges.
+#
+# For every repository $login contributed to, the share of the
+# contributions relative to the repository size is computed as an integer
+# percentage (minimum 1) and accumulated per repository owner. One edge
+# document { source, target, weight } is then inserted per owner and the
+# profile is flagged edges_done.
+sub _contributions {
+    my ( $self, $login ) = @_;
+
+    my $contributions =
+        $self->db_contributors->find( { contributor => $login } );
+
+    my %weight_for;
+
+    while ( my $contrib = $contributions->next ) {
+        my $project = $self->db_repositories->find_one(
+            { uniq_name => $contrib->{project} } );
+
+        # Skip contributions whose repository was never stored or has no
+        # usable size, instead of relying on autovivification and an
+        # "uninitialized value" comparison to get there.
+        my $size = $project ? $project->{size} : undef;
+        next if !defined $size || $size == 0;
+
+        my $share = int( ( $contrib->{contributions} / $size ) * 100 );
+        $share ||= 1;    # a tiny contribution still counts as a link
+        $weight_for{ $contrib->{owner} } += $share;
+    }
+
+    foreach my $owner ( keys %weight_for ) {
+        $self->db_edges->insert(
+            {
+                source => $login,
+                target => $owner,
+                weight => $weight_for{$owner},
+            }
+        );
+    }
+
+    $self->db_profiles->update(
+        { login  => $login },
+        { '$set' => { edges_done => true } },
+    );
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/graph.pm b/lib/GitHub/Collector/Command/graph.pm
new file mode 100644
index 0000000..c7766a8
--- /dev/null
+++ b/lib/GitHub/Collector/Command/graph.pm
@@ -0,0 +1,47 @@
+package GitHub::Collector::Command::graph;
+
+use Moose;
+use YAML::Syck;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+ GitHub::Collector::Role::Context
+ GitHub::Collector::Role::Logger
+ GitHub::Collector::Role::MongoDB
+ GitHub::Collector::Role::Graph::Query
+ GitHub::Collector::Role::Graph::Nodes
+ GitHub::Collector::Role::Graph::Edges
+ GitHub::Collector::Role::Graph::Neighbors
+ GitHub::Collector::Role::Graph::Search
+ GitHub::Collector::Role::Graph::Gexf
+);
+
+# Optional login of the profile to centre the graph on. When set, execute
+# builds the graph from that profile's neighbours.
+has profile => (
+ is => 'ro',
+ isa => 'Str',
+ predicate => 'has_profile',
+);
+
+# Optional threshold: when set (and no profile is given), execute selects
+# the profiles whose stored indegree is strictly greater than this value.
+has indegree => (
+ is => 'ro',
+ isa => 'Int',
+ predicate => 'has_indegree',
+);
+
+# Command entry point. Builds the in-memory graph in one of three ways:
+# around a single profile, from profiles above an indegree threshold, or
+# from the query attributes. Exports it when the output option was given.
+sub execute {
+    my ($self) = @_;
+
+    if ( $self->has_profile ) {
+        $self->neighbors( $self->profile, 1 );
+        $self->remove_leaves();
+    }
+    else {
+        # An empty argument list lets build_from_query fall back to the
+        # query attributes.
+        my @search =
+            $self->has_indegree
+            ? ( { indegree => { '$gt' => $self->indegree } } )
+            : ();
+        $self->build_from_query(@search);
+    }
+
+    $self->export() if $self->should_export;
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/indegree.pm b/lib/GitHub/Collector/Command/indegree.pm
new file mode 100644
index 0000000..fe9bf78
--- /dev/null
+++ b/lib/GitHub/Collector/Command/indegree.pm
@@ -0,0 +1,29 @@
+package GitHub::Collector::Command::indegree;
+
+use Moose;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+ GitHub::Collector::Role::Context
+ GitHub::Collector::Role::Logger
+ GitHub::Collector::Role::MongoDB
+);
+
+# Command entry point: sum the weight of all incoming edges per target
+# login and store the total as the profile's indegree.
+sub execute {
+    my ($self) = @_;
+
+    my %indegree_of;
+
+    my $cursor = $self->db_edges->find();
+    while ( my $edge = $cursor->next ) {
+        $indegree_of{ $edge->{target} } += $edge->{weight};
+    }
+
+    for my $login ( keys %indegree_of ) {
+        $self->db_profiles->update(
+            { login  => $login },
+            { '$set' => { indegree => $indegree_of{$login} } },
+        );
+    }
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/lang.pm b/lib/GitHub/Collector/Command/lang.pm
new file mode 100644
index 0000000..8ab6c20
--- /dev/null
+++ b/lib/GitHub/Collector/Command/lang.pm
@@ -0,0 +1,72 @@
+package GitHub::Collector::Command::lang;
+
+use Moose;
+use boolean;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+ GitHub::Collector::Role::Logger
+ GitHub::Collector::Role::Context
+ GitHub::Collector::Role::MongoDB
+);
+
+# Command entry point: assign a main language to every profile matched by
+# the { language => undef } query.
+sub execute {
+    my ($self) = @_;
+
+    $self->log("start to tag user using langs");
+
+    my $cursor = $self->db_profiles->find( { language => undef } );
+    while ( my $doc = $cursor->next ) {
+        $self->_tag_profile_by_lang($doc);
+    }
+
+    $self->log("done tagging users");
+}
+
+# Count the languages of the repositories a profile owns or contributes
+# to and store the most frequent one on the profile. The literal string
+# "none " (with its trailing space) is stored when no language was found.
+sub _tag_profile_by_lang {
+    my ( $self, $profile ) = @_;
+
+    my $login = $profile->{login};
+
+    my %count_for;
+    $self->_repos( \%count_for, $login );
+    $self->_contribs( \%count_for, $login );
+
+    # Highest count first; only the winner is kept.
+    my ($lang) =
+        sort { $count_for{$b} <=> $count_for{$a} } keys %count_for;
+
+    $lang = "none " unless $lang;
+
+    $self->log( "pour " . $login . " on a " . $lang );
+    $self->db_profiles->update(
+        { login  => $login },
+        { '$set' => { language => $lang } },
+    );
+}
+
+# Add one to $languages->{LANG} for every repository owned by $login that
+# has a language recorded.
+sub _repos {
+    my ( $self, $languages, $login ) = @_;
+
+    my $cursor = $self->db_repositories->find( { owner => $login } );
+    while ( my $repo = $cursor->next ) {
+        my $lang = $repo->{lang};
+        next unless $lang;
+        $languages->{$lang}++;
+    }
+}
+
+# Add one to $languages->{LANG} for every repository $login contributed
+# to, using the language recorded on the repository document.
+sub _contribs {
+    my ( $self, $languages, $login ) = @_;
+
+    my $cursor = $self->db_contributors->find( { contributor => $login } );
+    while ( my $contrib = $cursor->next ) {
+        my $repo = $self->db_repositories->find_one(
+            { uniq_name => $contrib->{project} } );
+
+        my $lang = $repo->{lang};
+        next unless $lang;
+        $languages->{$lang}++;
+    }
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/profile.pm b/lib/GitHub/Collector/Command/profile.pm
new file mode 100644
index 0000000..872bf28
--- /dev/null
+++ b/lib/GitHub/Collector/Command/profile.pm
@@ -0,0 +1,90 @@
+package GitHub::Collector::Command::profile;
+
+use YAML;
+use Try::Tiny;
+use Moose;
+use boolean;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+ GitHub::Collector::Role::Context
+ GitHub::Collector::Role::Logger
+ GitHub::Collector::Role::SPORE
+ GitHub::Collector::Role::Profile
+ GitHub::Collector::Role::MongoDB
+ GitHub::Collector::Role::Pause
+);
+
+# Logins used to start the crawl. In list context the accessor returns
+# the elements themselves (auto_deref), which is how execute iterates it.
+# When not supplied, the value is read lazily from $self->context->{seed}.
+# NOTE(review): no `context` method is defined in the modules shown in
+# this change -- confirm where it comes from.
+has seed => (
+ isa => 'ArrayRef',
+ is => 'ro',
+ required => 1,
+ auto_deref => 1,
+ documentation => 'seed to crawl',
+ lazy => 1,
+ default => sub {
+ my $self = shift;
+ return $self->context->{seed};
+ }
+);
+
+# Command entry point: register every seed login as a profile to crawl,
+# then crawl until no unfinished profile is left.
+sub execute {
+    my ($self) = @_;
+
+    $self->log("start to crawl profiles");
+
+    my @seeds = $self->seed;
+    for my $login (@seeds) {
+        $self->_bootstrap_profile($login);
+    }
+
+    $self->log("finish to boostrap the seed");
+    $self->_crawl(0);
+    $self->log("crawl completed");
+}
+
+# Fetch one profile, store it together with its relations and flag it as
+# done. Returns early, leaving the profile untouched, when nothing could
+# be fetched.
+sub get_profile {
+    my ( $self, $profile ) = @_;
+
+    my $login = $profile->{login};
+
+    my $info = $self->fetch_profile($login)
+        or return;
+
+    $self->save_profile($info);
+    $self->add_relations($login);
+    $self->profile_is_done($login);
+}
+
+# Crawl every profile whose `done` flag is false, and keep doing so while
+# such profiles remain (crawling a profile registers its followers and
+# followings as new, unfinished profiles).
+#
+# Implemented as a loop rather than by self-recursion: each pass used to
+# add a stack frame, so a long crawl grew the call stack without bound.
+#
+# NOTE(review): a profile that get_profile cannot complete stays
+# unfinished and is retried on every pass, so this may never terminate;
+# the original behaved the same way.
+sub _crawl {
+    my $self = shift;
+
+    do {
+        my $pending = $self->db_profiles->find( { done => false } );
+
+        while ( my $profile = $pending->next ) {
+            $self->get_profile($profile);
+        }
+    } while ( $self->db_profiles->find( { done => false } )->count > 0 );
+}
+
+# Insert a placeholder document for login $profile (not yet crawled, no
+# repositories fetched) unless a profile with that login already exists.
+sub _bootstrap_profile {
+    my ( $self, $profile ) = @_;
+
+    my $known = $self->db_profiles->find( { login => $profile } )->count;
+    return if $known > 0;
+
+    $self->debug("insert $profile into profiles");
+
+    my $placeholder =
+        { login => $profile, done => false, repositories_done => false };
+    my $res = $self->db_profiles->insert($placeholder);
+    return $res;
+}
+
+1;
+
+=head1 NAME
+
+GitHub::Collector::Command::profile - crawl GitHub profiles and their relations, starting from a seed list of logins
+
+=cut
diff --git a/lib/GitHub/Collector/Command/repository.pm b/lib/GitHub/Collector/Command/repository.pm
new file mode 100644
index 0000000..3e7fd57
--- /dev/null
+++ b/lib/GitHub/Collector/Command/repository.pm
@@ -0,0 +1,44 @@
+package GitHub::Collector::Command::repository;
+
+use Moose;
+use boolean;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+ GitHub::Collector::Role::Context
+ GitHub::Collector::Role::Logger
+ GitHub::Collector::Role::SPORE
+ GitHub::Collector::Role::MongoDB
+ GitHub::Collector::Role::Repository
+);
+
+# Command entry point: fetch the repositories of every profile that has
+# not been processed yet.
+sub execute {
+    my ($self) = @_;
+
+    $self->log("start to crawl repositories");
+    $self->_crawl();
+    $self->log("crawl completed");
+}
+
+# Fetch and store the repositories of a single profile document, logging
+# before and after.
+sub get_repositories {
+    my ( $self, $profile ) = @_;
+
+    my $who = $profile->{login};
+
+    $self->log("fetch repositories for $who");
+    $self->fetch_repositories($profile);
+    $self->log("finished to work on $who");
+}
+
+# Iterate once over the profiles whose repositories_done flag is false
+# and process each of them.
+sub _crawl {
+    my ($self) = @_;
+
+    my $cursor = $self->db_profiles->find( { repositories_done => false } );
+    while ( my $doc = $cursor->next ) {
+        $self->get_repositories($doc);
+    }
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/stats.pm b/lib/GitHub/Collector/Command/stats.pm
new file mode 100644
index 0000000..fc71d10
--- /dev/null
+++ b/lib/GitHub/Collector/Command/stats.pm
@@ -0,0 +1,94 @@
+package GitHub::Collector::Command::stats;
+
+use 5.010;
+use Moose;
+use boolean;
+use JSON;
+use DateTime;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+ GitHub::Collector::Role::Context
+ GitHub::Collector::Role::Logger
+ GitHub::Collector::Role::MongoDB
+ GitHub::Collector::Role::Languages
+);
+
+# Command entry point: aggregate profile creation dates per month, both
+# globally and per mapped language, and write one JSON series per group
+# through _create_flot. Language, country and company tallies are also
+# collected, but their display calls are currently commented out.
+sub execute {
+    my ($self) = @_;
+
+    my ( %languages, %country, %company, %created );
+
+    my $cursor = $self->db_profiles->find();
+    while ( my $profile = $cursor->next ) {
+        my $date = $profile->{created_at};
+        next unless defined $date;
+
+        # Accepts both "YYYY-MM..." and "YYYY/MM..." dates.
+        my ( $year, $month ) = $date =~ /(\d{4})(?:-|\/)(\d{2})/;
+        next unless defined $year && defined $month;
+
+        my $bucket = $year . '/' . $month;
+
+        my $lang = $self->map_languages( $profile->{language} );
+        $languages{$lang}++ if $lang ne 'Other';
+
+        $country{ $profile->{country} }++ if $profile->{country};
+        $company{ $profile->{company} }++ if defined $profile->{company};
+
+        $created{global}->{$bucket}->{total}++;
+        $created{languages}->{$lang}->{$bucket}->{total}++;
+    }
+
+#    $self->_sort_and_display(\%languages);
+#    $self->_sort_and_display(\%country, 10);
+#    $self->_sort_and_display(\%company, 100);
+
+    $self->_create_flot( $created{global}, 'global' );
+    for my $lang ( keys %{ $created{languages} } ) {
+        $self->_create_flot( $created{languages}->{$lang}, $lang );
+    }
+}
+
+# Print the entries of the counter hash $data, highest count first, each
+# with its share of the grand total as an integer percentage.
+#
+# $iter, when given and non-zero, is the index of the last row to print
+# (so $iter + 1 rows). It is clamped to the number of entries available;
+# previously a larger value printed rows for undefined keys. Nothing is
+# printed for an empty hash, and a zero total yields 0% instead of a
+# division by zero.
+sub _sort_and_display {
+    my ( $self, $data, $iter ) = @_;
+
+    my @sorted = sort { $data->{$b} <=> $data->{$a} } keys %$data;
+    return unless @sorted;
+
+    my $total = 0;
+    $total += $data->{$_} for @sorted;
+
+    my $last = $#sorted;
+    $last = $iter if $iter && $iter < $last;
+
+    for my $idx ( 0 .. $last ) {
+        my $key   = $sorted[$idx];
+        my $count = $data->{$key};
+        my $pct   = $total ? int( ( $count / $total ) * 100 ) : 0;
+        say " # " . $key . ":" . $count . " ($pct%)";
+    }
+}
+
+# Write "<label>.json" in the current directory: a structure with the
+# series label and a list of [epoch-in-milliseconds, total] points, one
+# per "YYYY/MM" key of $data, in chronological order.
+#
+# The first and last months are dropped on purpose (see below). Dies if
+# the file cannot be opened, written or closed; these failures used to be
+# ignored silently.
+sub _create_flot {
+    my ( $self, $data, $label ) = @_;
+
+    my $graph = { label => $label };
+
+    my @months = sort { $a cmp $b } keys %$data;
+
+    # remove the first and last value since they're not really worthy
+    shift @months;
+    pop @months;
+
+    foreach my $month (@months) {
+        my ( $y, $m ) = $month =~ /(\d{4})\/(\d{2})/;
+        my $epoch =
+            DateTime->new( year => $y, month => $m, day => 1 )->epoch * 1000;
+        push @{ $graph->{data} }, [ $epoch, $data->{$month}->{total} ];
+    }
+
+    my $file = $label . '.json';
+    open my $fh, '>', $file
+        or die "can't open $file for writing: $!";
+    print {$fh} JSON::encode_json($graph)
+        or die "can't write to $file: $!";
+    close $fh
+        or die "can't close $file: $!";
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/Context.pm b/lib/GitHub/Collector/Role/Context.pm
new file mode 100644
index 0000000..aa30d2b
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Context.pm
@@ -0,0 +1,13 @@
+package GitHub::Collector::Role::Context;
+
+use YAML;
+use Moose::Role;
+with qw(MooseX::ConfigFromFile);
+
+# MooseX::ConfigFromFile hook: parse the YAML file at $file and return
+# the resulting data structure.
+sub get_config_from_file {
+    my ( $class, $file ) = @_;
+
+    # scalar: keep the single-value result the caller has always received
+    return scalar YAML::LoadFile($file);
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/Graph/Edges.pm b/lib/GitHub/Collector/Role/Graph/Edges.pm
new file mode 100644
index 0000000..bea5914
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Graph/Edges.pm
@@ -0,0 +1,13 @@
+package GitHub::Collector::Role::Graph::Edges;
+
+use Moose::Role;
+
+# In-memory edge store of the graph being built: a hash reference that
+# starts empty. With auto_deref, calling the accessor in list context
+# returns the hash contents instead of the reference.
+has edges => (
+ is => 'rw',
+ isa => 'HashRef',
+ lazy => 1,
+ default => sub { {} },
+ auto_deref => 1,
+);
+
+1;
diff --git a/lib/GitHub/Collector/Role/Graph/Gexf.pm b/lib/GitHub/Collector/Role/Graph/Gexf.pm
new file mode 100644
index 0000000..ab83dd4
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Graph/Gexf.pm
@@ -0,0 +1,47 @@
+package GitHub::Collector::Role::Graph::Gexf;
+
+use Moose::Role;
+use Graph::GEXF;
+
+# Export switch. Only its presence matters in this role: the
+# `should_export` predicate tells callers whether a value was supplied;
+# the value itself is not read by export().
+has output => (
+ is => 'ro',
+ isa => 'Int',
+ predicate => 'should_export',
+);
+
+# Serialise the in-memory graph ($self->nodes / $self->edges) as GEXF and
+# print the XML on the currently selected output handle.
+sub export {
+    my ($self) = @_;
+
+    my $gexf = Graph::GEXF->new();
+
+    # Node attributes declared on the document, in declaration order.
+    my @declarations = (
+        [ name     => 'string' ],
+        [ lang     => 'string' ],
+        [ size     => 'int' ],
+        [ country  => 'string' ],
+        [ indegree => 'int' ],
+    );
+    for my $declaration (@declarations) {
+        $gexf->add_node_attribute(@$declaration);
+    }
+
+    my $graph_nodes = $self->nodes;
+    my %gexf_node_for;
+    for my $login ( keys %$graph_nodes ) {
+        my $info = $graph_nodes->{$login};
+
+        my $n = $gexf->add_node( $info->{id} );
+        $n->label($login);
+        $n->attribute( name     => $login );
+        $n->attribute( size     => $info->{size} );
+        $n->attribute( lang     => $info->{language} || '' );
+        $n->attribute( country  => $info->{country} || '' );
+        $n->attribute( indegree => $info->{indegree} );
+
+        $gexf_node_for{$login} = $n;
+    }
+
+    my $graph_edges = $self->edges;
+    for my $edge_id ( keys %$graph_edges ) {
+        my $edge = $graph_edges->{$edge_id};
+
+        $gexf_node_for{ $edge->{sourceId} }->link_to(
+            {
+                target => $edge->{targetId},
+                weight => $edge->{weight},
+            }
+        );
+    }
+
+    my $xml = $gexf->to_xml();
+    print $xml;
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/Graph/Neighbors.pm b/lib/GitHub/Collector/Role/Graph/Neighbors.pm
new file mode 100644
index 0000000..627de94
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Graph/Neighbors.pm
@@ -0,0 +1,128 @@
+package GitHub::Collector::Role::Graph::Neighbors;
+
+use Moose::Role;
+with qw(GitHub::Collector::Role::Languages);
+
+# Load $name and its direct neighbours into the graph. When
+# $with_connections is true, also load the neighbours of the node at the
+# other end of every edge known at that point.
+sub neighbors {
+    my ( $self, $name, $with_connections ) = @_;
+
+    $self->_neighbors($name);
+
+    return unless $with_connections;
+
+    # Snapshot of the ids: _connections adds edges while we iterate.
+    my @edge_ids = keys %{ $self->edges };
+    for my $edge_id (@edge_ids) {
+        $self->_connections( $edge_id, $name );
+    }
+}
+
+# Make sure a node exists for $name, then pull in its outgoing and
+# incoming edges.
+sub _neighbors {
+    my ( $self, $name ) = @_;
+
+    $self->_create_node($name) unless defined $self->nodes->{$name};
+
+    $self->_fetch_edges_from($name);
+    $self->_fetch_edges_to($name);
+}
+
+# Expand the graph around the end of edge $id that is not $name: the
+# target when $name is the source, the source otherwise.
+sub _connections {
+    my ( $self, $id, $name ) = @_;
+
+    my $edge = $self->edges->{$id};
+    my $peer =
+        $edge->{sourceId} eq $name
+        ? $edge->{targetId}
+        : $edge->{sourceId};
+
+    $self->_neighbors($peer);
+}
+
+# Prune the graph: drop every node whose size is below 2, then every edge
+# that lost its source or its target node.
+sub remove_leaves {
+    my ($self) = @_;
+
+    my $nodes = $self->nodes;
+    my $edges = $self->edges;
+
+    for my $login ( keys %$nodes ) {
+        next unless $nodes->{$login}->{size} < 2;
+        delete $nodes->{$login};
+    }
+
+    for my $edge_id ( keys %$edges ) {
+        my $edge = $edges->{$edge_id};
+        next
+            if $nodes->{ $edge->{sourceId} }
+            && $nodes->{ $edge->{targetId} };
+        delete $edges->{$edge_id};
+    }
+}
+
+# Load every stored edge whose source is $name into the in-memory graph.
+sub _fetch_edges_from {
+    my ( $self, $name ) = @_;
+
+    my $connections = $self->db_edges->find( { source => $name } );
+
+    while ( my $edge = $connections->next ) {
+        $self->_record_edge(
+            $edge->{source}, $name, $edge->{target},
+            $edge->{weight}, $name, $edge->{target},
+        );
+    }
+}
+
+# Load every stored edge whose target is $name into the in-memory graph.
+sub _fetch_edges_to {
+    my ( $self, $name ) = @_;
+
+    my $connections = $self->db_edges->find( { target => $name } );
+
+    while ( my $edge = $connections->next ) {
+        $self->_record_edge(
+            $edge->{source}, $edge->{source}, $name,
+            $edge->{weight}, $name,           $edge->{source},
+        );
+    }
+}
+
+# Register one edge unless it is already known.
+#
+#   $stored_source  source login as stored in the database (key material)
+#   $source_id      value for the edge's sourceId
+#   $target_id      value for the edge's targetId
+#   $weight         edge weight
+#   $anchor         node being expanded; its size is always incremented
+#   $other          node at the opposite end; incremented, or created with
+#                   size 1 when it is not in the graph yet
+#
+# The key joins source and target with '/'. A bare concatenation is
+# ambiguous ("ab"."c" and "a"."bc" produce the same key), which made the
+# second of two such edges disappear.
+# NOTE(review): assumes logins never contain '/' -- the repository
+# uniq_name built elsewhere in this project uses the same separator.
+sub _record_edge {
+    my ( $self, $stored_source, $source_id, $target_id, $weight, $anchor,
+        $other )
+        = @_;
+
+    my $edge_id = join '/', $stored_source, $target_id;
+    return if defined $self->edges->{$edge_id};
+
+    $self->edges->{$edge_id} = {
+        id       => $edge_id,
+        targetId => $target_id,
+        sourceId => $source_id,
+        weight   => $weight,
+    };
+
+    $self->nodes->{$anchor}->{size}++;
+
+    if ( defined $self->nodes->{$other} ) {
+        $self->nodes->{$other}->{size}++;
+    }
+    else {
+        $self->_create_node( $other, 1 );
+    }
+}
+
+# Create (or replace) the graph node for $login with an initial $size
+# (0 when omitted), filling language, country and indegree from the
+# stored profile.
+sub _create_node {
+    my ( $self, $login, $size ) = @_;
+
+    my $profile = $self->db_profiles->find_one( { login => $login } );
+
+    my %node = (
+        id       => $login,
+        label    => $login,
+        size     => $size || 0,
+        language => $self->map_languages( $profile->{language} ),
+        country  => $profile->{country} || '',
+        indegree => $profile->{indegree},
+    );
+
+    $self->nodes->{$login} = \%node;
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/Graph/Nodes.pm b/lib/GitHub/Collector/Role/Graph/Nodes.pm
new file mode 100644
index 0000000..5109e9a
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Graph/Nodes.pm
@@ -0,0 +1,13 @@
+package GitHub::Collector::Role::Graph::Nodes;
+
+use Moose::Role;
+
+# In-memory node store of the graph being built: a hash reference that
+# starts empty. With auto_deref, calling the accessor in list context
+# returns the hash contents instead of the reference.
+has nodes => (
+ is => 'rw',
+ isa => 'HashRef',
+ lazy => 1,
+ default => sub { {} },
+ auto_deref => 1,
+);
+
+1;
diff --git a/lib/GitHub/Collector/Role/Graph/Query.pm b/lib/GitHub/Collector/Role/Graph/Query.pm
new file mode 100644
index 0000000..d3ab7f0
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Graph/Query.pm
@@ -0,0 +1,45 @@
+package GitHub::Collector::Role::Graph::Query;
+
+use Moose::Role;
+
+# Optional search criteria. Each attribute has a has_* predicate;
+# build_query copies only the attributes that were actually set into the
+# search document, under the attribute's own name.
+has language => (
+ is => 'rw',
+ isa => 'Str',
+ predicate => 'has_language',
+);
+
+has location => (
+ is => 'rw',
+ isa => 'Str',
+ predicate => 'has_location',
+);
+
+has company => (
+ is => 'ro',
+ isa => 'Str',
+ predicate => 'has_company',
+);
+
+has country => (
+ is => 'ro',
+ isa => 'Str',
+ predicate => 'has_country',
+);
+
+
+# Build the search document from whichever of the language, location,
+# company and country attributes were set. Returns a hash reference,
+# empty when none was set.
+sub build_query {
+    my ($self) = @_;
+
+    my %search;
+
+    for my $attr (qw/language location company country/) {
+        my $is_set = "has_$attr";
+        next unless $self->$is_set;
+        $search{$attr} = $self->$attr;
+    }
+
+    return \%search;
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/Graph/Search.pm b/lib/GitHub/Collector/Role/Graph/Search.pm
new file mode 100644
index 0000000..fbd0d07
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Graph/Search.pm
@@ -0,0 +1,19 @@
+package GitHub::Collector::Role::Graph::Search;
+
+use Moose::Role;
+
+# Build the graph from every profile matching $search (or, when no search
+# document is given, the one produced by build_query), then prune it.
+sub build_from_query {
+    my ( $self, $search ) = @_;
+
+    $search = $self->build_query() unless $search;
+
+    my $cursor = $self->db_profiles->find($search);
+    while ( my $doc = $cursor->next ) {
+        $self->neighbors( $doc->{login}, 0 );
+    }
+
+    $self->remove_leaves();
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/Languages.pm b/lib/GitHub/Collector/Role/Languages.pm
new file mode 100644
index 0000000..a8c6583
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Languages.pm
@@ -0,0 +1,25 @@
+package GitHub::Collector::Role::Languages;
+
+use Moose::Role;
+
+# Optional lookup tables. map_languages reads the 'languages' entry, a
+# hash from a raw language name to the name to report.
+has mapping => (
+ is => 'ro',
+ isa => 'HashRef',
+);
+
+# Translate a raw language name through $self->mapping->{languages}.
+#
+# Returns the mapped name, or the string "Other" when $language is
+# undefined, has no defined entry in the table, or no mapping table is
+# configured at all. (The `mapping` attribute is optional; without this
+# guard an unset mapping made the lookup die on an undefined reference.)
+sub map_languages {
+    my ( $self, $language ) = @_;
+
+    return "Other" if !defined $language;
+
+    my $mapping = $self->mapping;
+    return "Other" unless $mapping && $mapping->{languages};
+
+    my $mapped = $mapping->{languages}->{$language};
+    return defined $mapped ? $mapped : "Other";
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/Logger.pm b/lib/GitHub/Collector/Role/Logger.pm
new file mode 100644
index 0000000..0d16307
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Logger.pm
@@ -0,0 +1,27 @@
+package GitHub::Collector::Role::Logger;
+
+use Moose::Role;
+use Log::Dispatchouli;
+
+# Lazily built Log::Dispatchouli logger writing to standard output. The
+# consuming class gets log/debug/fatal/error methods delegated to it.
+# NOTE(review): `error` delegates to `log_error`; confirm that
+# Log::Dispatchouli actually provides that method.
+has logger => (
+ is => 'rw',
+ isa => 'Log::Dispatchouli',
+ lazy => 1,
+ default => sub {
+ my $logger = Log::Dispatchouli->new(
+ {
+ ident => 'GitHub::Collector',
+ facility => 'user',
+ to_stdout => 1,
+ }
+ );
+ },
+ handles => {
+ log => 'log',
+ debug => 'log_debug',
+ fatal => 'log_fatal',
+ error => 'log_error',
+ },
+);
+
+1;
diff --git a/lib/GitHub/Collector/Role/MongoDB.pm b/lib/GitHub/Collector/Role/MongoDB.pm
new file mode 100644
index 0000000..e4cc5b1
--- /dev/null
+++ b/lib/GitHub/Collector/Role/MongoDB.pm
@@ -0,0 +1,41 @@
+package GitHub::Collector::Role::MongoDB;
+
+use Moose::Role;
+use MongoDB;
+
+# Handle on the `github` database, created on first use with default
+# connection settings apart from the two timeouts. Indexes are ensured
+# when the handle is built. The db_* methods delegate to the database
+# object's collection accessors of the corresponding names.
+has mongodb => (
+ is => 'ro',
+ isa => 'Object',
+ lazy => 1,
+ default => sub {
+ my $self = shift;
+ my $conn =
+ MongoDB::Connection->new( timeout => 60000, query_timeout => 60000 );
+ my $db = $conn->github;
+ $self->_create_indexes($db);
+ return $db;
+ },
+ handles => {
+ db_profiles => 'profiles',
+ db_repositories => 'repositories',
+ db_relations => 'relations',
+ db_contributors => 'contributors',
+ db_edges => 'edges',
+ }
+);
+
+# Ensure the indexes the commands rely on exist on database handle $db.
+#
+# Each row is [ collection, key spec, optional index options ]. The
+# contributors.contributor index is new: the edges and lang commands both
+# look contributions up by contributor, which had no index to use.
+sub _create_indexes {
+    my ( $self, $db ) = @_;
+
+    my @indexes = (
+        [ profiles     => { login       => 1 }, { unique => 1 } ],
+        [ repositories => { uniq_name   => 1 }, { unique => 1 } ],
+        [ contributors => { project     => 1 } ],
+        [ contributors => { owner       => 1 } ],
+        [ contributors => { contributor => 1 } ],
+        [ relations    => { source      => 1 } ],
+        [ relations    => { target      => 1 } ],
+        [ relations    => { login       => 1 } ],
+        [ edges        => { source      => 1 } ],
+        [ edges        => { target      => 1 } ],
+    );
+
+    foreach my $index (@indexes) {
+        my ( $collection, @spec ) = @$index;
+        $db->$collection->ensure_index(@spec);
+    }
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/Pause.pm b/lib/GitHub/Collector/Role/Pause.pm
new file mode 100644
index 0000000..ba7a779
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Pause.pm
@@ -0,0 +1,18 @@
+package GitHub::Collector::Role::Pause;
+
+use Moose::Role;
+
+# Number of seconds (10 by default) that the fetching roles sleep before
+# retrying after the API answered 403. Not used by pause() itself.
+has pause_on_error => (
+ is => 'ro',
+ isa => 'Int',
+ default => 10,
+);
+
+# Random throttle: draw an integer in 0..9 and sleep one second when it
+# is 1, otherwise sleep zero seconds. Returns whatever sleep returns.
+sub pause {
+    my ($self) = @_;
+
+    my $roll = int( rand(10) );
+
+    return sleep( $roll == 1 ? 1 : 0 );
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/Profile.pm b/lib/GitHub/Collector/Role/Profile.pm
new file mode 100644
index 0000000..01241d9
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Profile.pm
@@ -0,0 +1,84 @@
+package GitHub::Collector::Role::Profile;
+
+use Try::Tiny;
+use Moose::Role;
+use boolean;
+
+with qw/
+ GitHub::Collector::Role::Pause
+ GitHub::Collector::Role::Relation
+ /;
+
+# Fetch the profile of login $profile through the SPORE client.
+#
+# Returns the decoded response body, or nothing when the profile could
+# not be fetched. Failure handling:
+#   403   - sleep pause_on_error seconds, then retry
+#   404   - remove the profile and its relations from the database
+#   other - log the failure
+#
+# The "other" branch used to call the nonexistent method $self->return
+# and therefore died; it now logs with debug, like the repository role.
+# Exceptions without a status method (plain strings) are treated as
+# "other" instead of dying inside the error handler.
+sub fetch_profile {
+    my ( $self, $profile ) = @_;
+
+    my ( $res, $error );
+
+    try {
+        $res = $self->spore_client->get_info(
+            format   => 'json',
+            username => $profile,
+        );
+    }
+    catch {
+        $error = $_;
+    };
+
+    if ($error) {
+        # undef when the exception has no usable status method
+        my $status = try { $error->status };
+        $status = 0 unless defined $status;
+
+        if ( $status == 403 ) {
+            $self->debug( [ "need to pause (while working on %s)", $profile ] );
+
+            sleep( $self->pause_on_error );
+            return $self->fetch_profile($profile);
+        }
+        elsif ( $status == 404 ) {
+            $self->debug("profile $profile doesn't exists anymore");
+            $self->delete_profile($profile);
+            return;
+        }
+        else {
+            $self->debug("can't fetch information for $profile: $error");
+            return;
+        }
+    }
+
+    sleep( $self->pause );
+    return $res->body;
+}
+
+# Merge the fetched user data into the stored profile with the same
+# login, stamping the crawl time and resetting repositories_done. The
+# remote `id` field is removed from the data before it is stored.
+sub save_profile {
+    my ( $self, $profile_info ) = @_;
+
+    delete $profile_info->{user}->{id};
+
+    my $user = $profile_info->{user};
+    my $now  = time();
+
+    my %fields = (
+        crawled_at        => $now,
+        repositories_done => false,
+        %$user,
+    );
+
+    $self->db_profiles->update(
+        { login  => $user->{login} },
+        { '$set' => \%fields },
+    );
+
+    $self->log( "profile " . $user->{login} . " saved" );
+}
+
+# Remove the profile with login $profile together with every relation in
+# which it appears as target or as source.
+sub delete_profile {
+    my ( $self, $profile ) = @_;
+
+    $self->db_profiles->remove( { login => $profile } );
+
+    $self->db_relations->remove( { target => $profile } );
+    $self->db_relations->remove( { source => $profile } );
+
+    $self->log("all informations regarding $profile have been deleted");
+}
+
+# Flag the profile with login $login as fully crawled (done => true).
+sub profile_is_done {
+    my ( $self, $login ) = @_;
+
+    my $criteria = { login => $login };
+    my $changes  = { '$set' => { done => true } };
+
+    $self->db_profiles->update( $criteria, $changes );
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/Relation.pm b/lib/GitHub/Collector/Role/Relation.pm
new file mode 100644
index 0000000..b89d0fc
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Relation.pm
@@ -0,0 +1,70 @@
+package GitHub::Collector::Role::Relation;
+
+use Try::Tiny;
+use Moose::Role;
+
+with qw/GitHub::Collector::Role::Pause/;
+
+# Kinds of relation to fetch for each profile. Each entry is appended to
+# 'list_' to form the SPORE client method called by _grab_relations.
+# With auto_deref the accessor returns the list itself in list context.
+has types => (
+ is => 'ro',
+ isa => 'ArrayRef',
+ auto_deref => 1,
+ default => sub { [qw/followers following/] }
+);
+
+# For each relation type, fetch the users related to $login, register
+# each of them as a profile to crawl and store the directed relation:
+# a follower points at $login, and $login points at whoever it follows.
+sub add_relations {
+    my ( $self, $login ) = @_;
+
+    for my $type ( $self->types ) {
+        my $users = $self->_grab_relations( $login, $type );
+        my $login_is_target = $type eq 'followers';
+
+        for my $user (@$users) {
+            $self->_bootstrap_profile($user);
+
+            my ( $source, $target ) =
+                $login_is_target ? ( $user, $login ) : ( $login, $user );
+            $self->_add_relation( $source, $target );
+        }
+    }
+}
+
+# Fetch the list of users related to $login for relation $type
+# ('followers' or 'following') through the SPORE client.
+#
+# Returns the array reference found under `users` in the response body,
+# or undef when the request failed for a reason other than a 403.
+#
+# On a 403 the call sleeps pause_on_error seconds and retries. The result
+# of that retry is now returned; previously it was thrown away, so every
+# rate-limited request yielded undef and its relations were lost.
+# Exceptions without a status method are logged like any other failure.
+sub _grab_relations {
+    my ( $self, $login, $type ) = @_;
+
+    $self->log( [ "fetching %s informations for %s", $type, $login ] );
+
+    my $method = 'list_' . $type;
+    my ( $users, $error );
+
+    try {
+        $users = $self->spore_client->$method(
+            format => 'json',
+            user   => $login,
+        )->body->{users};
+    }
+    catch {
+        $error = $_;
+    };
+
+    if ($error) {
+        # undef when the exception has no usable status method
+        my $status = try { $error->status };
+
+        if ( defined $status && $status == 403 ) {
+            $self->debug(
+                [ "need to pause (while grabbing relations for %s)", $login ] );
+            sleep( $self->pause_on_error );
+            return $self->_grab_relations( $login, $type );
+        }
+
+        $self->debug(
+            [ "can't fetch %s relation for %s: %s", $type, $login, $error ] );
+    }
+
+    sleep( $self->pause );
+    return $users;
+}
+
+# Store the directed relation $source -> $target unless an identical
+# document already exists.
+sub _add_relation {
+    my ( $self, $source, $target ) = @_;
+
+    my $relation = { source => $source, target => $target };
+
+    return if $self->db_relations->find_one($relation);
+    $self->db_relations->insert($relation);
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/Repository.pm b/lib/GitHub/Collector/Role/Repository.pm
new file mode 100644
index 0000000..8a13693
--- /dev/null
+++ b/lib/GitHub/Collector/Role/Repository.pm
@@ -0,0 +1,173 @@
+package GitHub::Collector::Role::Repository;
+
+use Moose::Role;
+use Try::Tiny;
+use boolean;
+
+with qw/GitHub::Collector::Role::Pause/;
+
+# Fetch the repositories of $profile (a profile document), store those
+# that are not forks and have been forked at least once, and record the
+# profile's main language.
+#
+# Returns 1 on success and nothing when the repository list could not be
+# fetched. A 403 triggers a sleep of pause_on_error seconds and a retry.
+#
+# $languages is now created as an empty hash reference before being
+# handed to _get_lang. It used to be undef, so _get_lang accumulated into
+# its own private copy and the profile language was always stored as
+# undef.
+sub fetch_repositories {
+    my ( $self, $profile ) = @_;
+
+    my $languages = {};
+    my ( $repositories, $error );
+
+    try {
+        $repositories = $self->spore_client->list_repos(
+            user   => $profile->{login},
+            format => 'json',
+        )->body->{repositories};
+    }
+    catch {
+        $error = $_;
+    };
+
+    if ($error) {
+        # undef when the exception has no usable status method
+        my $status = try { $error->status };
+
+        if ( defined $status && $status == 403 ) {
+            $self->debug(
+                [ "need to pause (while working on %s)", $profile->{login} ] );
+            sleep( $self->pause_on_error );
+            return $self->fetch_repositories($profile);
+        }
+
+        $self->debug(
+            "can't fetch repositories for " . $profile->{login} . ": $error" );
+        return;
+    }
+
+    foreach my $repo (@$repositories) {
+
+        next if $repo->{fork};
+        next unless $repo->{forks};
+
+        $self->_get_lang( $profile, $repo, $languages );
+        $self->_get_contributors( $profile, $repo );
+        $self->_save_repository( $profile, $repo );
+    }
+
+    sleep( $self->pause );
+    $self->_update_profile( $profile->{login}, $languages );
+    return 1;
+}
+
+# Flag the profile $login as having its repositories fetched and store
+# the language with the highest total in $languages.
+sub _update_profile {
+    my ( $self, $login, $languages ) = @_;
+
+    my %fields = (
+        repositories_done => true,
+        language          => $self->_main_lang($languages),
+    );
+
+    $self->db_profiles->update(
+        { login  => $login },
+        { '$set' => \%fields },
+    );
+}
+
+# Store $repo and its contributors, but only when the repository has
+# more than one contributor. The contributor list is detached from the
+# repository document and saved separately under the name "owner/repo".
+sub _save_repository {
+    my ( $self, $profile, $repo ) = @_;
+
+    my $contributors = delete $repo->{contributors};
+    return unless scalar @$contributors > 1;
+
+    my $owner        = $profile->{login};
+    my $project_name = $owner . '/' . $repo->{name};
+
+    $repo->{uniq_name} = $project_name;
+    $self->db_repositories->insert($repo);
+
+    $self->_save_contributors( $owner, $project_name, $contributors );
+
+    $self->log(
+        [ 'Add repository %s owned by %s', $repo->{name}, $profile->{login} ]
+    );
+}
+
+# Insert one contributor document per entry of $contributors for project
+# $project_name, leaving out the owner's own entry.
+sub _save_contributors {
+    my ( $self, $owner, $project_name, $contributors ) = @_;
+
+    $self->log(
+        [ 'Add %s contributor(s) to %s', scalar @$contributors, $project_name ]
+    );
+
+    my @others = grep { $owner ne $_->{login} } @$contributors;
+
+    for my $contrib (@others) {
+        my %record = (
+            project       => $project_name,
+            owner         => $owner,
+            contributor   => $contrib->{login},
+            contributions => $contrib->{contributions},
+        );
+        $self->db_contributors->insert( \%record );
+    }
+}
+
+# Fetch the language breakdown of $repo, add it to the running totals in
+# $languages and store the repository's dominant language in
+# $repo->{lang}.
+#
+# On a 403 the call sleeps pause_on_error seconds and retries; the retry
+# does all the work, so we return right after it. Previously execution
+# fell through after the retry and overwrote $repo->{lang} with undef.
+# Any other failure leaves the totals untouched and sets $repo->{lang}
+# to undef, as before; exceptions without a status method are handled
+# the same way instead of dying inside the error handler.
+sub _get_lang {
+    my ( $self, $profile, $repo, $languages ) = @_;
+
+    my ( $pr_languages, $error );
+
+    try {
+        $pr_languages = $self->spore_client->list_languages(
+            user   => $profile->{login},
+            repo   => $repo->{name},
+            format => 'json'
+        )->body->{languages};
+    }
+    catch {
+        $error = $_;
+    };
+
+    if ($error) {
+        # undef when the exception has no usable status method
+        my $status = try { $error->status };
+
+        if ( defined $status && $status == 403 ) {
+            $self->debug(
+                [ "need to pause (while getting lang for %s)", $repo->{name} ]
+            );
+            sleep( $self->pause_on_error );
+            return $self->_get_lang( $profile, $repo, $languages );
+        }
+    }
+
+    $pr_languages ||= {};
+
+    foreach my $name ( keys %$pr_languages ) {
+        $languages->{$name} += $pr_languages->{$name};
+    }
+
+    $repo->{lang} = $self->_main_lang($pr_languages);
+}
+
+# Return the key of $languages with the highest numeric value, or undef
+# when the hash is empty.
+sub _main_lang {
+    my ( $self, $languages ) = @_;
+
+    # Descending sort; list assignment keeps only the first element.
+    my ($top) =
+        sort { $languages->{$b} <=> $languages->{$a} } keys %$languages;
+
+    return $top;
+}
+
+# Fetch the contributors of $repo and store them in
+# $repo->{contributors}: the fetched list when it has more than one
+# entry, an empty list otherwise or when the request fails. A 403
+# triggers a sleep of pause_on_error seconds followed by a retry.
+sub _get_contributors {
+    my ( $self, $profile, $repo ) = @_;
+
+    try {
+        my $response = $self->spore_client->list_contributors(
+            user   => $profile->{login},
+            repo   => $repo->{name},
+            format => 'json'
+        );
+        my $list = $response->body->{contributors};
+
+        $repo->{contributors} = scalar @$list > 1 ? $list : [];
+    }
+    catch {
+        my $failure = $_;
+
+        # Anything but a rate limit: give up with an empty list.
+        unless ( $failure->status == 403 ) {
+            $repo->{contributors} = [];
+            return;
+        }
+
+        $self->debug(
+            [
+                "need to pause (while getting contributors for %s)",
+                $repo->{name}
+            ]
+        );
+        sleep( $self->pause_on_error );
+        $self->_get_contributors( $profile, $repo );
+    };
+}
+
+1;
diff --git a/lib/GitHub/Collector/Role/SPORE.pm b/lib/GitHub/Collector/Role/SPORE.pm
new file mode 100644
index 0000000..079c494
--- /dev/null
+++ b/lib/GitHub/Collector/Role/SPORE.pm
@@ -0,0 +1,27 @@
+package GitHub::Collector::Role::SPORE;
+
+use Moose::Role;
+use Net::HTTP::Spore;
+
+# Required SPORE settings. The client below reads the API description
+# from the {github}{description} entry of this hash.
+has spore_configuration => (
+ is => 'ro',
+ isa => 'HashRef',
+ required => 1,
+ documentation => 'SPORE configuration',
+);
+
+# SPORE client for the GitHub API, built on first use from the
+# description above with the Format::JSON middleware enabled.
+has spore_client => (
+ is => 'rw',
+ isa => 'Object',
+ lazy => 1,
+ default => sub {
+ my $self = shift;
+ my $client = Net::HTTP::Spore->new_from_spec(
+ $self->spore_configuration->{github}->{description},
+ );
+ $client->enable('Format::JSON');
+ $client;
+ }
+);
+
+1;