diff options
Diffstat (limited to 'lib/GitHub/Collector/Command')
| -rw-r--r-- | lib/GitHub/Collector/Command/country.pm | 70 | ||||
| -rw-r--r-- | lib/GitHub/Collector/Command/edges.pm | 67 | ||||
| -rw-r--r-- | lib/GitHub/Collector/Command/graph.pm | 47 | ||||
| -rw-r--r-- | lib/GitHub/Collector/Command/indegree.pm | 29 | ||||
| -rw-r--r-- | lib/GitHub/Collector/Command/lang.pm | 72 | ||||
| -rw-r--r-- | lib/GitHub/Collector/Command/profile.pm | 90 | ||||
| -rw-r--r-- | lib/GitHub/Collector/Command/repository.pm | 44 | ||||
| -rw-r--r-- | lib/GitHub/Collector/Command/stats.pm | 94 |
8 files changed, 513 insertions, 0 deletions
diff --git a/lib/GitHub/Collector/Command/country.pm b/lib/GitHub/Collector/Command/country.pm
new file mode 100644
index 0000000..f7f5107
--- /dev/null
+++ b/lib/GitHub/Collector/Command/country.pm
@@ -0,0 +1,95 @@
+package GitHub::Collector::Command::country;
+
+use Moose;
+use boolean;
+
+extends qw(MooseX::App::Cmd::Command);
+
+has geo_conf => (
+    is            => 'rw',
+    isa           => 'HashRef',
+    required      => 1,
+    documentation => 'SPORE configuration for Geo API',
+);
+
+with
+  'GitHub::Collector::Role::Logger',
+  'GitHub::Collector::Role::Context',
+  'GitHub::Collector::Role::MongoDB',
+  'Net::HTTP::Spore::Role' =>
+  { spore_clients => [ { name => 'geo', config => 'geo_conf' } ] };
+
+# Geo API status values that mean the request quota is exhausted.
+# NOTE(review): the original only checked 19; 18 and 20 are taken from the
+# GeoNames exception table (daily / weekly credit limits) -- confirm.
+my %QUOTA_EXCEEDED = map { $_ => 1 } ( 18, 19, 20 );
+
+# Look up the country of every profile whose lookup is still pending.
+sub execute {
+    my $self = shift;
+
+    $self->log("start to tag user using country");
+
+    my $profiles = $self->db_profiles->find( { country_done => false } );
+
+    while ( my $profile = $profiles->next ) {
+        $self->_tag_profile_by_country($profile);
+    }
+
+    $self->log("done tagging users");
+}
+
+# Resolve a profile's free-text location to a country name through the geo
+# client and store the result.  Profiles without a location, or whose
+# location yields no match, are stored with country => false.
+# Dies with "no more requests" when the service reports an exhausted quota,
+# so that the remaining profiles are not wrongly tagged as having no country.
+sub _tag_profile_by_country {
+    my ( $self, $profile ) = @_;
+
+    my $login    = $profile->{login};
+    my $location = $profile->{location};
+
+    if ( !defined $location ) {
+        $self->_update_country( $login, false );
+        return;
+    }
+
+    $self->log("searching for $login based in $location");
+
+    my $res = $self->geo->search(
+        q        => $location,
+        username => $self->geo_conf->{api_username},
+    )->body;
+
+    # Anything but a decoded hash cannot be inspected; leave the profile
+    # pending instead of dying on a bad dereference.
+    if ( ref($res) ne 'HASH' ) {
+        $self->log("unexpected geo response for $login, skipping");
+        return;
+    }
+
+    my $status = $res->{status};
+    if ( ref($status) eq 'HASH' && defined $status->{value} ) {
+        die "no more requests" if $QUOTA_EXCEEDED{ $status->{value} };
+    }
+
+    my $matches = $res->{geonames};
+    my $country;
+    if ( ref($matches) eq 'ARRAY' && ref( $matches->[0] ) eq 'HASH' ) {
+        $country = $matches->[0]->{countryName};
+    }
+
+    $self->_update_country( $login, $country ? $country : false );
+}
+
+# Store $country on the profile identified by $login and flag the lookup as
+# done.
+sub _update_country {
+    my ( $self, $login, $country ) = @_;
+
+    $self->db_profiles->update( { login => $login },
+        { '$set' => { country => $country, country_done => true } } );
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/edges.pm b/lib/GitHub/Collector/Command/edges.pm
new file mode 100644
index 0000000..4ffe0c8
--- /dev/null
+++ b/lib/GitHub/Collector/Command/edges.pm
@@ -0,0 +1,80 @@
+package GitHub::Collector::Command::edges;
+
+use Moose;
+use boolean;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+  GitHub::Collector::Role::Context
+  GitHub::Collector::Role::Logger
+  GitHub::Collector::Role::MongoDB
+);
+
+# Build the weighted contributor -> owner edges for every profile that has
+# not been processed yet.
+sub execute {
+    my $self = shift;
+
+    $self->log("start to merge contributions");
+
+    my $profiles = $self->db_profiles->find( { edges_done => false } );
+
+    while ( my $profile = $profiles->next ) {
+        my $login = $profile->{login};
+        next if $self->_is_done($login);
+        $self->log("merge contributions for $login");
+        $self->_contributions($login);
+    }
+
+    $self->log("done merging contributions");
+}
+
+# Return the number of edges already stored with $login as their source.
+sub _is_done {
+    my ( $self, $login ) = @_;
+    return $self->db_edges->find( { source => $login } )->count;
+}
+
+# Aggregate the contributions of $login per repository owner, store one edge
+# per owner, then flag the profile as processed.  Each contribution record
+# adds the contributor's share of the repository, in percent and at least 1,
+# to the weight of the edge towards that repository's owner.
+sub _contributions {
+    my ( $self, $login ) = @_;
+
+    my $contributions =
+      $self->db_contributors->find( { contributor => $login } );
+
+    my %weight_for;
+
+    while ( my $contrib = $contributions->next ) {
+        my $project = $self->db_repositories->find_one(
+            { uniq_name => $contrib->{project} } );
+
+        # The repository may be unknown or empty; skip it rather than
+        # dereference undef or divide by zero.
+        next if !$project || !$project->{size};
+
+        my $share =
+          int( ( ( $contrib->{contributions} || 0 ) / $project->{size} ) * 100 );
+        $weight_for{ $contrib->{owner} } += $share || 1;
+    }
+
+    foreach my $owner ( keys %weight_for ) {
+        $self->db_edges->insert(
+            {
+                source => $login,
+                target => $owner,
+                weight => $weight_for{$owner},
+            }
+        );
+    }
+
+    $self->db_profiles->update(
+        { login  => $login },
+        { '$set' => { edges_done => true } },
+    );
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/graph.pm b/lib/GitHub/Collector/Command/graph.pm
new file mode 100644
index 0000000..c7766a8
--- /dev/null
+++ b/lib/GitHub/Collector/Command/graph.pm
@@ -0,0 +1,54 @@
+package GitHub::Collector::Command::graph;
+
+use Moose;
+use YAML::Syck;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+  GitHub::Collector::Role::Context
+  GitHub::Collector::Role::Logger
+  GitHub::Collector::Role::MongoDB
+  GitHub::Collector::Role::Graph::Query
+  GitHub::Collector::Role::Graph::Nodes
+  GitHub::Collector::Role::Graph::Edges
+  GitHub::Collector::Role::Graph::Neighbors
+  GitHub::Collector::Role::Graph::Search
+  GitHub::Collector::Role::Graph::Gexf
+);
+
+has profile => (
+    is            => 'ro',
+    isa           => 'Str',
+    predicate     => 'has_profile',
+    documentation => 'login whose neighbors should be graphed',
+);
+
+has indegree => (
+    is            => 'ro',
+    isa           => 'Int',
+    predicate     => 'has_indegree',
+    documentation => 'only keep profiles with an indegree above this value',
+);
+
+# Build the graph from the neighbors of one profile, from the profiles above
+# an indegree threshold, or from an unfiltered query, then export on request.
+sub execute {
+    my $self = shift;
+
+    if ( $self->has_profile ) {
+        $self->neighbors( $self->profile, 1 );
+        $self->remove_leaves();
+    }
+    elsif ( $self->has_indegree ) {
+        $self->build_from_query(
+            { indegree => { '$gt' => $self->indegree } } );
+    }
+    else {
+        $self->build_from_query();
+    }
+
+    $self->export() if $self->should_export;
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/indegree.pm b/lib/GitHub/Collector/Command/indegree.pm
new file mode 100644
index 0000000..fe9bf78
--- /dev/null
+++ b/lib/GitHub/Collector/Command/indegree.pm
@@ -0,0 +1,32 @@
+package GitHub::Collector::Command::indegree;
+
+use Moose;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+  GitHub::Collector::Role::Context
+  GitHub::Collector::Role::Logger
+  GitHub::Collector::Role::MongoDB
+);
+
+# Sum the weight of all edges per target login and store the total as the
+# "indegree" field of the profile with that login.
+sub execute {
+    my $self = shift;
+
+    my $edges = $self->db_edges->find();
+
+    my %indegree_for;
+    while ( my $edge = $edges->next ) {
+        next if !defined $edge->{target};
+        $indegree_for{ $edge->{target} } += $edge->{weight} || 0;
+    }
+
+    foreach my $login ( keys %indegree_for ) {
+        $self->db_profiles->update( { login => $login },
+            { '$set' => { indegree => $indegree_for{$login} } } );
+    }
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/lang.pm b/lib/GitHub/Collector/Command/lang.pm
new file mode 100644
index 0000000..8ab6c20
--- /dev/null
+++ b/lib/GitHub/Collector/Command/lang.pm
@@ -0,0 +1,80 @@
+package GitHub::Collector::Command::lang;
+
+use Moose;
+use boolean;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+  GitHub::Collector::Role::Logger
+  GitHub::Collector::Role::Context
+  GitHub::Collector::Role::MongoDB
+);
+
+# Tag every profile that has no language yet with its dominant language.
+sub execute {
+    my $self = shift;
+
+    $self->log("start to tag user using langs");
+
+    my $profiles = $self->db_profiles->find( { language => undef } );
+
+    while ( my $profile = $profiles->next ) {
+        $self->_tag_profile_by_lang($profile);
+    }
+
+    $self->log("done tagging users");
+}
+
+# Pick the language seen most often among the repositories the profile owns
+# or contributes to, and store it in the profile's "language" field.
+sub _tag_profile_by_lang {
+    my ( $self, $profile ) = @_;
+
+    my $login = $profile->{login};
+    my %count_for;
+
+    $self->_repos( \%count_for, $login );
+    $self->_contribs( \%count_for, $login );
+
+    # Highest count first; ties are broken by name so repeated runs agree.
+    my ($lang) =
+      sort { $count_for{$b} <=> $count_for{$a} || $a cmp $b }
+      keys %count_for;
+
+    # NOTE(review): the stored value has a trailing space ("none "); it is
+    # kept as-is because other code may already match it -- confirm.
+    $lang = "none " if !$lang;
+
+    $self->log("pour $login on a $lang");
+    $self->db_profiles->update(
+        { login  => $login },
+        { '$set' => { language => $lang } }
+    );
+}
+
+# Count, in $languages, the language of every repository owned by $login.
+sub _repos {
+    my ( $self, $languages, $login ) = @_;
+
+    my $repositories = $self->db_repositories->find( { owner => $login } );
+
+    while ( my $repo = $repositories->next ) {
+        $languages->{ $repo->{lang} }++ if $repo->{lang};
+    }
+}
+
+# Count, in $languages, the language of every repository $login contributed to.
+sub _contribs {
+    my ( $self, $languages, $login ) = @_;
+
+    my $contribs = $self->db_contributors->find( { contributor => $login } );
+
+    while ( my $contrib = $contribs->next ) {
+        my $repo = $self->db_repositories->find_one(
+            { uniq_name => $contrib->{project} } );
+        $languages->{ $repo->{lang} }++ if $repo && $repo->{lang};
+    }
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/profile.pm b/lib/GitHub/Collector/Command/profile.pm
new file mode 100644
index 0000000..872bf28
--- /dev/null
+++ b/lib/GitHub/Collector/Command/profile.pm
@@ -0,0 +1,105 @@
+package GitHub::Collector::Command::profile;
+
+use YAML;
+use Try::Tiny;
+use Moose;
+use boolean;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+  GitHub::Collector::Role::Context
+  GitHub::Collector::Role::Logger
+  GitHub::Collector::Role::SPORE
+  GitHub::Collector::Role::Profile
+  GitHub::Collector::Role::MongoDB
+  GitHub::Collector::Role::Pause
+);
+
+has seed => (
+    isa           => 'ArrayRef',
+    is            => 'ro',
+    required      => 1,
+    auto_deref    => 1,
+    documentation => 'seed to crawl',
+    lazy          => 1,
+    default       => sub {
+        my $self = shift;
+        return $self->context->{seed};
+    }
+);
+
+# Insert the seed logins, then crawl until no untried profile is pending.
+sub execute {
+    my $self = shift;
+
+    $self->log("start to crawl profiles");
+
+    foreach my $profile ( $self->seed ) {
+        $self->_bootstrap_profile($profile);
+    }
+
+    $self->log("finish to boostrap the seed");
+    $self->_crawl;
+    $self->log("crawl completed");
+}
+
+# Fetch, store and link one profile, then flag it as done.  Does nothing
+# when the profile cannot be fetched.
+sub get_profile {
+    my ( $self, $profile ) = @_;
+
+    my $login = $profile->{login};
+
+    my $profile_info = $self->fetch_profile($login);
+    return unless $profile_info;
+
+    $self->save_profile($profile_info);
+    $self->add_relations($login);
+    $self->profile_is_done($login);
+}
+
+# Process pending profiles (done => false) in passes, since handling one
+# profile may add new pending ones.  Each login is attempted at most once
+# per run: a profile that cannot be fetched stays pending in the database,
+# and retrying it in the same run would never terminate.  The crawl stops
+# after the first pass that finds no untried login.
+sub _crawl {
+    my $self = shift;
+
+    my %attempted;
+    my $new_attempts;
+
+    do {
+        $new_attempts = 0;
+        my $pending = $self->db_profiles->find( { done => false } );
+
+        while ( my $profile = $pending->next ) {
+            my $login = defined $profile->{login} ? $profile->{login} : '';
+            next if $attempted{$login}++;
+
+            $self->get_profile($profile);
+            $new_attempts++;
+        }
+    } while ($new_attempts);
+}
+
+# Insert $profile (a login) as a pending profile unless it already exists.
+sub _bootstrap_profile {
+    my ( $self, $profile ) = @_;
+
+    my $existing = $self->db_profiles->find( { login => $profile } );
+    return if $existing->count > 0;
+
+    $self->debug("insert $profile into profiles");
+    $self->db_profiles->insert(
+        { login => $profile, done => false, repositories_done => false } );
+}
+
+1;
+
+=head1 NAME
+
+GitHub::Collector::Command::profile - crawl GitHub profiles from a seed list
+
+=cut
diff --git a/lib/GitHub/Collector/Command/repository.pm b/lib/GitHub/Collector/Command/repository.pm
new file mode 100644
index 0000000..3e7fd57
--- /dev/null
+++ b/lib/GitHub/Collector/Command/repository.pm
@@ -0,0 +1,47 @@
+package GitHub::Collector::Command::repository;
+
+use Moose;
+use boolean;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+  GitHub::Collector::Role::Context
+  GitHub::Collector::Role::Logger
+  GitHub::Collector::Role::SPORE
+  GitHub::Collector::Role::MongoDB
+  GitHub::Collector::Role::Repository
+);
+
+# Crawl the repositories of every profile that still needs it.
+sub execute {
+    my $self = shift;
+
+    $self->log("start to crawl repositories");
+    $self->_crawl();
+    $self->log("crawl completed");
+}
+
+# Fetch the repositories of one profile document.
+sub get_repositories {
+    my ( $self, $profile ) = @_;
+
+    my $login = $profile->{login};
+
+    $self->log("fetch repositories for $login");
+    $self->fetch_repositories($profile);
+    $self->log("finished to work on $login");
+}
+
+# Run get_repositories on every profile with repositories_done => false.
+sub _crawl {
+    my $self = shift;
+
+    my $profiles = $self->db_profiles->find( { repositories_done => false } );
+
+    while ( my $profile = $profiles->next ) {
+        $self->get_repositories($profile);
+    }
+}
+
+1;
diff --git a/lib/GitHub/Collector/Command/stats.pm b/lib/GitHub/Collector/Command/stats.pm
new file mode 100644
index 0000000..fc71d10
--- /dev/null
+++ b/lib/GitHub/Collector/Command/stats.pm
@@ -0,0 +1,108 @@
+package GitHub::Collector::Command::stats;
+
+use 5.010;
+use Moose;
+use boolean;
+use JSON;
+use DateTime;
+
+extends qw(MooseX::App::Cmd::Command);
+
+with qw(
+  GitHub::Collector::Role::Context
+  GitHub::Collector::Role::Logger
+  GitHub::Collector::Role::MongoDB
+  GitHub::Collector::Role::Languages
+);
+
+# Count profile creations per month, globally and per mapped language, and
+# write one JSON series file per bucket.
+sub execute {
+    my ($self) = @_;
+
+    my $profiles = $self->db_profiles->find();
+
+    my $languages = {};
+    my $country   = {};
+    my $company   = {};
+    my $created   = { global => {}, languages => {} };
+
+    while ( my $profile = $profiles->next ) {
+        my $date = $profile->{created_at};
+        next if !defined $date;
+
+        my ( $year, $month ) = $date =~ /(\d{4})(?:-|\/)(\d{2})/;
+        next if !defined $year || !defined $month;
+
+        my $period = $year . '/' . $month;
+        my $lang   = $self->map_languages( $profile->{language} );
+        $languages->{$lang}++ if $lang ne 'Other';
+
+        $country->{ $profile->{country} }++ if $profile->{country};
+        $company->{ $profile->{company} }++ if defined $profile->{company};
+
+        $created->{global}->{$period}->{total}++;
+        $created->{languages}->{$lang}->{$period}->{total}++;
+    }
+
+#    $self->_sort_and_display($languages);
+#    $self->_sort_and_display($country, 10);
+#    $self->_sort_and_display($company, 100);
+
+    $self->_create_flot( $created->{global}, 'global' );
+    foreach my $lang ( keys %{ $created->{languages} } ) {
+        $self->_create_flot( $created->{languages}->{$lang}, $lang );
+    }
+}
+
+# Print the entries of $data (key => count) by decreasing count, each with
+# its share of the total.  $iter, when given, is the maximum number of
+# entries printed; by default all of them are.
+sub _sort_and_display {
+    my ( $self, $data, $iter ) = @_;
+
+    my @sorted = sort { $data->{$b} <=> $data->{$a} } keys %$data;
+
+    my $total = 0;
+    $total += $data->{$_} for @sorted;
+
+    # Nothing to report, and no percentage can be computed.
+    return if !$total;
+
+    my $shown = @sorted;
+    $shown = $iter if $iter && $iter < $shown;
+
+    for my $key ( @sorted[ 0 .. $shown - 1 ] ) {
+        my $pct = int( ( $data->{$key} / $total ) * 100 );
+        say " # " . $key . ":" . $data->{$key} . " ($pct%)";
+    }
+}
+
+# Write "<label>.json": a hash holding the label and a list of
+# [ epoch in milliseconds, total ] pairs, one per "YYYY/MM" key of $data.
+sub _create_flot {
+    my ( $self, $data, $label ) = @_;
+
+    $data ||= {};
+    my $graph = { label => $label, data => [] };
+
+    my @months = sort keys %$data;
+
+    # remove the first and last value since they're not really worthy
+    shift @months;
+    pop @months;
+
+    foreach my $month (@months) {
+        my ( $y, $m ) = $month =~ /(\d{4})\/(\d{2})/;
+        my $epoch =
+          DateTime->new( year => $y, month => $m, day => 1 )->epoch * 1000;
+        push @{ $graph->{data} }, [ $epoch, $data->{$month}->{total} ];
+    }
+
+    my $file = $label . '.json';
+    open my $fh, '>', $file or die "can't open $file for writing: $!";
+    print {$fh} JSON::encode_json($graph);
+    close $fh or die "can't close $file: $!";
+}
+
+1;
