summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- clean-country.pl 21
-rw-r--r-- crawl.pl 2
-rw-r--r-- lib/githubexplorer.pm 40
-rw-r--r-- lib/githubexplorer/Gexf.pm 270
4 files changed, 179 insertions, 154 deletions
diff --git a/clean-country.pl b/clean-country.pl
index 7a99e99..460b29f 100644
--- a/clean-country.pl
+++ b/clean-country.pl
@@ -9,27 +9,32 @@ use YAML::Syck;
my $conf = LoadFile(shift);
-my $schema = githubexplorer::Schema->connect(@{$conf->{connect_info}});
+my $schema = githubexplorer::Schema->connect( @{ $conf->{connect_info} } );
-my $profiles = $schema->resultset('Profiles')->search({id => {'>' => 61498}, location => {'!=' =>
- undef}, location => {'!=' => ''}});
+my $profiles = $schema->resultset('Profiles')->search(
+ {
+ id => { '>' => 55781 },
+ location => { '!=' => undef },
+ location => { '!=' => '' }
+ }
+);
my $geo = Geo::GeoNames->new();
-my $i = 0;
-while (my $pr = $profiles->next) {
+while ( my $pr = $profiles->next ) {
next if $pr->location =~ /^http/;
next if $pr->country;
next if $pr->location =~ /earth/i;
- say "-> process ".$pr->login." with ".$pr->location;
+ say "-> process " . $pr->login . " with " . $pr->location;
my $result = $geo->search( q => $pr->location, maxRows => 1 );
my $res = shift @$result;
if ($res) {
eval {
- $pr->update({city => $res->{name}, country => $res->{countryName}});
+ $pr->update(
+ { city => $res->{name}, country => $res->{countryName} } );
};
next if $@;
- say "** fix with ".$pr->city . " in ".$pr->country;
+ say "** fix with " . $pr->city . " in " . $pr->country;
}
if (++$i == 10) {
sleep(2);
diff --git a/crawl.pl b/crawl.pl
index d844893..300cfd5 100644
--- a/crawl.pl
+++ b/crawl.pl
@@ -12,7 +12,7 @@ GetOptions(
'repo' => \my $repo,
'graph' => \my $graph,
'network' => \my $network,
- 'seed' => \my $seed,
+ 'seed' => \my $seed,
'conf=s' => \my $conf,
);
diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm
index 5744e08..7b9c252 100644
--- a/lib/githubexplorer.pm
+++ b/lib/githubexplorer.pm
@@ -7,7 +7,7 @@ use githubexplorer::Gexf;
use IO::All;
with qw/githubexplorer::Profile githubexplorer::Repository
-githubexplorer::Network/;
+ githubexplorer::Network/;
has seed => (
isa => 'ArrayRef',
@@ -25,10 +25,10 @@ has seed => (
return \@seeds;
}
);
-has api_login => ( isa => 'Str|Undef', is => 'ro', required => 1 );
-has api_token => ( isa => 'Str|Undef', is => 'ro', required => 1 );
-has connect_info => ( isa => 'ArrayRef', is => 'ro', required => 1 );
-has with_repo => ( isa => 'Bool', is => 'ro', default => sub {0} );
+has api_login => ( isa => 'Str|Undef', is => 'ro', required => 1 );
+has api_token => ( isa => 'Str|Undef', is => 'ro', required => 1 );
+has connect_info => ( isa => 'ArrayRef', is => 'ro', required => 1 );
+has with_repo => ( isa => 'Bool', is => 'ro', default => sub {0} );
has schema => (
isa => 'githubexplorer::Schema',
is => 'rw',
@@ -75,7 +75,8 @@ sub gen_graph {
sub graph_repo {
my $self = shift;
$self->_connect unless $self->has_schema;
- my $repos = $self->schema->resultset('Repositories')->search({fork => 0});
+ my $repos
+ = $self->schema->resultset('Repositories')->search( { fork => 0 } );
while ( my $r = $repos->next ) {
$self->fetch_network($r);
}
@@ -90,20 +91,25 @@ sub gen_seed {
open my $fh, '>', 'seed.csv';
while ( my $pr = $profiles->next ) {
my %languages;
- my $forks = $self->schema->resultset('Fork')->search({profile =>
- $pr->id});
- while (my $fork = $forks->next) {
- my $languages =
- $self->schema->resultset('RepoLang')->search({repository =>
- $fork->repos->id});
- while (my $lang = $languages->next) {
- $languages{$lang->language->name}+=$lang->size;
+ my $forks = $self->schema->resultset('Fork')
+ ->search( { profile => $pr->id } );
+ while ( my $fork = $forks->next ) {
+ my $languages = $self->schema->resultset('RepoLang')
+ ->search( { repository => $fork->repos->id } );
+ while ( my $lang = $languages->next ) {
+ $languages{ $lang->language->name } += $lang->size;
}
}
- my @sorted_lang = sort {$languages{$b} <=> $languages{$a}} keys %languages;
+ my @sorted_lang
+ = sort { $languages{$b} <=> $languages{$a} } keys %languages;
my $main_lang = shift @sorted_lang;
- my $other_lang = join('|', @sorted_lang);
- my $str = $profiles->blog.";;;github;".$main_lang.";".$other_lang.";".$profile->country."\n";
+ my $other_lang = join( '|', @sorted_lang );
+ my $str
+ = $profiles->blog
+ . ";;;github;"
+ . $main_lang . ";"
+ . $other_lang . ";"
+ . $profile->country . "\n";
print $fh $str;
}
close $fh;
diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm
index 58281d4..de0da49 100644
--- a/lib/githubexplorer/Gexf.pm
+++ b/lib/githubexplorer/Gexf.pm
@@ -4,85 +4,90 @@ use Moose;
use XML::Simple;
use 5.010;
-has schema => (is => 'ro', isa => 'Object', required => 1);
-has id_edges => (is => 'rw', isa => 'Num', traits => ['Counter'], default =>
-0, handles => {inc_edges => 'inc'});
+has schema => ( is => 'ro', isa => 'Object', required => 1 );
+has id_edges => (
+ is => 'rw',
+ isa => 'Num',
+ traits => ['Counter'],
+ default => 0,
+ handles => { inc_edges => 'inc' }
+);
has graph => (
-is => 'rw',
-isa => 'HashRef',
-default => sub {
- my $graph = {
- gexf => {
- version => "1.1",
- meta => { creator => ['linkfluence'] },
- graph => {
- type => 'static',
- attributes => {
- class => 'node',
- type => 'static',
- attribute => [
- {
- id => 0,
- type => 'float',
- title => 'name'
- },
- {
- id => 1,
- type => 'string',
- title => 'type',
- },
- {
- id => 2,
- type => 'float',
- title => 'followers_count'
- },
- {
- id => 3,
- type => 'float',
- title => 'following_count'
- },
- {
- id => 4,
- type => 'float',
- title => 'forks',
- },
- {
- id => 5,
- type => 'string',
- title => 'location',
- },
- {
- id => 6,
- type => 'float',
- title => 'public_gist_count',
- },
- {
- id => 7,
- type => 'float',
- title => 'public_repo_count',
- },
- {
- id => 8,
- type => 'string',
- title => 'language',
- },
- {
- id => 9,
- type => 'string',
- title => 'description',
- },
- {
- id => 10,
- type => 'float',
- title => 'watchers',
- }
- ]
+ is => 'rw',
+ isa => 'HashRef',
+ default => sub {
+ my $graph = {
+ gexf => {
+ version => "1.1",
+ meta => { creator => ['linkfluence'] },
+ graph => {
+ type => 'static',
+ attributes => {
+ class => 'node',
+ type => 'static',
+ attribute => [
+ {
+ id => 0,
+ type => 'float',
+ title => 'name'
+ },
+ {
+ id => 1,
+ type => 'string',
+ title => 'type',
+ },
+ {
+ id => 2,
+ type => 'float',
+ title => 'followers_count'
+ },
+ {
+ id => 3,
+ type => 'float',
+ title => 'following_count'
+ },
+ {
+ id => 4,
+ type => 'float',
+ title => 'forks',
+ },
+ {
+ id => 5,
+ type => 'string',
+ title => 'location',
+ },
+ {
+ id => 6,
+ type => 'float',
+ title => 'public_gist_count',
+ },
+ {
+ id => 7,
+ type => 'float',
+ title => 'public_repo_count',
+ },
+ {
+ id => 8,
+ type => 'string',
+ title => 'language',
+ },
+ {
+ id => 9,
+ type => 'string',
+ title => 'description',
+ },
+ {
+ id => 10,
+ type => 'float',
+ title => 'watchers',
+ }
+ ]
+ }
}
}
- }
- };
-}
+ };
+ }
);
sub gen_gexf {
@@ -94,11 +99,11 @@ sub gen_gexf {
$self->profiles_from_repositories;
my $profiles_from_repositories = $self->dump_gexf;
- $profiles_from_repositories > io ('profiles_from_repositories.gexf');
+ $profiles_from_repositories > io('profiles_from_repositories.gexf');
$self->repositories_from_profiles;
my $repositories_from_profiles = $self->dump_gexf;
- $profiles_from_repositories > io ('repositories_from_profiles.gexf');
+ $profiles_from_repositories > io('repositories_from_profiles.gexf');
}
sub dump_gefx {
@@ -110,7 +115,7 @@ sub dump_gefx {
}
sub basic_profiles {
- my $self = shift;
+ my $self = shift;
$self->id_edges(0);
say "start basic_profiles ...";
my $profiles = $self->schema->resultset('Profiles')->search();
@@ -124,9 +129,9 @@ sub basic_profiles {
my $id = 0;
while ( my $edge = $edges->next ) {
my $e = {
- source => $edge->origin->id,
- target => $edge->dest->id,
- id => $self->inc_edges,
+ source => $edge->origin->id,
+ target => $edge->dest->id,
+ id => $self->inc_edges,
};
push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
}
@@ -140,15 +145,16 @@ sub profiles_from_repositories {
my ($nodes);
my $profiles = $self->schema->resultset('Profiles')->search();
- while (my $profile = $profiles->next) {
+ while ( my $profile = $profiles->next ) {
my $node = $self->_get_node_for_profile($profile);
push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
}
my $repositories = $self->schema->resultset('Repositories')->search();
- while (my $repos = $repositories->next) {
- my $forks = $self->schema->resultset('Fork')->search({repos => $repos->id});
+ while ( my $repos = $repositories->next ) {
+ my $forks = $self->schema->resultset('Fork')
+ ->search( { repos => $repos->id } );
my @profiles;
- while (my $fork = $forks->next) {
+ while ( my $fork = $forks->next ) {
push @profiles, $fork->profile->id;
}
foreach my $p (@profiles) {
@@ -157,7 +163,7 @@ sub profiles_from_repositories {
my $e = {
source => $p,
target => $_,
- id => $self->inc_edges,
+ id => $self->inc_edges,
};
push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
} @profiles;
@@ -173,38 +179,45 @@ sub repositories_from_profiles {
my ($nodes);
my $repositories = $self->schema->resultset('Repositories')->search();
- while (my $repos = $repositories->next) {
+ while ( my $repos = $repositories->next ) {
next if $repos->name =~ /dotfiles/;
- if (!exists $nodes->{$repos->name}) {
- my $language = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first;
+ if ( !exists $nodes->{ $repos->name } ) {
+ my $language
+ = $self->schema->resultset('RepoLang')
+ ->search( { repository => $repos->id },
+ { order_by => 'size' } )->first;
my $lang = $language ? $language->language->name : 'none';
- $nodes->{$repos->name} = {
- id => $repos->name,
- label => $repos->name,
+ $nodes->{ $repos->name } = {
+ id => $repos->name,
+ label => $repos->name,
attvalues => {
attvalue => [
- { for => 0, value => $repos->name},
- { for => 1, value => "repository"},
- { for => 4, value => $repos->forks},
- { for => 9, value => $repos->description},
- { for => 10, value => $repos->watchers},
- { for => 8, value => $lang},
+ { for => 0, value => $repos->name },
+ { for => 1, value => "repository" },
+ { for => 4, value => $repos->forks },
+ { for => 9, value => $repos->description },
+ { for => 10, value => $repos->watchers },
+ { for => 8, value => $lang },
],
},
};
}
- my $forks = $self->schema->resultset('Fork')->search({repos => $repos->id});
- while (my $fork = $forks->next) {
+ my $forks = $self->schema->resultset('Fork')
+ ->search( { repos => $repos->id } );
+ while ( my $fork = $forks->next ) {
my $e = {
- source => $fork->profile->id,
- target => $fork->repos->name,
- id => $self->inc_edges,
+ source => $fork->profile->id,
+ target => $fork->repos->name,
+ id => $self->inc_edges,
};
push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
}
}
- map {push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $nodes->{$_} keys %$nodes;
+ map {
+ push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} },
+ $nodes->{$_}
+ } keys %$nodes;
say "repositories_from_profiles done";
}
@@ -213,22 +226,23 @@ sub stats_languages_by_country {
}
sub _get_node_for_profile {
- my ($self, $profile) = @_;
- my ($languages, $ordered_languages) = $self->_get_languages_for_profile($profile);
+ my ( $self, $profile ) = @_;
+ my ( $languages, $ordered_languages )
+ = $self->_get_languages_for_profile($profile);
my $main_lang = shift @$ordered_languages;
- my $node = {
- id => $profile->id,
- label => $profile->login,
+ my $node = {
+ id => $profile->id,
+ label => $profile->login,
attvalues => {
attvalue => [
- { for => 0, value => $profile->name},
- { for => 1, value => "profile"},
- { for => 2, value => $profile->followers_count},
- { for => 3, value => $profile->following_count},
- { for => 5, value => $profile->country},
- { for => 6, value => $profile->public_gist_count},
- { for => 7, value => $profile->public_repo_count},
- { for => 8, value => $main_lang},
+ { for => 0, value => $profile->name },
+ { for => 1, value => "profile" },
+ { for => 2, value => $profile->followers_count },
+ { for => 3, value => $profile->following_count },
+ { for => 5, value => $profile->country },
+ { for => 6, value => $profile->public_gist_count },
+ { for => 7, value => $profile->public_repo_count },
+ { for => 8, value => $main_lang },
]
},
};
@@ -236,22 +250,22 @@ sub _get_node_for_profile {
}
sub _get_languages_for_profile {
- my ($self, $profile) = shift;
+ my ( $self, $profile ) = shift;
- my $forks = $self->schema->resultset('Fork')->search({profile =>
- $profile->id});
+ my $forks = $self->schema->resultset('Fork')
+ ->search( { profile => $profile->id } );
my %languages;
- while (my $fork = $forks->next) {
- my $languages =
- $self->schema->resultset('RepoLang')->search({repository =>
- $fork->repos->id});
- while (my $lang = $languages->next) {
- $languages{$lang->language->name}+=$lang->size;
+ while ( my $fork = $forks->next ) {
+ my $languages = $self->schema->resultset('RepoLang')
+ ->search( { repository => $fork->repos->id } );
+ while ( my $lang = $languages->next ) {
+ $languages{ $lang->language->name } += $lang->size;
}
}
- my @sorted_lang = sort {$languages{$b} <=> $languages{$a}} keys %languages;
- return (\%languages, \@sorted_lang);
+ my @sorted_lang
+ = sort { $languages{$b} <=> $languages{$a} } keys %languages;
+ return ( \%languages, \@sorted_lang );
}
#sub repositories {