summaryrefslogtreecommitdiff
path: root/lib/githubexplorer
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--lib/githubexplorer.pm41
-rw-r--r--lib/githubexplorer/Gexf.pm155
2 files changed, 107 insertions, 89 deletions
diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm
index 7b9c252..aef66ec 100644
--- a/lib/githubexplorer.pm
+++ b/lib/githubexplorer.pm
@@ -105,14 +105,47 @@ sub gen_seed {
my $main_lang = shift @sorted_lang;
my $other_lang = join( '|', @sorted_lang );
my $str
- = $profiles->blog
+ = $pr->blog
. ";;;github;"
- . $main_lang . ";"
- . $other_lang . ";"
- . $profile->country . "\n";
+ . ($main_lang || '') . ";"
+ . ($other_lang || '') . ";"
+ . ($pr->country || ''). "\n";
print $fh $str;
}
close $fh;
}
+sub stats_by_country {
+ my $self = shift;
+ $self->_connect unless $self->has_schema;
+ my $repositories = $self->schema->resultset('Repositories')->search();
+
+ my $countries;
+ while (my $repos = $repositories->next) {
+ next if !$repos->id_profile->country;
+ my $languages = $self->schema->resultset('RepoLang')
+ ->search( { repository => $repos->id } );
+ while ( my $lang = $languages->next ) {
+ $countries->{ $repos->id_profile->country }->{$lang->language->name} += $lang->size;
+ }
+ }
+ foreach my $country (keys %$countries) {
+ my $total = $self->schema->resultset('Profiles')->search({country => $country})->count;
+ $countries->{$country}->{total_dev} = $total;
+ my $total_bytes;
+ map {$total_bytes += $countries->{$country}->{$_}} keys %{$countries->{$country}};
+ foreach my $lang (keys %{$countries->{$country}}) {
+ $countries->{$country}->{"pct_".$lang} = ($countries->{$country}->{$lang} / $total_bytes) * 100;
+ }
+ }
+ my @sorted_countries = sort {$countries->{$b}->{total_dev} <=> $countries->{$a}->{total_dev}} keys %$countries;
+
+ my $final;
+ for ( 0 .. 19) {
+ push @$final, {$sorted_countries[$_] => $countries->{$sorted_countries[$_]} };
+ }
+ warn Dump $final;
+ DumpFile('countries.yaml', $final);
+}
+
1;
diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm
index de0da49..98f3d38 100644
--- a/lib/githubexplorer/Gexf.pm
+++ b/lib/githubexplorer/Gexf.pm
@@ -2,6 +2,7 @@ package githubexplorer::Gexf;
use Moose;
use XML::Simple;
+use IO::All;
use 5.010;
has schema => ( is => 'ro', isa => 'Object', required => 1 );
@@ -29,7 +30,7 @@ has graph => (
attribute => [
{
id => 0,
- type => 'float',
+ type => 'string',
title => 'name'
},
{
@@ -93,20 +94,20 @@ has graph => (
sub gen_gexf {
my $self = shift;
- $self->basic_profiles;
- my $basic_profiles = $self->dump_gexf;
- $basic_profiles > io('basic_profiles.gexf');
+# $self->basic_profiles;
+# my $basic_profiles = $self->dump_gexf;
+# $basic_profiles > io('basic_profiles.gexf');
$self->profiles_from_repositories;
my $profiles_from_repositories = $self->dump_gexf;
$profiles_from_repositories > io('profiles_from_repositories.gexf');
- $self->repositories_from_profiles;
- my $repositories_from_profiles = $self->dump_gexf;
- $profiles_from_repositories > io('repositories_from_profiles.gexf');
+# $self->repositories_from_profiles;
+# my $repositories_from_profiles = $self->dump_gexf;
+# $repositories_from_profiles > io('repositories_from_profiles.gexf');
}
-sub dump_gefx {
+sub dump_gexf {
my $self = shift;
my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 );
$self->graph->{gexf}->{graph}->{nodes} = undef;
@@ -149,6 +150,7 @@ sub profiles_from_repositories {
my $node = $self->_get_node_for_profile($profile);
push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
}
+ my $edges;
my $repositories = $self->schema->resultset('Repositories')->search();
while ( my $repos = $repositories->next ) {
my $forks = $self->schema->resultset('Fork')
@@ -158,17 +160,31 @@ sub profiles_from_repositories {
push @profiles, $fork->profile->id;
}
foreach my $p (@profiles) {
- map {
- next if $_ eq $p;
- my $e = {
- source => $p,
- target => $_,
- id => $self->inc_edges,
- };
- push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
- } @profiles;
+ foreach my $t (@profiles) {
+ next if $t eq $p;
+ if (exists $edges->{$p}->{$t}) {
+ $edges->{$p}->{$t}->{weight}++;
+ }elsif(exists $edges->{$t}->{$p}) {
+ $edges->{$t}->{$p}->{weight}++;
+ }else{
+ $edges->{$p}->{$t}->{weight}++;
+ }
+ }
+ }
+ }
+ foreach my $e (keys %$edges) {
+ foreach my $t (keys %{$edges->{$e}}) {
+ next if $edges->{$e}->{$t}->{weight} < 4;
+ my $edge = {
+ id => $self->inc_edges,
+ source => $e,
+ target => $t,
+ weight => $edges->{$e}->{$t}->{weight},
+ };
+ push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $edge;
}
}
+ say "edges => ".scalar @{ $self->graph->{gexf}->{graph}->{edges}->{edge} };
say "profiles_from_repositories done";
}
@@ -203,26 +219,48 @@ sub repositories_from_profiles {
},
};
}
- my $forks = $self->schema->resultset('Fork')
- ->search( { repos => $repos->id } );
- while ( my $fork = $forks->next ) {
- my $e = {
- source => $fork->profile->id,
- target => $fork->repos->name,
- id => $self->inc_edges,
- };
- push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
- }
}
map {
push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} },
$nodes->{$_}
} keys %$nodes;
- say "repositories_from_profiles done";
-}
-sub stats_languages_by_country {
- my $self = shift;
+ my $edges;
+ my $profiles = $self->schema->resultset('Profiles');
+ while ( my $profile = $profiles->next ) {
+ my $forks = $self->schema->resultset('Fork')->search({profile =>
+ $profile->id});
+ my @repos;
+ while (my $fork = $forks->next) {
+ push @repos, $fork->repos->name;
+ }
+ foreach my $r (@repos) {
+ foreach my $t (@repos) {
+ next if $t eq $r;
+ if (exists $edges->{$r}->{$t}) {
+ $edges->{$r}->{$t}->{weight}++;
+ }elsif(exists $edges->{$t}->{$r}){
+ $edges->{$t}->{$r}->{weight}++;
+ }else{
+ $edges->{$r}->{$t}->{weight}++;
+ }
+ }
+ }
+ }
+ foreach my $e (keys %$edges) {
+ foreach my $t (keys %{$edges->{$e}}) {
+ next if $edges->{$e}->{$t}->{weight} < 10;
+ my $edge = {
+ id => $self->inc_edges,
+ source => $e,
+ target => $t,
+ weight => $edges->{$e}->{$t}->{weight},
+ };
+ push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $edge;
+ }
+ }
+ say "edges => ".scalar @{ $self->graph->{gexf}->{graph}->{edges}->{edge} };
+ say "repositories_from_profiles done";
}
sub _get_node_for_profile {
@@ -250,7 +288,7 @@ sub _get_node_for_profile {
}
sub _get_languages_for_profile {
- my ( $self, $profile ) = shift;
+ my ( $self, $profile ) = @_;
my $forks = $self->schema->resultset('Fork')
->search( { profile => $profile->id } );
@@ -268,57 +306,4 @@ sub _get_languages_for_profile {
return ( \%languages, \@sorted_lang );
}
-#sub repositories {
-# my $self = shift;
-#
-# say "start repositories ...";
-# my $repositories = $self->schema->resultset('Repositories')->search({fork => 0});
-# while (my $repos = $repositories->next) {
-#
-# next if $repos->name =~ /dotfiles/i;
-# # available in forks ?
-# my $check_fork = $self->schema->resultset('Fork')->search({repos => $repos->id});
-# next if $check_fork->count < 1;
-#
-# if (!grep {$_->{id} eq "repos_".$repos->name} @{$self->graph->{gexf}->{graph}->{nodes}->{node}}) {
-# my $language = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first;
-# my $lang = $language ? $language->language->name : 'none';
-# my $node = {
-# id => "repos_".$repos->name,
-# label => $repos->name,
-# attvalues => {
-# attvalue => [
-# { for => 0, value => $repos->name},
-# { for => 1, value => "repository"},
-# { for => 4, value => $repos->forks},
-# { for => 9, value => $repos->description},
-# { for => 10, value => $repos->watchers},
-# { for => 8, value => $lang},
-# ],
-# },
-# };
-# push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
-# }
-# my $e = {
-# source => $repos->id_profile->id,
-# target => "repos_".$repos->name,
-# id => $self->inc_edges,
-# };
-# push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
-# }
-#
-# my $forks = $self->schema->resultset('Fork')->search();
-#
-# while (my $fork = $forks->next) {
-# next if $fork->repos->name =~ /dotfiles/i;
-# my $e = {
-# source => $fork->profile->id,
-# target => "repos_".$fork->repos->name,
-# id => $self->inc_edges,
-# };
-# push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
-# }
-# say " done";
-#}
-
1;