summary | refs | log | tree | commit | diff
path: root/lib/githubexplorer
diff options
context:
space:
mode:
Diffstat (limited to 'lib/githubexplorer')
-rw-r--r-- lib/githubexplorer/Gexf.pm 254
-rw-r--r-- lib/githubexplorer/Schema/Result/Profiles.pm 1
-rw-r--r-- lib/githubexplorer/Schema/Result/Repositories.pm 1
3 files changed, 138 insertions, 118 deletions
diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm
index 7e82f80..6832379 100644
--- a/lib/githubexplorer/Gexf.pm
+++ b/lib/githubexplorer/Gexf.pm
@@ -25,67 +25,85 @@ has graph => (
meta => { creator => ['linkfluence'] },
graph => {
type => 'static',
- attributes => {
- class => 'node',
- type => 'static',
- attribute => [
- {
- id => 0,
- type => 'string',
- title => 'name'
- },
- {
- id => 1,
- type => 'string',
- title => 'type',
- },
- {
- id => 2,
- type => 'float',
- title => 'followers_count'
- },
- {
- id => 3,
- type => 'float',
- title => 'following_count'
- },
- {
- id => 4,
- type => 'float',
- title => 'forks',
- },
- {
- id => 5,
- type => 'string',
- title => 'location',
- },
- {
- id => 6,
- type => 'float',
- title => 'public_gist_count',
- },
- {
- id => 7,
- type => 'float',
- title => 'public_repo_count',
- },
- {
- id => 8,
- type => 'string',
- title => 'language',
- },
- {
- id => 9,
- type => 'string',
- title => 'description',
- },
- {
- id => 10,
- type => 'float',
- title => 'watchers',
- }
- ]
- }
+ attributes => [
+ { # attribute declarations for the 'edge' class
+ class => 'edge',
+ type => 'static',
+ attribute => [
+ {
+ id => 0,
+ type => 'string',
+ title => 'language'
+ },
+ {
+ id => 1, # was 0: ids must be unique within one attribute class
+ type => 'float',
+ title => 'collaborate'
+ },
+ ]
+ },
+ {
+ class => 'node',
+ type => 'static',
+ attribute => [
+ {
+ id => 0,
+ type => 'string',
+ title => 'name'
+ },
+ {
+ id => 1,
+ type => 'string',
+ title => 'type',
+ },
+ {
+ id => 2,
+ type => 'float',
+ title => 'followers_count'
+ },
+ {
+ id => 3,
+ type => 'float',
+ title => 'following_count'
+ },
+ {
+ id => 4,
+ type => 'float',
+ title => 'forks',
+ },
+ {
+ id => 5,
+ type => 'string',
+ title => 'location',
+ },
+ {
+ id => 6,
+ type => 'float',
+ title => 'public_gist_count',
+ },
+ {
+ id => 7,
+ type => 'float',
+ title => 'public_repo_count',
+ },
+ {
+ id => 8,
+ type => 'string',
+ title => 'language',
+ },
+ {
+ id => 9,
+ type => 'string',
+ title => 'description',
+ },
+ {
+ id => 10,
+ type => 'float',
+ title => 'watchers',
+ }
+ ]
+ },
+ ]
}
}
};
@@ -95,26 +113,26 @@ has graph => (
sub gen_gexf {
my $self = shift;
- $self->_average_by_langage();
-# $self->basic_profiles;
-# my $basic_profiles = $self->dump_gexf;
-# $basic_profiles > io('basic_profiles.gexf');
+ #$self->_average_by_langage();
+ $self->basic_profiles;
+ my $basic_profiles = $self->dump_gexf;
+ $basic_profiles > io('basic_profiles.gexf');
- $self->profiles_from_repositories;
- my $profiles_from_repositories = $self->dump_gexf;
- $profiles_from_repositories > io('profiles_from_repositories.gexf');
+ #$self->profiles_from_repositories;
+ #my $profiles_from_repositories = $self->dump_gexf;
+ #$profiles_from_repositories > io('profiles_from_repositories.gexf');
-# $self->repositories_from_profiles;
-# my $repositories_from_profiles = $self->dump_gexf;
-# $repositories_from_profiles > io('repositories_from_profiles.gexf');
+ #$self->repositories_from_profiles;
+ #my $repositories_from_profiles = $self->dump_gexf;
+ #$repositories_from_profiles > io('repositories_from_profiles.gexf');
}
sub dump_gexf {
my $self = shift;
my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 );
- say "total edges => ".scalar @{$self->graph->{gexf}->{graph}->{nodes}->{node}};
- say "total nodes => ".scalar @{$self->graph->{gexf}->{graph}->{edges}->{edge}};
+ say "total nodes => ".scalar @{$self->graph->{gexf}->{graph}->{nodes}->{node}};
+ say "total edges => ".scalar @{$self->graph->{gexf}->{graph}->{edges}->{edge}};
$self->graph->{gexf}->{graph}->{nodes} = undef;
$self->graph->{gexf}->{graph}->{edges} = undef;
return $xml_out;
@@ -124,7 +142,8 @@ sub basic_profiles {
my $self = shift;
$self->id_edges(0);
say "start basic_profiles ...";
- my $profiles = $self->schema->resultset('Profiles')->search();
+ my $profiles =
+ $self->schema->resultset('Profiles')->search();
while ( my $profile = $profiles->next ) {
my $node = $self->_get_node_for_profile($profile);
@@ -134,10 +153,23 @@ sub basic_profiles {
my $edges = $self->schema->resultset('Follow')->search();
my $id = 0;
while ( my $edge = $edges->next ) {
+ my $collaborate = 1;
+# my $forks_source = $self->schema->resultset('Fork')->search({profile =>
+# $edge->origin->id});
+# while (my $fork = $forks_source->next) {
+# my $contrib = $self->schema->resultset('Fork')->search({repos =>
+# $fork->repos->id});
+# while (my $c = $contrib->next) {
+# $collaborate++ if ($c->profile->id == $edge->dest->id);
+# }
+# }
my $e = {
source => $edge->origin->id,
target => $edge->dest->id,
id => $self->inc_edges,
+ weight => $collaborate,
+ collaborate => $collaborate,
+ language => $edge->origin->main_language,
};
push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
}
@@ -160,13 +192,9 @@ sub profiles_from_repositories {
while ( my $repos = $repositories->next ) {
my $forks = $self->schema->resultset('Fork')
->search( { repos => $repos->id } );
- my $language;
- my $lang_rs = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first;
- if ($lang_rs) {
- $language = $lang_rs->language->name;
- }
- if ($language && exists $self->avg_contrib_by_lang->{$language}->{avg} && $forks <= $self->avg_contrib_by_lang->{$language}->{avg}){
- warn ">>>>> on skip pour ".$repos->name."\n";
+ if ($repos->main_language && exists
+ $self->avg_contrib_by_lang->{$repos->main_language}->{avg} &&
+ $forks < $self->avg_contrib_by_lang->{$repos->main_language}->{avg}){
next;
}
my @profiles;
@@ -188,6 +216,7 @@ sub profiles_from_repositories {
}
foreach my $e (keys %$edges) {
foreach my $t (keys %{$edges->{$e}}) {
+ next unless $edges->{$e}->{$t}->{weight} > 5;
my $edge = {
id => $self->inc_edges,
source => $e,
@@ -211,11 +240,6 @@ sub repositories_from_profiles {
next if $repos->name =~ /dotfiles/;
if ( !exists $nodes->{ $repos->name } ) {
- my $language
- = $self->schema->resultset('RepoLang')
- ->search( { repository => $repos->id },
- { order_by => 'size' } )->first;
- my $lang = $language ? $language->language->name : 'none';
$nodes->{ $repos->name } = {
id => $repos->name,
label => $repos->name,
@@ -226,7 +250,7 @@ sub repositories_from_profiles {
{ for => 4, value => $repos->forks },
{ for => 9, value => $repos->description },
{ for => 10, value => $repos->watchers },
- { for => 8, value => $lang },
+ { for => 8, value => $repos->main_language },
],
},
};
@@ -261,7 +285,7 @@ sub repositories_from_profiles {
}
foreach my $e (keys %$edges) {
foreach my $t (keys %{$edges->{$e}}) {
- next if $edges->{$e}->{$t}->{weight} < 10;
+ next if $edges->{$e}->{$t}->{weight} < 5;
my $edge = {
id => $self->inc_edges,
source => $e,
@@ -271,15 +295,11 @@ sub repositories_from_profiles {
push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $edge;
}
}
- say "edges => ".scalar @{ $self->graph->{gexf}->{graph}->{edges}->{edge} };
say "repositories_from_profiles done";
}
sub _get_node_for_profile {
my ( $self, $profile ) = @_;
- my ( $languages, $ordered_languages )
- = $self->_get_languages_for_profile($profile);
- my $main_lang = shift @$ordered_languages;
my $node = {
id => $profile->id,
label => $profile->login,
@@ -292,45 +312,43 @@ sub _get_node_for_profile {
{ for => 5, value => $profile->country },
{ for => 6, value => $profile->public_gist_count },
{ for => 7, value => $profile->public_repo_count },
- { for => 8, value => $main_lang },
+ { for => 8, value => $profile->main_language },
]
},
};
return $node;
}
-sub _get_languages_for_profile {
- my ( $self, $profile ) = @_;
-
- my $forks = $self->schema->resultset('Fork')
- ->search( { profile => $profile->id } );
-
- my %languages;
- while ( my $fork = $forks->next ) {
- my $languages = $self->schema->resultset('RepoLang')
- ->search( { repository => $fork->repos->id } );
- while ( my $lang = $languages->next ) {
- $languages{ $lang->language->name } += $lang->size;
- }
- }
- my @sorted_lang
- = sort { $languages{$b} <=> $languages{$a} } keys %languages;
- return ( \%languages, \@sorted_lang );
-}
+#sub _get_languages_for_profile {
+# my ( $self, $profile ) = @_;
+#
+# my $forks = $self->schema->resultset('Fork')
+# ->search( { profile => $profile->id } );
+#
+# my %languages;
+# while ( my $fork = $forks->next ) {
+# my $languages = $self->schema->resultset('RepoLang')
+# ->search( { repository => $fork->repos->id } );
+# while ( my $lang = $languages->next ) {
+# $languages{ $lang->language->name } += $lang->size;
+# }
+# }
+# my @sorted_lang
+# = sort { $languages{$b} <=> $languages{$a} } keys %languages;
+# return ( \%languages, \@sorted_lang );
+#}
sub _average_by_langage {
my $self = shift;
my $hash_lang;
my $repositories = $self->schema->resultset('Repositories')->search();
+ say "gather stats ...";
while ( my $repos = $repositories->next ) {
- my $lang = $self->schema->resultset('RepoLang')->search(
- { repository => $repos->id }, { order_by => 'size' }
- )->first;
- next unless $lang;
- $hash_lang->{ $lang->language->name }->{repositories}++;
+ next unless $repos->main_language;
+ $hash_lang->{ $repos->main_language }->{repositories}++;
my $forks = $self->schema->resultset('Fork')->search( { repos => $repos->id } )->count;
- $hash_lang->{ $lang->language->name }->{contributors} += $forks;
- $hash_lang->{$lang->language->name}->{avg} = int ($hash_lang->{$lang->language->name}->{contributors} / $hash_lang->{$lang->language->name}->{repositories});
+ $hash_lang->{ $repos->main_language }->{contributors} += $forks;
+ $hash_lang->{$repos->main_language}->{avg} = int ($hash_lang->{$repos->main_language}->{contributors} / $hash_lang->{$repos->main_language}->{repositories});
};
$self->avg_contrib_by_lang($hash_lang);
}
diff --git a/lib/githubexplorer/Schema/Result/Profiles.pm b/lib/githubexplorer/Schema/Result/Profiles.pm
index b43211e..df8f147 100644
--- a/lib/githubexplorer/Schema/Result/Profiles.pm
+++ b/lib/githubexplorer/Schema/Result/Profiles.pm
@@ -11,6 +11,7 @@ __PACKAGE__->add_columns(
company => { data_type => 'varchar', is_nullable => 1 },
created_at => { data_type => 'timestamp' },
email => { data_type => 'varchar', is_nullable => 1 },
+ main_language => { data_type => 'varchar', is_nullable => 1 },
followers_count => { data_type => 'int' },
following_count => { data_type => 'int' },
gravatar_id => { data_type => 'varchar', is_nullable => 1 },
diff --git a/lib/githubexplorer/Schema/Result/Repositories.pm b/lib/githubexplorer/Schema/Result/Repositories.pm
index 58c0e51..ab82fef 100644
--- a/lib/githubexplorer/Schema/Result/Repositories.pm
+++ b/lib/githubexplorer/Schema/Result/Repositories.pm
@@ -10,6 +10,7 @@ __PACKAGE__->add_columns(
name => { data_type => 'varchar' },
homepage => { data_type => 'varchar', is_nullable => 1 },
url => { data_type => 'varchar', is_nullable => 1 },
+ main_language => { data_type => 'varchar', is_nullable => 1 },
watchers => { data_type => 'int' },
forks => { data_type => 'int' },
fork => { data_type => 'bool' },