summaryrefslogtreecommitdiff
path: root/lib/githubexplorer
diff options
context:
space:
mode:
authorfranck cuny <franck@lumberjaph.net>2010-02-12 16:41:02 +0100
committerfranck cuny <franck@lumberjaph.net>2010-02-12 16:41:02 +0100
commitd2551c9cc2e637835876fec5e9cb58f9e9f2061c (patch)
tree3f1ab54705e1a21d3978cd0da73ed8c12137074f /lib/githubexplorer
parentMerge branch 'master' of lj:github-explorer (diff)
downloadgithub-explorer-d2551c9cc2e637835876fec5e9cb58f9e9f2061c.tar.gz
wip
Diffstat (limited to '')
-rw-r--r--lib/githubexplorer.pm29
-rw-r--r--lib/githubexplorer/Gexf.pm362
-rw-r--r--lib/githubexplorer/Schema/Result/Profiles.pm2
3 files changed, 267 insertions, 126 deletions
diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm
index 4260842..5744e08 100644
--- a/lib/githubexplorer.pm
+++ b/lib/githubexplorer.pm
@@ -25,8 +25,8 @@ has seed => (
return \@seeds;
}
);
-has api_login => ( isa => 'Str', is => 'ro', required => 1 );
-has api_token => ( isa => 'Str', is => 'ro', required => 1 );
# API login and token: both must still be passed to the constructor
# (required => 1), but the 'Str|Undef' constraint lets the caller pass an
# explicit undef for either of them.
+has api_login => ( isa => 'Str|Undef', is => 'ro', required => 1 );
+has api_token => ( isa => 'Str|Undef', is => 'ro', required => 1 );
# Required array reference; presumably the connection arguments handed to
# the schema -- TODO confirm against _connect.
has connect_info => ( isa => 'ArrayRef', is => 'ro', required => 1 );
# Boolean flag that defaults to 0 when the caller does not set it.
has with_repo => ( isa => 'Bool', is => 'ro', default => sub {0} );
has schema => (
@@ -69,8 +69,7 @@ sub gen_graph {
my $self = shift;
$self->_connect unless $self->has_schema;
my $graph = githubexplorer::Gexf->new( schema => $self->schema );
- my $xml = $graph->gen_gexf;
- $xml > io('crawl.gexf');
+ $graph->gen_gexf;
}
sub graph_repo {
@@ -82,14 +81,32 @@ sub graph_repo {
}
}
-sub extract_seed {
# gen_seed: write one line per profile that has a blog to seed.csv (created
# in the current directory, overwriting any previous file).
# Line format: <blog>;;;github;<main language>;<other|languages>;<country>
# Languages are ranked by the summed RepoLang size over the profile's forks;
# the largest one is the main language, the rest are joined with '|'.
# Dies if seed.csv cannot be opened or closed.
+sub gen_seed {
my $self = shift;
$self->_connect unless $self->has_schema;
# NOTE(review): the condition hash names the key `blog` twice, so Perl keeps
# only the last pair (blog != ''); the `!= undef` test never reaches search().
my $profiles = $self->schema->resultset('Profiles')
->search( { blog => { '!=' => undef }, blog => { '!=' => '' } } );
+
+ open my $fh, '>', 'seed.csv' or die "cannot open seed.csv: $!";
while ( my $pr = $profiles->next ) {
+ my %languages;
+ my $forks = $self->schema->resultset('Fork')->search({profile =>
+ $pr->id});
+ while (my $fork = $forks->next) {
+ my $languages =
+ $self->schema->resultset('RepoLang')->search({repository =>
+ $fork->repos->id});
+ while (my $lang = $languages->next) {
+ $languages{$lang->language->name}+=$lang->size;
+ }
+ }
+ my @sorted_lang = sort {$languages{$b} <=> $languages{$a}} keys %languages;
# A profile without any recorded language gets an empty main-language field.
+ my $main_lang = @sorted_lang ? shift @sorted_lang : '';
+ my $other_lang = join('|', @sorted_lang);
# country is a nullable column; emit an empty field instead of undef.
+ my $country = $pr->country;
+ $country = '' unless defined $country;
# Read blog/country from the current row ($pr), not from the resultset.
+ my $str = $pr->blog.";;;github;".$main_lang.";".$other_lang.";".$country."\n";
+ print $fh $str;
}
# Buffered write errors only surface at close, so check it.
+ close $fh or die "cannot close seed.csv: $!";
}
-
1;
diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm
index f7e38cb..58281d4 100644
--- a/lib/githubexplorer/Gexf.pm
+++ b/lib/githubexplorer/Gexf.pm
@@ -9,113 +9,114 @@ has id_edges => (is => 'rw', isa => 'Num', traits => ['Counter'], default =>
0, handles => {inc_edges => 'inc'});
# graph: the hash that is later serialised to GEXF. Its default declares the
# static node attributes; the ids (0..10) are the numbers the node builders
# use in their `for => N` attvalue entries.
has graph => (
- is => 'rw',
- isa => 'HashRef',
- default => sub {
- my $graph = {
- gexf => {
- version => "1.1",
- meta => { creator => ['linkfluence'] },
- graph => {
- type => 'static',
- attributes => {
- class => 'node',
- type => 'static',
- attribute => [
- {
- id => 0,
- type => 'float',
- title => 'name'
- },
- {
- id => 1,
- type => 'string',
- title => 'type',
- },
- {
- id => 2,
- type => 'float',
- title => 'followers_count'
- },
- {
- id => 3,
- type => 'float',
- title => 'following_count'
- },
- {
- id => 4,
- type => 'float',
- title => 'forks',
- },
- {
- id => 5,
- type => 'string',
- title => 'location',
- },
- {
- id => 6,
- type => 'float',
- title => 'public_gist_count',
- },
- {
- id => 7,
- type => 'float',
- title => 'public_repo_count',
- },
- {
- id => 8,
- type => 'string',
- title => 'language',
- },
- {
- id => 9,
- type => 'string',
- title => 'description',
- },
- {
- id => 10,
- type => 'float',
- title => 'watchers',
- }
- ]
- }
+is => 'rw',
+isa => 'HashRef',
+default => sub {
+ my $graph = {
+ gexf => {
+ version => "1.1",
+ meta => { creator => ['linkfluence'] },
+ graph => {
+ type => 'static',
+ attributes => {
+ class => 'node',
+ type => 'static',
+ attribute => [
+ {
+ id => 0,
# Attribute 0 is filled with profile/repository names, so it is declared
# as a string rather than a float.
+ type => 'string',
+ title => 'name'
+ },
+ {
+ id => 1,
+ type => 'string',
+ title => 'type',
+ },
+ {
+ id => 2,
+ type => 'float',
+ title => 'followers_count'
+ },
+ {
+ id => 3,
+ type => 'float',
+ title => 'following_count'
+ },
+ {
+ id => 4,
+ type => 'float',
+ title => 'forks',
+ },
+ {
+ id => 5,
+ type => 'string',
+ title => 'location',
+ },
+ {
+ id => 6,
+ type => 'float',
+ title => 'public_gist_count',
+ },
+ {
+ id => 7,
+ type => 'float',
+ title => 'public_repo_count',
+ },
+ {
+ id => 8,
+ type => 'string',
+ title => 'language',
+ },
+ {
+ id => 9,
+ type => 'string',
+ title => 'description',
+ },
+ {
+ id => 10,
+ type => 'float',
+ title => 'watchers',
+ }
+ ]
}
}
- };
- }
+ }
+ };
+}
);
# gen_gexf: build the three graphs one after the other and write each dump
# to its own file in the current directory:
#   basic_profiles.gexf, profiles_from_repositories.gexf and
#   repositories_from_profiles.gexf.
# Each builder is followed by a dump_gexf call whose return value is written
# out through `> io(...)` (presumably IO::All's overloaded operator -- confirm
# against the module's imports).
sub gen_gexf {
my $self = shift;
- $self->profiles;
- #$self->repositories;
- say "total nodes : ".scalar (@{ $self->graph->{gexf}->{graph}->{nodes}->{node} });
- say "total edges : ".scalar (@{ $self->graph->{gexf}->{graph}->{edges}->{edge} });
+
+ $self->basic_profiles;
+ my $basic_profiles = $self->dump_gexf;
+ $basic_profiles > io('basic_profiles.gexf');
+
+ $self->profiles_from_repositories;
+ my $profiles_from_repositories = $self->dump_gexf;
+ $profiles_from_repositories > io ('profiles_from_repositories.gexf');
+
+ $self->repositories_from_profiles;
+ my $repositories_from_profiles = $self->dump_gexf;
# Write the dump that was just produced, not the previous graph's dump.
+ $repositories_from_profiles > io ('repositories_from_profiles.gexf');
+}
+
# dump_gexf: serialise the current graph with XMLout and return the XML
# string. The node and edge lists are reset to undef afterwards so that the
# next builder starts from an empty graph; the attribute declarations stay.
+sub dump_gexf {
+ my $self = shift;
my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 );
+ $self->graph->{gexf}->{graph}->{nodes} = undef;
+ $self->graph->{gexf}->{graph}->{edges} = undef;
return $xml_out;
}
+
# The method was first introduced under the misspelled name dump_gefx while
# its callers use dump_gexf; keep the old spelling as a thin wrapper.
+sub dump_gefx { my $self = shift; return $self->dump_gexf; }
-sub profiles {
# basic_profiles: reset the edge id counter, then add one graph node per row
# of the Profiles resultset. The node hash now comes from
# _get_node_for_profile instead of being built inline.
# The code that creates the edges lies between the two diff hunks and is
# only partly visible here.
+sub basic_profiles {
my $self = shift;
- say "start profiles ...";
+ $self->id_edges(0);
+ say "start basic_profiles ...";
my $profiles = $self->schema->resultset('Profiles')->search();
while ( my $profile = $profiles->next ) {
- my $node = {
- id => $profile->id,
- label => $profile->login,
- attvalues => {
- attvalue => [
- { for => 0, value => $profile->name},
- { for => 1, value => "profile"},
- { for => 2, value => $profile->followers_count},
- { for => 3, value => $profile->following_count},
- { for => 5, value => $profile->location},
- { for => 6, value => $profile->public_gist_count},
- { for => 7, value => $profile->public_repo_count},
- ]
- },
- };
+ my $node = $self->_get_node_for_profile($profile);
push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
}
@@ -129,26 +130,57 @@ sub profiles {
};
# Append the edge hash built just above to the graph's edge list.
push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
}
- say " done";
+ say "basic_profiles done";
}
-sub repositories {
# profiles_from_repositories: graph whose nodes are all profiles and whose
# edges link profiles that have a Fork row for the same repository.
# The edge id counter is reset first, so ids restart for this graph.
+sub profiles_from_repositories {
my $self = shift;
+ $self->id_edges(0);
+ say "start profiles_from_repositories ...";
- say "start repositories ...";
- my $repositories = $self->schema->resultset('Repositories')->search({fork => 0});
+ my $profiles = $self->schema->resultset('Profiles')->search();
+ while (my $profile = $profiles->next) {
+ my $node = $self->_get_node_for_profile($profile);
+ push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
+ }
+ my $repositories = $self->schema->resultset('Repositories')->search();
while (my $repos = $repositories->next) {
+ my $forks = $self->schema->resultset('Fork')->search({repos => $repos->id});
+ my @profiles;
+ while (my $fork = $forks->next) {
+ push @profiles, $fork->profile->id;
+ }
+ foreach my $p (@profiles) {
# Link $p to every profile listed before its own entry, so each pair is
# linked once (later profile -> earlier profile). This is exactly what the
# previous `map { next if $_ eq $p; ... }` did: `next` inside a map block
# leaves the map and advances the enclosing foreach. Written as an explicit
# loop because perlfunc advises against leaving map/grep with `next`.
# NOTE(review): confirm that one edge per pair is the intended result.
+ for my $other (@profiles) {
+ last if $other eq $p;
+ my $e = {
+ source => $p,
+ target => $other,
+ id => $self->inc_edges,
+ };
+ push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
+ }
+ }
+ }
+ say "profiles_from_repositories done";
+}
+
# repositories_from_profiles: graph with one node per repository name
# (names containing "dotfiles" are skipped) and one edge per Fork row, going
# from the forking profile's id to the repository name.
# Nodes are collected in the $nodes hash keyed by repository name, so each
# name yields a single node, and are appended to the graph at the end.
+sub repositories_from_profiles {
+ my $self = shift;
+ $self->id_edges(0);
+ say "start repositories_from_profiles ...";
- next if $repos->name =~ /dotfiles/i;
- # available in forks ?
- my $check_fork = $self->schema->resultset('Fork')->search({repos => $repos->id});
- next if $check_fork->count < 1;
+ my $nodes = {};
+ my $repositories = $self->schema->resultset('Repositories')->search();
+ while (my $repos = $repositories->next) {
+ next if $repos->name =~ /dotfiles/;
- if (!grep {$_->{id} eq "repos_".$repos->name} @{$self->graph->{gexf}->{graph}->{nodes}->{node}}) {
+ if (!exists $nodes->{$repos->name}) {
# NOTE(review): order_by => 'size' has no direction, so ->first is the row
# with the smallest size if the database sorts ascending -- confirm whether
# the largest language was intended.
my $language = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first;
my $lang = $language ? $language->language->name : 'none';
- my $node = {
- id => "repos_".$repos->name,
+ $nodes->{$repos->name} = {
+ id => $repos->name,
label => $repos->name,
attvalues => {
attvalue => [
@@ -161,28 +193,118 @@ sub repositories {
],
},
};
- push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
}
- my $e = {
- source => $repos->id_profile->id,
- target => "repos_".$repos->name,
- id => $self->inc_edges,
- };
- push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
+ my $forks = $self->schema->resultset('Fork')->search({repos => $repos->id});
+ while (my $fork = $forks->next) {
+ my $e = {
+ source => $fork->profile->id,
+ target => $fork->repos->name,
+ id => $self->inc_edges,
+ };
+ push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
+ }
}
# Append every collected repository node in one push. The previous line had
# an unclosed map block and did not compile.
+ push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, map { $nodes->{$_} } keys %$nodes;
+ say "repositories_from_profiles done";
+}
+
# stats_languages_by_country: placeholder, not implemented yet. It only
# unpacks the invocant and does nothing else.
+sub stats_languages_by_country {
+ my $self = shift;
+}
+
# _get_node_for_profile($profile): build and return the graph node hash for
# one Profiles row. The node id is the profile id and the label is the login.
# Each attvalue's `for` number refers to an attribute id declared in the
# graph default (0 name, 1 type, 2 followers_count, 3 following_count,
# 5 location, 6 public_gist_count, 7 public_repo_count, 8 language).
+sub _get_node_for_profile {
+ my ($self, $profile) = @_;
# $languages (the per-language totals) is received but not used below; only
# the first entry of the ordered list is kept as the main language.
+ my ($languages, $ordered_languages) = $self->_get_languages_for_profile($profile);
+ my $main_lang = shift @$ordered_languages;
+ my $node = {
+ id => $profile->id,
+ label => $profile->login,
+ attvalues => {
+ attvalue => [
+ { for => 0, value => $profile->name},
+ { for => 1, value => "profile"},
+ { for => 2, value => $profile->followers_count},
+ { for => 3, value => $profile->following_count},
# Attribute 5 is titled 'location' in the declaration but is filled with the
# country column here.
+ { for => 5, value => $profile->country},
+ { for => 6, value => $profile->public_gist_count},
+ { for => 7, value => $profile->public_repo_count},
+ { for => 8, value => $main_lang},
+ ]
+ },
+ };
+ return $node;
+}
- my $forks = $self->schema->resultset('Fork')->search();
# _get_languages_for_profile($profile): sum the RepoLang sizes per language
# name over every repository the profile has a Fork row for.
# Returns a two-element list: a reference to the {language name => total
# size} hash, and a reference to the language names sorted by total size,
# largest first.
+sub _get_languages_for_profile {
# Unpack both arguments from @_; a bare `shift` only filled $self and left
# $profile undefined.
+ my ($self, $profile) = @_;
+ my $forks = $self->schema->resultset('Fork')->search({profile =>
+ $profile->id});
+
+ my %languages;
while (my $fork = $forks->next) {
- next if $fork->repos->name =~ /dotfiles/i;
- my $e = {
- source => $fork->profile->id,
- target => "repos_".$fork->repos->name,
- id => $self->inc_edges,
- };
- push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
+ my $languages =
+ $self->schema->resultset('RepoLang')->search({repository =>
+ $fork->repos->id});
+ while (my $lang = $languages->next) {
+ $languages{$lang->language->name}+=$lang->size;
+ }
}
- say " done";
+ my @sorted_lang = sort {$languages{$b} <=> $languages{$a}} keys %languages;
+ return (\%languages, \@sorted_lang);
}
+#sub repositories {
+# my $self = shift;
+#
+# say "start repositories ...";
+# my $repositories = $self->schema->resultset('Repositories')->search({fork => 0});
+# while (my $repos = $repositories->next) {
+#
+# next if $repos->name =~ /dotfiles/i;
+# # available in forks ?
+# my $check_fork = $self->schema->resultset('Fork')->search({repos => $repos->id});
+# next if $check_fork->count < 1;
+#
+# if (!grep {$_->{id} eq "repos_".$repos->name} @{$self->graph->{gexf}->{graph}->{nodes}->{node}}) {
+# my $language = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first;
+# my $lang = $language ? $language->language->name : 'none';
+# my $node = {
+# id => "repos_".$repos->name,
+# label => $repos->name,
+# attvalues => {
+# attvalue => [
+# { for => 0, value => $repos->name},
+# { for => 1, value => "repository"},
+# { for => 4, value => $repos->forks},
+# { for => 9, value => $repos->description},
+# { for => 10, value => $repos->watchers},
+# { for => 8, value => $lang},
+# ],
+# },
+# };
+# push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
+# }
+# my $e = {
+# source => $repos->id_profile->id,
+# target => "repos_".$repos->name,
+# id => $self->inc_edges,
+# };
+# push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
+# }
+#
+# my $forks = $self->schema->resultset('Fork')->search();
+#
+# while (my $fork = $forks->next) {
+# next if $fork->repos->name =~ /dotfiles/i;
+# my $e = {
+# source => $fork->profile->id,
+# target => "repos_".$fork->repos->name,
+# id => $self->inc_edges,
+# };
+# push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
+# }
+# say " done";
+#}
+
1;
diff --git a/lib/githubexplorer/Schema/Result/Profiles.pm b/lib/githubexplorer/Schema/Result/Profiles.pm
index e0349d7..b43211e 100644
--- a/lib/githubexplorer/Schema/Result/Profiles.pm
+++ b/lib/githubexplorer/Schema/Result/Profiles.pm
@@ -15,6 +15,8 @@ __PACKAGE__->add_columns(
following_count => { data_type => 'int' },
gravatar_id => { data_type => 'varchar', is_nullable => 1 },
location => { data_type => 'varchar', is_nullable => 1 },
+ country => { data_type => 'varchar', is_nullable => 1 },
+ city => { data_type => 'varchar', is_nullable => 1 },
name => { data_type => 'varchar', is_nullable => 1 },
public_gist_count => { data_type => 'int' },
public_repo_count => { data_type => 'int' },