summary refs log tree commit diff
diff options
context:
space:
mode:
author franck cuny <franck@lumberjaph.net> 2010-02-12 12:00:27 +0100
committer franck cuny <franck@lumberjaph.net> 2010-02-12 12:00:27 +0100
commit 1f68a82c35b01fd5a8efbf3894d74c020acaf7ed (patch)
tree 1d76cfd9866ecb81da0d1ba086cd867e50359959
parent wip (diff)
parent wip (diff)
download github-explorer-1f68a82c35b01fd5a8efbf3894d74c020acaf7ed.tar.gz
Merge branch 'master' of lj:github-explorer
* 'master' of lj:github-explorer: wip prepare repos small fix
-rw-r--r-- .gitignore 4
-rw-r--r-- extract-seed.pl 6
-rw-r--r-- lib/githubexplorer.pm 2
-rw-r--r-- lib/githubexplorer/Gexf.pm 137
-rw-r--r-- lib/githubexplorer/Network.pm 9
-rw-r--r-- lib/githubexplorer/Repository.pm 9
6 files changed, 146 insertions, 21 deletions
diff --git a/.gitignore b/.gitignore
index 4358bbd..19f1f1e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
*.sqlite
*.conf
*.sql
-*.yml
\ No newline at end of file
+*.yml
+*.gexf
+*.csv
\ No newline at end of file
diff --git a/extract-seed.pl b/extract-seed.pl
new file mode 100644
index 0000000..293c270
--- /dev/null
+++ b/extract-seed.pl
@@ -0,0 +1,6 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use githubexplorer::Schema;
+
+my $schema = githubexplorer::Schema->connect(); \ No newline at end of file
diff --git a/lib/githubexplorer.pm b/lib/githubexplorer.pm
index 9e5e134..4260842 100644
--- a/lib/githubexplorer.pm
+++ b/lib/githubexplorer.pm
@@ -69,7 +69,7 @@ sub gen_graph {
my $self = shift;
$self->_connect unless $self->has_schema;
my $graph = githubexplorer::Gexf->new( schema => $self->schema );
- my $xml = $graph->profiles;
+ my $xml = $graph->gen_gexf;
$xml > io('crawl.gexf');
}
diff --git a/lib/githubexplorer/Gexf.pm b/lib/githubexplorer/Gexf.pm
index 503eebe..f7e38cb 100644
--- a/lib/githubexplorer/Gexf.pm
+++ b/lib/githubexplorer/Gexf.pm
@@ -2,8 +2,11 @@ package githubexplorer::Gexf;
use Moose;
use XML::Simple;
+use 5.010;
has schema => (is => 'ro', isa => 'Object', required => 1);
+has id_edges => (is => 'rw', isa => 'Num', traits => ['Counter'], default =>
+0, handles => {inc_edges => 'inc'});
has graph => (
is => 'rw',
@@ -11,8 +14,8 @@ has graph => (
default => sub {
my $graph = {
gexf => {
- version => "1.0",
- meta => { creator => ['rtgi'] },
+ version => "1.1",
+ meta => { creator => ['linkfluence'] },
graph => {
type => 'static',
attributes => {
@@ -21,19 +24,59 @@ has graph => (
attribute => [
{
id => 0,
- type => 'string',
+ type => 'float',
title => 'name'
},
{
- id => 1,
- type => 'string',
- title => 'followers_count'
+ id => 1,
+ type => 'string',
+ title => 'type',
},
{
id => 2,
- type => 'string',
+ type => 'float',
+ title => 'followers_count'
+ },
+ {
+ id => 3,
+ type => 'float',
title => 'following_count'
},
+ {
+ id => 4,
+ type => 'float',
+ title => 'forks',
+ },
+ {
+ id => 5,
+ type => 'string',
+ title => 'location',
+ },
+ {
+ id => 6,
+ type => 'float',
+ title => 'public_gist_count',
+ },
+ {
+ id => 7,
+ type => 'float',
+ title => 'public_repo_count',
+ },
+ {
+ id => 8,
+ type => 'string',
+ title => 'language',
+ },
+ {
+ id => 9,
+ type => 'string',
+ title => 'description',
+ },
+ {
+ id => 10,
+ type => 'float',
+ title => 'watchers',
+ }
]
}
}
@@ -42,8 +85,19 @@ has graph => (
}
);
+sub gen_gexf {
+ my $self = shift;
+ $self->profiles;
+ #$self->repositories;
+ say "total nodes : ".scalar (@{ $self->graph->{gexf}->{graph}->{nodes}->{node} });
+ say "total edges : ".scalar (@{ $self->graph->{gexf}->{graph}->{edges}->{edge} });
+ my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 );
+ return $xml_out;
+}
+
sub profiles {
my $self = shift;
+ say "start profiles ...";
my $profiles = $self->schema->resultset('Profiles')->search();
while ( my $profile = $profiles->next ) {
@@ -52,9 +106,13 @@ sub profiles {
label => $profile->login,
attvalues => {
attvalue => [
- {name => $profile->name},
- {followers_count => $profile->followers_count},
- {following_count => $profile->following_count},
+ { for => 0, value => $profile->name},
+ { for => 1, value => "profile"},
+ { for => 2, value => $profile->followers_count},
+ { for => 3, value => $profile->following_count},
+ { for => 5, value => $profile->location},
+ { for => 6, value => $profile->public_gist_count},
+ { for => 7, value => $profile->public_repo_count},
]
},
};
@@ -65,17 +123,66 @@ sub profiles {
my $id = 0;
while ( my $edge = $edges->next ) {
my $e = {
- cardinal => 1,
source => $edge->origin->id,
target => $edge->dest->id,
- type => 'dir',
- id => $id++,
+ id => $self->inc_edges,
};
push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
}
+ say " done";
+}
- my $xml_out = XMLout( $self->graph, AttrIndent => 1, keepRoot => 1 );
- return $xml_out;
+sub repositories {
+ my $self = shift;
+
+ say "start repositories ...";
+ my $repositories = $self->schema->resultset('Repositories')->search({fork => 0});
+ while (my $repos = $repositories->next) {
+
+ next if $repos->name =~ /dotfiles/i;
+ # available in forks ?
+ my $check_fork = $self->schema->resultset('Fork')->search({repos => $repos->id});
+ next if $check_fork->count < 1;
+
+ if (!grep {$_->{id} eq "repos_".$repos->name} @{$self->graph->{gexf}->{graph}->{nodes}->{node}}) {
+ my $language = $self->schema->resultset('RepoLang')->search({repository => $repos->id}, {order_by => 'size'})->first;
+ my $lang = $language ? $language->language->name : 'none';
+ my $node = {
+ id => "repos_".$repos->name,
+ label => $repos->name,
+ attvalues => {
+ attvalue => [
+ { for => 0, value => $repos->name},
+ { for => 1, value => "repository"},
+ { for => 4, value => $repos->forks},
+ { for => 9, value => $repos->description},
+ { for => 10, value => $repos->watchers},
+ { for => 8, value => $lang},
+ ],
+ },
+ };
+ push @{ $self->graph->{gexf}->{graph}->{nodes}->{node} }, $node;
+ }
+ my $e = {
+ source => $repos->id_profile->id,
+ target => "repos_".$repos->name,
+ id => $self->inc_edges,
+ };
+ push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
+ }
+
+ my $forks = $self->schema->resultset('Fork')->search();
+
+ while (my $fork = $forks->next) {
+ next if $fork->repos->name =~ /dotfiles/i;
+ my $e = {
+ source => $fork->profile->id,
+ target => "repos_".$fork->repos->name,
+ id => $self->inc_edges,
+ };
+ push @{ $self->graph->{gexf}->{graph}->{edges}->{edge} }, $e;
+ }
+ say " done";
}
1;
diff --git a/lib/githubexplorer/Network.pm b/lib/githubexplorer/Network.pm
index eb6253f..dde08a4 100644
--- a/lib/githubexplorer/Network.pm
+++ b/lib/githubexplorer/Network.pm
@@ -7,6 +7,11 @@ use YAML::Syck;
sub fetch_network {
my ( $self, $repos ) = @_;
+ # check fork
+ my $check = $self->schema->resultset('Fork')->search({repos=>
+ $repos->id});
+ return if $check->count > 0;
+
say ">> start on ".$repos->name;
my $api_repos = Net::GitHub::V2::Repositories->new(
owner => $repos->id_profile->login,
@@ -16,6 +21,10 @@ sub fetch_network {
);
my $edges = $api_repos->network();
+ if (ref $edges ne 'ARRAY') {
+ sleep 60;
+ return;
+ }
sleep(1);
foreach my $edge (@$edges) {
next if $edge->{owner} eq $repos->id_profile->login;
diff --git a/lib/githubexplorer/Repository.pm b/lib/githubexplorer/Repository.pm
index 035450a..617e091 100644
--- a/lib/githubexplorer/Repository.pm
+++ b/lib/githubexplorer/Repository.pm
@@ -2,6 +2,7 @@ package githubexplorer::Repository;
use 5.010;
use Moose::Role;
use Net::GitHub::V2::Repositories;
+use Try::Tiny;
sub fetch_repositories {
my ( $self, $profile ) = @_;
@@ -14,9 +15,9 @@ sub fetch_repositories {
my $repo_list = $github_profile->list();
- while ( ref $repo_list ne 'ARRAYREF' ) {
+ if ( ref $repo_list ne 'ARRAY' ) {
sleep(60);
- $repo_list = $github_profile->list();
+ return;
}
foreach my $repos (@$repo_list) {
@@ -40,9 +41,9 @@ sub fetch_repositories {
token => $self->api_token,
);
my $langs = $api_repos->languages;
- while ( ref $langs ne 'HASHREF' ) {
+ if ( ref $langs ne 'HASH' ) {
sleep(60);
- $langs = $api_repos->languages;
+ next;
}
foreach my $lang ( keys %$langs ) {