summaryrefslogtreecommitdiff
path: root/lib/githubexplorer.pm
blob: 7b9c252136b70e7001fb70d03d62984606b38636 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
package githubexplorer;
use 5.010;
use YAML::Syck;
use Moose;
use githubexplorer::Schema;
use githubexplorer::Gexf;
use IO::All;

# Compose the three githubexplorer roles into this class.  The methods
# fetch_profile, fetch_repositories and fetch_network that are called further
# down are not defined in this file -- presumably they are provided by these
# roles (TODO confirm against the role sources).
with 'githubexplorer::Profile',
    'githubexplorer::Repository',
    'githubexplorer::Network';

# seed: array reference of logins that harvest_profiles starts from.
#
# When no value is passed to the constructor the list is built lazily from
# the database: the login of every 'Profiles' row matching "done != 1",
# ordered by login descending.  The default reads $self->schema, so a schema
# must already be set the first time this attribute is read.
has seed => (
    is       => 'rw',
    isa      => 'ArrayRef',
    required => 1,
    lazy     => 1,
    default  => sub {
        my ($self) = @_;
        my $pending = $self->schema->resultset('Profiles')->search(
            { done     => { '!=', 1 } },
            { order_by => 'login desc' },
        );
        return [ map { $_->login } $pending->all ];
    }
);
# api_login / api_token: must be passed to the constructor, but may be undef.
# They are not read in this file; presumably they are the GitHub API
# credentials used by the composed roles (TODO confirm).
has api_login => (
    is       => 'ro',
    isa      => 'Str|Undef',
    required => 1,
);
has api_token => (
    is       => 'ro',
    isa      => 'Str|Undef',
    required => 1,
);

# connect_info: argument list that _connect passes to
# githubexplorer::Schema->connect.
has connect_info => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# with_repo: boolean flag, false by default.  Not read in this file.
has with_repo => (
    is      => 'ro',
    isa     => 'Bool',
    default => sub {0},
);

# schema: the githubexplorer::Schema handle.  It has no default; _connect
# sets it, and has_schema reports whether it has been set.
has schema => (
    is        => 'rw',
    isa       => 'githubexplorer::Schema',
    predicate => 'has_schema',
);

# Call deploy on the schema, connecting first when no schema has been set.
# Returns whatever the schema's deploy method returns.
sub deploy {
    my $self = shift;
    if ( !$self->has_schema ) {
        $self->_connect();
    }
    return $self->schema->deploy;
}

# Connect to the database with the arguments stored in connect_info and
# store the resulting schema object in the schema attribute.
# Returns the value returned by the schema accessor.
sub _connect {
    my ($self) = @_;
    my @dsn_args = @{ $self->connect_info };
    return $self->schema( githubexplorer::Schema->connect(@dsn_args) );
}

# Call fetch_profile once for every login in the seed list.
#
#   $depth - passed through unchanged to fetch_profile; defaults to 1 when
#            undefined.  Its exact meaning is defined by fetch_profile,
#            which is not part of this file.
#
# Connects to the database first when no schema has been set (the lazy seed
# default needs it).
sub harvest_profiles {
    my ( $self, $depth ) = @_;
    $depth = 1 unless defined $depth;
    if ( !$self->has_schema ) {
        $self->_connect();
    }
    for my $seed_login ( @{ $self->seed } ) {
        $self->fetch_profile( $seed_login, $depth );
    }
}

# Walk every row of the 'Profiles' resultset and hand each one to
# fetch_repositories.  Connects first when no schema has been set.
sub harvest_repo {
    my ($self) = @_;
    if ( !$self->has_schema ) {
        $self->_connect;
    }
    my $profile_rs = $self->schema->resultset('Profiles');
    my $all_profiles = $profile_rs->search();

    # Iterate row by row rather than loading the whole table at once.
    while ( my $profile = $all_profiles->next ) {
        $self->fetch_repositories($profile);
    }
}

# Build a githubexplorer::Gexf object on top of the current schema and call
# its gen_gexf method.  Connects first when no schema has been set.
# Returns whatever gen_gexf returns.
sub gen_graph {
    my ($self) = @_;
    if ( !$self->has_schema ) {
        $self->_connect;
    }
    my $gexf = githubexplorer::Gexf->new( schema => $self->schema );
    return $gexf->gen_gexf;
}

# Call fetch_network for every 'Repositories' row whose fork column is 0.
# Connects first when no schema has been set.
sub graph_repo {
    my ($self) = @_;
    if ( !$self->has_schema ) {
        $self->_connect;
    }
    my $repo_rs = $self->schema->resultset('Repositories');
    my $originals = $repo_rs->search( { fork => 0 } );
    while ( my $repo = $originals->next ) {
        $self->fetch_network($repo);
    }
}

# Write one line per profile that has a non-empty blog to 'seed.csv' in the
# current working directory (the file is truncated first).
#
# Line format, semicolon separated:
#   <blog>;;;github;<main language>;<other languages joined by '|'>;<country>
#
# Languages are ranked by the summed RepoLang size over the repositories
# reached through the profile's Fork rows; the largest one is the main
# language.  A profile without any language data gets empty language fields.
#
# Connects first when no schema has been set.
# Dies when seed.csv cannot be opened, written or closed.
# Returns 1 on success.
sub gen_seed {
    my $self = shift;
    $self->_connect unless $self->has_schema;

    # The two conditions are spelled out as separate -and terms: repeating
    # the "blog" key inside one hash literal silently keeps only the last
    # entry, so the NULL check would be dropped.
    my $profiles = $self->schema->resultset('Profiles')->search(
        {   -and => [
                { blog => { '!=' => undef } },
                { blog => { '!=' => '' } },
            ]
        }
    );

    my $file = 'seed.csv';
    open my $fh, '>', $file or die "Cannot open $file for writing: $!";

    while ( my $pr = $profiles->next ) {

        # language name => accumulated size over all of this profile's forks
        my %size_of;
        my $forks = $self->schema->resultset('Fork')
            ->search( { profile => $pr->id } );
        while ( my $fork = $forks->next ) {
            my $repo_langs = $self->schema->resultset('RepoLang')
                ->search( { repository => $fork->repos->id } );
            while ( my $lang = $repo_langs->next ) {
                $size_of{ $lang->language->name } += $lang->size // 0;
            }
        }

        # Largest language first; the rest become the '|'-joined list.
        my @sorted_lang
            = sort { $size_of{$b} <=> $size_of{$a} } keys %size_of;
        my $main_lang  = shift(@sorted_lang) // '';
        my $other_lang = join( '|', @sorted_lang );

        # Fields are read from the current row ($pr), not from the resultset.
        my $str
            = $pr->blog
            . ";;;github;"
            . $main_lang . ";"
            . $other_lang . ";"
            . ( $pr->country // '' ) . "\n";
        print {$fh} $str or die "Cannot write to $file: $!";
    }

    # Buffered write errors only surface at close, so check it.
    close $fh or die "Cannot close $file: $!";
    return 1;
}

1;