diff --git a/ExportGitHubUsageStatsForOrganization.ps1 b/ExportGitHubUsageStatsForOrganization.ps1 deleted file mode 100644 index 75dc256..0000000 --- a/ExportGitHubUsageStatsForOrganization.ps1 +++ /dev/null @@ -1,158 +0,0 @@ -# Export Git Hub Usage Stats For Organization -# This PowerShell script can be used to export daily GitHub repository statistics, -# for all repositories under an organization. -# -# Author: Gabriel Mongefranco (@gabrielmongefranco) -# See README for other contributors, if any. -# Created: 3/15/24 -# License: See attached license file -# Website: https://github.com/DepressionCenter | https://depressioncenter.org - - -# Set the working directory and inputs -$organizationName = 'ENTER_ORGANIZATION_NAME_HERE_WITHOUT_SPACES' -$username = 'ENTER_GITHUB_USERNAME_HERE' -# To use interactive login, leave the apiToken string blank and uncomment where indicated in the authentication section -$apiToken = ConvertTo-SecureString 'ENTER_GITHUB_API_TOKEN_HERE' -AsPlainText -Force -$jsonOutputPath = 'c:\GitHubStats\efdc-github-stats.json' -$csvOutputPath = 'c:\GitHubStats\efdc-github-stats.csv' -$csvRollingOutputPath = 'c:\GitHubStats\efdc-github-stats-rolling.csv' - - -# Ensure you have PowerShellForGitHub module installed -Import-Module PowerShellForGitHub - - - -# Begin -Clear-Host -Write-Host -f Yellow " === Export GitHub Usage Stats For Organization Script === " - -# Authentication -# To authenticate interactively, comment this section, and use this instead: Set-GitHubAuthentication $username -Write-Host "Authenticating to GitHub API as $username..." -$githubCredential = New-Object System.Management.Automation.PSCredential $username, $apiToken -Set-GitHubAuthentication -Credential $githubCredential -SessionOnly -$apiToken = '' - - -# Set some GitHub parameters -Set-GitHubConfiguration -DisableTelemetry - -# Get all repositories under the given organization -Write-Host "Getting repos..." 
-$repoCount = [int]0 -try -{ - $repos = Get-GitHubRepository -OrganizationName $organizationName - $repoCount = [int]$repos.Count -} catch { - Write-Host -f Red "Error while getting repository information. Ensure the organization and credentials are correct." -} - -if ($repoCount -eq 0) -{ - Write-Host -f Red "No repositories found." - Start-Sleep -Seconds 3 - Exit -} else { - Write-Host "Found $repoCount repo(s)." -} - -# Add custom properties to the repository variable -$repos | Add-Member -Force -MemberType NoteProperty -Name contributions_count -Value @() -$repos | Add-Member -Force -MemberType NoteProperty -Name contributors -Value @() -$repos | Add-Member -Force -MemberType NoteProperty -Name contributors_count -Value 0 -$repos | Add-Member -Force -MemberType NoteProperty -Name referrer_traffic -Value @() -$repos | Add-Member -Force -MemberType NoteProperty -Name referrer_traffic_count -Value 0 -$repos | Add-Member -Force -MemberType NoteProperty -Name referrer_traffic_uniques -Value 0 -$repos | Add-Member -Force -MemberType NoteProperty -Name path_traffic -Value @() -$repos | Add-Member -Force -MemberType NoteProperty -Name path_traffic_count -Value 0 -$repos | Add-Member -Force -MemberType NoteProperty -Name path_traffic_uniques -Value 0 -$repos | Add-Member -Force -MemberType NoteProperty -Name view_traffic -Value @() -$repos | Add-Member -Force -MemberType NoteProperty -Name view_traffic_count -Value 0 -$repos | Add-Member -Force -MemberType NoteProperty -Name view_traffic_uniques -Value 0 -$repos | Add-Member -Force -MemberType NoteProperty -Name clone_traffic -Value @() -$repos | Add-Member -Force -MemberType NoteProperty -Name clone_traffic_count -Value 0 -$repos | Add-Member -Force -MemberType NoteProperty -Name clone_traffic_uniques -Value 0 -Write-Host "Getting usage stats..." - -# Contributions -$repos | ForEach-Object {$_.contributions_count = $_.stats.contributions } - -# Contributors -Write-Host "Getting contributors..." 
-$repos | ForEach-Object {$_.contributors = Get-GitHubRepositoryContributor -Uri $_.url } -$repos | ForEach-Object {$_.contributors_count = ($_.contributors | Select-Object -ExpandProperty login -Unique).Count } - -# Referrer Traffic -Write-Host "Getting referrer traffic..." -$repos | ForEach-Object {$_.referrer_traffic = Get-GitHubReferrerTraffic -Uri $_.url } -$repos | ForEach-Object {$_.referrer_traffic_count = (Get-GitHubReferrerTraffic -Uri $_.url | Measure-Object -Sum count).Sum } -$repos | ForEach-Object {$_.referrer_traffic_uniques = (Get-GitHubReferrerTraffic -Uri $_.url | Measure-Object -Sum uniques).Sum } - -# Path Traffic -Write-Host "Getting path traffic..." -$repos | ForEach-Object {$_.path_traffic = Get-GitHubPathTraffic -Uri $_.url } -$repos | ForEach-Object {$_.path_traffic_count = (Get-GitHubPathTraffic -Uri $_.url | Measure-Object -Sum count).Sum } -$repos | ForEach-Object {$_.path_traffic_uniques = (Get-GitHubPathTraffic -Uri $_.url | Measure-Object -Sum uniques).Sum } - -# View Traffic -Write-Host "Getting view traffic..." -$repos | ForEach-Object {$_.view_traffic = Get-GitHubViewTraffic -Uri $_.url } -$repos | ForEach-Object {$_.view_traffic_count = (Get-GitHubViewTraffic -Uri $_.url | Measure-Object -Sum count).Sum } -$repos | ForEach-Object {$_.view_traffic_uniques = (Get-GitHubViewTraffic -Uri $_.url | Measure-Object -Sum uniques).Sum } - -# Clone Traffic -Write-Host "Getting clone traffic..." -$repos | ForEach-Object {$_.clone_traffic = Get-GitHubCloneTraffic -Uri $_.url } -$repos | ForEach-Object {$_.clone_traffic_count = (Get-GitHubCloneTraffic -Uri $_.url | Measure-Object -Sum count).Sum } -$repos | ForEach-Object {$_.clone_traffic_uniques = (Get-GitHubCloneTraffic -Uri $_.url | Measure-Object -Sum uniques).Sum } - - -# Extract pertinent extracts and convert to array of objects for easy export -Write-Host "Processing usage stats..." 
-$DateCaptured = (Get-Date) -$usageStats = @() -foreach ($repo in $repos) -{ - $usageStats += [PSCustomObject]@{ - DateCaptured = $DateCaptured - name = $repo.name - full_name = $repo.full_name - owner = $repo.owner.login - description = $repo.description - url = $repo.RepositoryUrl - created = $repo.created_at - updated = $repo.updated_at - pushed = $repo.pushed_at - size = $repo.size - visibility = $repo.visibility - stargazers_count = [int]$repo.stargazers_count - watchers_count = [int]$repo.watchers_count - forks_count = [int]$repo.forks_count - open_issues_count = [int]$repo.open_issues_count - contributors_count = [int]$repo.contributors_count - referrer_traffic_count = [int]$repo.referrer_traffic_count - referrer_traffic_uniques = [int]$repo.referrer_traffic_uniques - path_traffic_count = [int]$repo.path_traffic_count - path_traffic_uniques = [int]$repo.path_traffic_uniques - view_traffic_count = [int]$repo.view_traffic_count - view_traffic_uniques = [int]$repo.view_traffic_uniques - clone_traffic_count = [int]$repo.clone_traffic_count - clone_traffic_uniques = [int]$repo.clone_traffic_uniques - } -} - - -# Export results to JSON -Write-Host "Saving results in JSON format at: $jsonOutputPath" -$usageStats | ConvertTo-Json | Out-File -FilePath $jsonOutputPath - -# Export only pertinent stats to CSV -Write-Host "Saving results in CSV format at: $csvOutputPath" -$usageStats | Export-Csv -Path $csvOutputPath -NoTypeInformation -Write-Host "Saving cummulative results in CSV format at: $csvRollingOutputPath" -$usageStats | Export-Csv -Path $csvRollingOutputPath -NoTypeInformation -Append - -Write-Host "Done." 
diff --git a/PowerShell-Scripts/ExportGitHubUsageStatsForOrganization.ps1 b/PowerShell-Scripts/ExportGitHubUsageStatsForOrganization.ps1
new file mode 100644
index 0000000..7d08539
--- /dev/null
+++ b/PowerShell-Scripts/ExportGitHubUsageStatsForOrganization.ps1
@@ -0,0 +1,416 @@
+# Export GitHub Usage Stats For Organization
+# This PowerShell script can be used to export daily GitHub repository statistics,
+# for all repositories under an organization.
+#
+# Author: Gabriel Mongefranco (@gabrielmongefranco)
+# See README for other contributors, if any.
+# Created: 3/15/24
+# License: See attached license file
+# Website: https://github.com/DepressionCenter | https://depressioncenter.org
+#
+# Remarks: Set GITHUB_USERNAME and GITHUB_API_KEY in the system environment variables before running this script.
+
+
+# Inputs: organization name, credentials (read from environment variables), and output file paths
+$organizationName = 'DepressionCenter'
+$username = $Env:GITHUB_USERNAME
+# To use interactive login, leave the apiToken string blank and uncomment where indicated in the authentication section
+$apiToken = [SecureString](ConvertTo-SecureString $Env:GITHUB_API_KEY -AsPlainText -Force)
+$jsonOutputPath = 'c:\GitHubStats\github-stats-' + $organizationName + '.json'
+$jsonDetailedOutputPath = 'c:\GitHubStats\github-stats-detailed-' + $organizationName + '.json'
+$csvOutputPath = 'c:\GitHubStats\github-stats-' + $organizationName + '.csv'
+$csvRollingOutputPath = 'c:\GitHubStats\github-stats-rolling-' + $organizationName + '.csv'
+
+
+# Ensure you have PowerShellForGitHub module installed
+Import-Module PowerShellForGitHub
+
+
+
+# Begin
+Clear-Host
+Write-Host -f Yellow " === Export GitHub Usage Stats For Organization Repos === "
+$DateCaptured = [DateTime]::UtcNow #Use UTC to keep in line with how GitHub reports data
+
+# Authentication
+# To authenticate interactively, comment this section, and use this instead: Set-GitHubAuthentication $username
+Write-Host "Authenticating to GitHub API as $username..."
+$githubCredential = [System.Management.Automation.PSCredential](New-Object System.Management.Automation.PSCredential($username, $apiToken))
+Set-GitHubAuthentication -Credential $githubCredential -SessionOnly
+$apiToken = ''
+
+
+# Set some GitHub parameters
+Set-GitHubConfiguration -DisableTelemetry -DisableUpdateCheck -DefaultOwnerName $organizationname
+
+# Get all repositories under the given organization
+Write-Host "Getting repos..."
+$repoCount = [int]0
+try
+{
+    $repos = Get-GitHubRepository -OrganizationName $organizationName
+    $repoCount = [int]$repos.Count
+} catch {
+    Write-Host -f Red "Error while getting repository information. Ensure the organization and credentials are correct."
+}
+
+if ($repoCount -eq 0)
+{
+    Write-Host -f Red "No repositories found."
+    Start-Sleep -Seconds 3
+    Exit
+} else {
+    Write-Host "Found $repoCount repo(s)."
+}
+
+# Add custom properties to each repository object. Counters use a bare 0: as a command argument, [int]0 is parsed as the literal string '[int]0', not a number.
+$repos | Add-Member -Force -MemberType NoteProperty -Name contributors -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name contributors_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name contributors_csv -Value ""
+$repos | Add-Member -Force -MemberType NoteProperty -Name contributions_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name contributors_detail_csv -Value ""
+$repos | Add-Member -Force -MemberType NoteProperty -Name collaborators -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name collaborators_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name collaborators_csv -Value ""
+
+$repos | Add-Member -Force -MemberType NoteProperty -Name stargazers -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name stargazers_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name stargazers_csv -Value ""
+$repos | Add-Member -Force -MemberType NoteProperty -Name watchers -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name watchers_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name watchers_csv -Value ""
+$repos | Add-Member -Force -MemberType NoteProperty -Name referrer_traffic -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name referrer_traffic_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name referrer_traffic_uniques -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name referrer_traffic_csv -Value ""
+$repos | Add-Member -Force -MemberType NoteProperty -Name path_traffic -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name path_traffic_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name path_traffic_uniques -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name path_traffic_csv -Value ""
+$repos | Add-Member -Force -MemberType NoteProperty -Name view_traffic -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name view_traffic_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name view_traffic_uniques -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name view_traffic_count_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name view_traffic_uniques_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name view_traffic_csv -Value ""
+$repos | Add-Member -Force -MemberType NoteProperty -Name clone_traffic -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name clone_traffic_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name clone_traffic_uniques -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name clone_traffic_count_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name clone_traffic_uniques_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name clone_traffic_csv -Value ""
+$repos | Add-Member -Force -MemberType NoteProperty -Name events -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name events_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name events_count_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name events_uniques -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name events_uniques_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name events_csv -Value ""
+$repos | Add-Member -Force -MemberType NoteProperty -Name pushes -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name pushes_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name pushes_count_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name pushes_uniques -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name pushes_uniques_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name pushes_csv -Value ""
+$repos | Add-Member -Force -MemberType NoteProperty -Name forks -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name forks_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name forks_uniques -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name forks_count_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name forks_uniques_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name forks_csv -Value ""
+$repos | Add-Member -Force -MemberType NoteProperty -Name pulls -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name pulls_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name issues -Value @()
+$repos | Add-Member -Force -MemberType NoteProperty -Name issues_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name issues_open_count -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name issues_uniques -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name issues_count_opened_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name issues_count_closed_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name issues_uniques_opened_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name issues_uniques_closed_yesterday -Value 0
+$repos | Add-Member -Force -MemberType NoteProperty -Name topics_csv -Value ""
+
+
+# Contributors (contributors_csv lists user names; contributors_detail_csv entries are UserName|contributions)
+Write-Host "Getting contributors..."
+$repos | ForEach-Object {$_.contributors = Get-GitHubRepositoryContributor -Uri $_.url }
+$repos | ForEach-Object {$_.contributors_count = [int]($_.contributors | Select-Object -ExpandProperty login -Unique).Count }
+$repos | ForEach-Object {if ($_.contributors_count -gt 0) {$_.contributors_csv = [String]::Join(",", $_.contributors.UserName ) }} -ErrorAction Ignore
+$repos | ForEach-Object {if ($_.contributors_count -gt 0) {$_.contributors_detail_csv = [String]::Join(",", ($_.contributors | ForEach-Object {$_.UserName + '|' + $_.contributions}) ) }} -ErrorAction Ignore
+
+# Contributions (sum of the per-contributor contribution counts)
+$repos | ForEach-Object {$_.contributions_count = [int]($_.contributors | Measure-Object -Sum contributions).Sum }
+
+
+# Collaborators (fetched from collaborators_url with its '{/collaborator}' placeholder removed)
+Write-Host "Getting collaborators..."
+$repos | ForEach-Object { $_.collaborators = (Invoke-GHRestMethod -Method Get -UriFragment $_.collaborators_url.replace('{/collaborator}','')); $_.collaborators_count = $_.collaborators.count }
+$repos | ForEach-Object {if ($_.collaborators_count -gt 0) {$_.collaborators_csv = [String]::Join(",", ($_.collaborators | ForEach-Object {$_.login}) ) }} -ErrorAction Ignore
+
+
+# Watchers (subscribers)
+# Due to an API change, subscribers_count should be used for getting subscribers, fka watchers.
+# The other fields called watchers and stargazers both return stargazers now, but this PS module does not support this change.
+Write-Host "Getting watchers (subscribers)..."
+$repos | ForEach-Object { $_.watchers = (Invoke-GHRestMethod -Uri $_.subscribers_url -Method Get) }  # direct REST call to the repo's subscribers_url
+$repos | ForEach-Object { $_.watchers_count = $_.watchers.Count }
+$repos | ForEach-Object {if ($_.watchers_count -gt 0) {$_.watchers_csv = [String]::Join(",", $_.watchers.login ) }} -ErrorAction Ignore
+
+
+# Stargazers (bookmarks): same approach as watchers, using the repo's stargazers_url
+Write-Host "Getting stargazers (bookmarks)..."
+$repos | ForEach-Object { $_.stargazers = (Invoke-GHRestMethod -Uri $_.stargazers_url -Method Get) }
+$repos | ForEach-Object { $_.stargazers_count = $_.stargazers.Count }
+$repos | ForEach-Object {if ($_.stargazers_count -gt 0) {$_.stargazers_csv = ([String]::Join(",", $_.stargazers.login )) }} -ErrorAction Ignore
+
+
+# Referrer Traffic (referrer_traffic_csv entries are formatted as referrer|count|uniques)
+Write-Host "Getting referrer traffic..."
+$repos | ForEach-Object {$_.referrer_traffic = Get-GitHubReferrerTraffic -Uri $_.url }
+$repos | ForEach-Object {$_.referrer_traffic_count = [int]($_.referrer_traffic | Measure-Object -Sum count).Sum }
+$repos | ForEach-Object {$_.referrer_traffic_uniques = [int]($_.referrer_traffic | Measure-Object -Sum uniques).Sum }
+# Use this to get only referrer website without counts: $repos | ForEach-Object {if ($_.referrer_traffic_count -gt 0) {$_.referrer_traffic_csv = [String]::Join(",", $_.referrer_traffic.referrer ) }} -ErrorAction Ignore
+$repos | ForEach-Object {if ($_.referrer_traffic_count -gt 0) {$_.referrer_traffic_csv = [String]::Join(",", ($_.referrer_traffic | ForEach-Object {$_.referrer + '|' + $_.count + '|' + $_.uniques}) ) }} -ErrorAction Ignore
+
+# Path Traffic
+Write-Host "Getting path traffic..."
+$repos | ForEach-Object {$_.path_traffic = Get-GitHubPathTraffic -Uri $_.url }
+$repos | ForEach-Object {$_.path_traffic_count = [int]($_.path_traffic | Measure-Object -Sum count).Sum }
+$repos | ForEach-Object {$_.path_traffic_uniques = [int]($_.path_traffic | Measure-Object -Sum uniques).Sum }
+$repos | ForEach-Object {
+    if ($_.path_traffic_count -gt 0) {
+        $uriPrefix = ($_.full_name)
+        $_.path_traffic | ForEach-Object {
+            # Strip the path segments of a repository that has since been renamed, so old and new paths match
+            $_.path = $_.path.replace('/Useful-SQL-Queries-for-Umich-Research-Centers','').replace('/Useful-SQL-Queries-for-UMich-Research-Centers','')
+
+            # Remove /organizationname/reponame/ from the beginning of each path, to reduce field size
+            $_.path = $_.path.replace($uriPrefix,'').replace('/'+$organizationName,'').replace($organizationName+'/','').replace('//','/')
+            $_.path = $_.path.replace('blob/main/README.md','').replace('/README.md','')
+            if(($_.path -eq '') -or ($null -eq $_.path) -or ($_.path -eq '/') -or ($_.path -eq 'README.md') -or ($_.path -eq 'blob/main')-or ($_.path -eq 'tree/main')) {$_.path = 'Home'}  # empty, root and README-only paths are reported as 'Home'
+        }
+        $_.path_traffic_csv = [String]::Join(",", ($_.path_traffic | ForEach-Object {$_.path + '|' + $_.count + '|' + $_.uniques}) )  # entries formatted as path|count|uniques
+    }
+} -ErrorAction Ignore
+
+
+# View Traffic
+Write-Host "Getting view traffic..."
+$repos | ForEach-Object {$_.view_traffic = Get-GitHubViewTraffic -Uri $_.url }
+$repos | ForEach-Object {$_.view_traffic_count = [int]($_.view_traffic | Measure-Object -Sum count).Sum }
+$repos | ForEach-Object {$_.view_traffic_uniques = [int]($_.view_traffic | Measure-Object -Sum uniques).Sum }
+# Convert each view timestamp through the 'Greenwich Standard Time' zone so it can be compared with the UTC capture date
+$repos | ForEach-Object {$_.view_traffic.views | ForEach-Object {$_.timestamp = [DateTime][System.TimeZoneInfo]::ConvertTimeBySystemTimeZoneId( $_.timestamp, 'Greenwich Standard Time').DateTime}}
+# Filter views object array to pick only yesterday's data, based on a UTC timestamp and today's date in UTC
+$repos | ForEach-Object { $_.view_traffic_count_yesterday = [int]( @($_.view_traffic.views | Where-Object {$_.timestamp.Date -EQ $DateCaptured.Date.AddDays(-1)}) | Measure-Object -Sum count).Sum }
+$repos | ForEach-Object { $_.view_traffic_uniques_yesterday = [int]( @($_.view_traffic.views | Where-Object {$_.timestamp.Date -EQ $DateCaptured.Date.AddDays(-1)}) | Measure-Object -Sum uniques).Sum }
+# To return all view traffic for the past 14 days, use this line: $repos | ForEach-Object {if ($_.view_traffic_count -gt 0) {$_.view_traffic_csv = [String]::Join(",", ($_.view_traffic.views | ForEach-Object {$_.timestamp.ToString("MM/dd/yyyy hh:mm:ss tt") + '|' + $_.count + '|' + $_.uniques}) ) }} -ErrorAction Ignore
+$repos | ForEach-Object {if ($_.view_traffic_count_yesterday -gt 0) {$_.view_traffic_csv = [String]::Join(",", (@($_.view_traffic.views | Where-Object {$_.timestamp.Date -EQ ($DateCaptured.Date.AddDays(-1))}) | ForEach-Object {$_.timestamp.ToString("MM/dd/yyyy hh:mm:ss tt") + '|' + $_.count + '|' + $_.uniques}) ) }} -ErrorAction Ignore  # yesterday's entries as timestamp|count|uniques
+
+# Clone Traffic
+Write-Host "Getting clone traffic..."
+$repos | ForEach-Object {$_.clone_traffic = Get-GitHubCloneTraffic -Uri $_.url }
+$repos | ForEach-Object {$_.clone_traffic_count = [int]($_.clone_traffic | Measure-Object -Sum count).Sum }
+$repos | ForEach-Object {$_.clone_traffic_uniques = [int]($_.clone_traffic | Measure-Object -Sum uniques).Sum }
+# Convert each clone timestamp through the 'Greenwich Standard Time' zone so it can be compared with the UTC capture date
+$repos | ForEach-Object {$_.clone_traffic.clones | ForEach-Object {$_.timestamp = [DateTime][System.TimeZoneInfo]::ConvertTimeBySystemTimeZoneId( $_.timestamp, 'Greenwich Standard Time').DateTime}}
+# Filter clones object array to pick only yesterday's data, based on a UTC timestamp and today's date in UTC
+$repos | ForEach-Object { $_.clone_traffic_count_yesterday = [int]( @($_.clone_traffic.clones | Where-Object {$_.timestamp.Date -EQ $DateCaptured.Date.AddDays(-1)}) | Measure-Object -Sum count).Sum }
+$repos | ForEach-Object { $_.clone_traffic_uniques_yesterday = [int]( @($_.clone_traffic.clones | Where-Object {$_.timestamp.Date -EQ $DateCaptured.Date.AddDays(-1)}) | Measure-Object -Sum uniques).Sum }
+# To return all clones, use this instead of the line below: $repos | ForEach-Object {if ($_.clone_traffic_count -gt 0) {$_.clone_traffic_csv = [String]::Join(",", ($_.clone_traffic.clones | ForEach-Object {$_.timestamp.ToString("MM/dd/yyyy hh:mm:ss tt") + '|' + $_.count + '|' + $_.uniques}) ) }} -ErrorAction Ignore
+$repos | ForEach-Object {if ($_.clone_traffic_count_yesterday -gt 0) {$_.clone_traffic_csv = [String]::Join(",", (@($_.clone_traffic.clones | Where-Object {$_.timestamp.Date -EQ ($DateCaptured.Date.AddDays(-1))}) | ForEach-Object {$_.timestamp.ToString("MM/dd/yyyy hh:mm:ss tt") + '|' + $_.count + '|' + $_.uniques}) ) }} -ErrorAction Ignore  # yesterday's entries as timestamp|count|uniques
+
+
+# Events
+# Get-GitHubEvent does not appear to work, so call API directly
+Write-Host "Getting events (commit, merge, etc.)..."
+$repos | ForEach-Object { $_.events = ( (Invoke-GHRestMethod -Uri $_.events_url -Method Get) | Sort-Object -Unique -Descending -Property created_at) }  # de-duplicated, newest first
+$repos | ForEach-Object { $_.events_count = $_.events.Count }
+$repos | ForEach-Object { $_.events_uniques = [int]($_.events | Select-Object -ExpandProperty actor | Select-Object -ExpandProperty login -Unique).Count }  # number of distinct actor logins
+# Convert each event's created_at through the 'Greenwich Standard Time' zone so it can be compared with the UTC capture date
+$repos | ForEach-Object {if ($_.events_count -gt 0) {$_.events | ForEach-Object {$_.created_at = [DateTime][System.TimeZoneInfo]::ConvertTimeBySystemTimeZoneId( $_.created_at, 'Greenwich Standard Time').DateTime}}}
+# Filter events object array to pick only yesterday's data, based on a UTC timestamp and today's date in UTC
+$repos | ForEach-Object { $_.events_count_yesterday = @($_.events | Where-Object {$_.created_at.Date -EQ ($DateCaptured.Date.AddDays(-1))}).Count }
+$repos | ForEach-Object { $_.events_uniques_yesterday = ( @($_.events | Where-Object {$_.created_at.Date -EQ ($DateCaptured.Date.AddDays(-1))}) | Select-Object -ExpandProperty actor | Select-Object -ExpandProperty login -Unique).Count }
+# Create CSV of yesterday's events only; each entry is timestamp|type[/action]|actor login, with the 'Event' suffix removed from the type
+$repos | ForEach-Object {if ($_.events_count_yesterday -gt 0) {$_.events_csv = [String]::Join(",", (@($_.events | Where-Object {$_.created_at.Date -EQ ($DateCaptured.Date.AddDays(-1))}) | ForEach-Object {$_.created_at.ToString("MM/dd/yyyy hh:mm:ss tt") + '|' + $_.type.replace('Event','') + $(if ($_.payload.action) {'/' + $_.payload.action} else {''}) + '|' + $_.actor.login}) ) }} -ErrorAction Ignore
+
+
+# Pushes / Commits
+Write-Host "Parsing commits from event history..."
+$repos | ForEach-Object { $_.pushes = ( $_.events | Where-Object -Property type -eq 'PushEvent' ); $_.pushes_count = $_.pushes.Count }  # pushes are the PushEvent entries of the already-fetched events; no extra API call
+$repos | ForEach-Object { $_.pushes_uniques = [int]($_.pushes | Select-Object -ExpandProperty actor | Select-Object -ExpandProperty login -Unique).Count }
+# Filter to get only yesterday's pushes (relies on created_at having been converted in the events section)
+$repos | ForEach-Object { $_.pushes_count_yesterday = @($_.pushes | Where-Object {$_.created_at.Date -EQ ($DateCaptured.Date.AddDays(-1))}).Count }
+$repos | ForEach-Object { $_.pushes_uniques_yesterday = [int]( @($_.pushes | Where-Object {$_.created_at.Date -EQ ($DateCaptured.Date.AddDays(-1))}) | Select-Object -ExpandProperty actor | Select-Object -ExpandProperty login -Unique).Count }
+$repos | ForEach-Object {if ($_.pushes_count_yesterday -gt 0) {$_.pushes_csv = [String]::Join(",", (@($_.pushes | Where-Object {$_.created_at.Date -EQ ($DateCaptured.Date.AddDays(-1))}) | ForEach-Object {$_.created_at.ToString("MM/dd/yyyy hh:mm:ss tt") + '|' + $_.type.replace('Event','') + $(if ($_.payload.action) {'/' + $_.payload.action} else {''}) + '|' + $_.actor.login}) ) }} -ErrorAction Ignore  # same entry format as events_csv
+
+
+
+# Forks
+Write-Host "Getting forks..."
+$repos | ForEach-Object { $_.forks = (Get-GitHubRepositoryFork -Uri $_.url); $_.forks_count = $_.forks.Count }
+$repos | ForEach-Object { $_.forks_uniques = [int]($_.forks | Select-Object -ExpandProperty owner | Select-Object -ExpandProperty login -Unique).Count }  # number of distinct fork owners
+# Convert each fork's created_at through the 'Greenwich Standard Time' zone so it can be compared with the UTC capture date
+$repos | ForEach-Object {if ($_.forks_count -gt 0) {$_.forks | ForEach-Object {$_.created_at = [DateTime][System.TimeZoneInfo]::ConvertTimeBySystemTimeZoneId( $_.created_at, 'Greenwich Standard Time').DateTime}}}
+# Get yesterday's forks only; forks_csv entries are formatted as timestamp|fork name|owner login
+$repos | ForEach-Object { $_.forks_count_yesterday = @($_.forks | Where-Object {$_.created_at.Date -EQ ($DateCaptured.Date.AddDays(-1))}).Count }
+$repos | ForEach-Object { $_.forks_uniques_yesterday = [int]( @($_.forks | Where-Object {$_.created_at.Date -EQ ($DateCaptured.Date.AddDays(-1))}) | Select-Object -ExpandProperty owner | Select-Object -ExpandProperty login -Unique).Count }
+$repos | ForEach-Object {if ($_.forks_count_yesterday -gt 0) {$_.forks_csv = [String]::Join(",", (@($_.forks | Where-Object {$_.created_at.Date -EQ ($DateCaptured.Date.AddDays(-1))}) | ForEach-Object {$_.created_at.ToString("MM/dd/yyyy hh:mm:ss tt") + '|' + $_.name + '|' + $_.owner.login}) ) }} -ErrorAction Ignore
+
+
+# Pull Requests (fetched from pulls_url with its '{/number}' placeholder removed; only a total count is kept)
+Write-Host "Getting pull requests..."
+$repos | ForEach-Object { $_.pulls = (Invoke-GHRestMethod -Uri $_.pulls_url.replace("{/number}","") -Method Get); $_.pulls_count = $_.pulls.Count }
+# TODO: Add counts of unique users, active pulls, closed pulls, total pulls, and perhaps pulls_csv of users requesting pulls yesterday
+
+
+# Issues
+Write-Host "Getting issues (tickets/tasks)..."
+$repos | ForEach-Object { $_.issues = (Get-GitHubIssue -Uri $_.url); $_.issues_count = $_.issues.Count }
+$repos | ForEach-Object { $_.issues_uniques = [int]($_.issues | Select-Object -ExpandProperty user | Select-Object -ExpandProperty login -Unique).Count }  # number of distinct issue authors
+$repos | ForEach-Object { $_.issues_open_count = ($_.issues | Where-Object -Property state -EQ "open").Count }
+# Convert created_at and closed_at through the 'Greenwich Standard Time' zone so both can be compared with the UTC capture date; null dates are skipped
+$repos | ForEach-Object { $_.issues | Where-Object -Property created_at -NE $null | ForEach-Object { $_.created_at = [DateTime][System.TimeZoneInfo]::ConvertTimeBySystemTimeZoneId( $_.created_at, 'Greenwich Standard Time').DateTime } }
+$repos | ForEach-Object { $_.issues | Where-Object -Property closed_at -NE $null | ForEach-Object { $_.closed_at = [DateTime][System.TimeZoneInfo]::ConvertTimeBySystemTimeZoneId( $_.closed_at, 'Greenwich Standard Time').DateTime } }
+# Filter issues object array to pick only yesterday's data, based on a UTC timestamp and today's date in UTC
+$repos | ForEach-Object { $_.issues_count_opened_yesterday = [int]( @($_.issues | Where-Object {$_.created_at.Date -EQ $DateCaptured.Date.AddDays(-1)}).Count) }
+$repos | ForEach-Object { $_.issues_count_closed_yesterday = [int]( @($_.issues | Where-Object {$_.closed_at.Date -EQ $DateCaptured.Date.AddDays(-1)}).Count) }
+$repos | ForEach-Object { $_.issues_uniques_opened_yesterday = [int]( @($_.issues | Where-Object {$_.created_at.Date -EQ $DateCaptured.Date.AddDays(-1)} | Select-Object -ExpandProperty user | Select-Object -ExpandProperty login -Unique).Count) }
+$repos | ForEach-Object { $_.issues_uniques_closed_yesterday = [int]( @($_.issues | Where-Object {$_.closed_at.Date -EQ $DateCaptured.Date.AddDays(-1)} | Select-Object -ExpandProperty user | Select-Object -ExpandProperty login -Unique).Count) }
+
+
+
+# Topics (tags at the repo level)
+Write-Host "Getting repository-level topics (tags)..."
+$repos | ForEach-Object {if ($_.topics.Count -gt 0) {$_.topics_csv = [String]::Join(",", $_.topics ) }} -ErrorAction Ignore
+
+
+
+# Build one flat summary object per repository for the summary JSON and CSV exports
+# NOTE: Format your data types or CSV fields as needed in the loop below
+Write-Host "Processing usage stats..."
+$usageStats = @()
+foreach ($repo in $repos)
+{
+    $usageStats += [PSCustomObject]@{
+        # Sync info
+        DateCaptured = [DateTime]$DateCaptured
+
+        # Repository info
+        name = [String]$repo.name
+        full_name = [String]$repo.full_name
+        owner = [String]$repo.owner.login
+        description = [String]$repo.description
+        url = [String]$repo.RepositoryUrl
+
+        # Repository properties
+        created = [DateTime]$repo.created_at
+        updated = [DateTime]$repo.updated_at
+        pushed = [DateTime]$repo.pushed_at
+        size = [float]$repo.size
+        visibility = $repo.visibility
+        is_fork = [int]$repo.fork #remove [int] if you want boolean data type
+        is_archived = [int]$repo.archived #remove [int] if you want boolean data type
+        is_template = [int]$repo.is_template #remove [int] if you want boolean data type
+        topics_csv = $repo.topics_csv
+
+        # Events
+        events_count = [int]$repo.events_count
+        events_uniques = [int]$repo.events_uniques
+        events_count_yesterday = [int]$repo.events_count_yesterday
+        events_uniques_yesterday = [int]$repo.events_uniques_yesterday
+        events_csv = $repo.events_csv
+
+        # Pushes / Commits
+        pushes_count = [int]$repo.pushes_count
+        pushes_uniques = [int]$repo.pushes_uniques
+        pushes_count_yesterday = [int]$repo.pushes_count_yesterday
+        pushes_uniques_yesterday = [int]$repo.pushes_uniques_yesterday
+        pushes_csv = $repo.pushes_csv
+
+        # Pull requests
+        pulls_count = [int]$repo.pulls_count
+
+        # Discussions
+        has_discussions_enabled = [int]$repo.has_discussions #remove [int] if you want boolean data type
+
+        # Issues
+        has_issues_enabled = [int]$repo.has_issues #remove [int] if you want boolean data type
+        issues_count = [int]$repo.issues_count
+        issues_open_count = [int]$repo.issues_open_count
+        issues_uniques = [int]$repo.issues_uniques
+        issues_count_opened_yesterday = [int]$repo.issues_count_opened_yesterday
+        issues_count_closed_yesterday = [int]$repo.issues_count_closed_yesterday
+        issues_uniques_opened_yesterday = [int]$repo.issues_uniques_opened_yesterday
+        issues_uniques_closed_yesterday = [int]$repo.issues_uniques_closed_yesterday
+
+
+        # Forks
+        forks_count = [int]$repo.forks_count
+        forks_count_yesterday = [int]$repo.forks_count_yesterday
+        forks_uniques = [int]$repo.forks_uniques
+        forks_uniques_yesterday = [int]$repo.forks_uniques_yesterday
+        forks_csv = $repo.forks_csv
+
+
+
+        # Stargazers (Favorites)
+        stargazers_count = [int]$repo.stargazers_count
+        stargazers_csv = $repo.stargazers_csv
+
+
+        # Watchers (Subscriptions)
+        watchers_count = [int]$repo.watchers_count
+        watchers_csv = $repo.watchers_csv
+
+
+        # Contributors and Contributions
+        contributors_count = [int]$repo.contributors_count
+        contributors_csv = $repo.contributors_csv
+        contributions_count = [int]$repo.contributions_count
+        contributors_detail_csv = $repo.contributors_detail_csv
+
+
+        # Collaborators
+        collaborators_count = [int]$repo.collaborators_count
+        collaborators_csv = $repo.collaborators_csv
+
+
+        # Clones
+        clone_traffic_count = [int]$repo.clone_traffic_count
+        clone_traffic_count_yesterday = [int]$repo.clone_traffic_count_yesterday
+        clone_traffic_uniques = [int]$repo.clone_traffic_uniques
+        clone_traffic_uniques_yesterday = [int]$repo.clone_traffic_uniques_yesterday
+        clone_traffic_csv = $repo.clone_traffic_csv
+
+        # Web Traffic
+        referrer_traffic_count = [int]$repo.referrer_traffic_count
+        referrer_traffic_uniques = [int]$repo.referrer_traffic_uniques
+        referrer_traffic_csv = $repo.referrer_traffic_csv
+        path_traffic_count = [int]$repo.path_traffic_count
+        path_traffic_uniques = [int]$repo.path_traffic_uniques
+        path_traffic_csv = $repo.path_traffic_csv
+        view_traffic_count = [int]$repo.view_traffic_count
+        view_traffic_count_yesterday = [int]$repo.view_traffic_count_yesterday
+        view_traffic_uniques = [int]$repo.view_traffic_uniques
+        view_traffic_uniques_yesterday = [int]$repo.view_traffic_uniques_yesterday
+        view_traffic_csv = $repo.view_traffic_csv
+    }
+}
+
+$usageStatsFull = [PSCustomObject]@{
+    DateCaptured = $DateCaptured
+    repo_stats = $repos
+}
+
+# Export results to JSON. The detailed export nests whole repository objects, so it needs -Depth above ConvertTo-Json's default of 2 to avoid truncation.
+Write-Host "Saving results in JSON format at: $jsonOutputPath"
+$usageStats | ConvertTo-Json | Out-File -FilePath $jsonOutputPath
+$usageStatsFull | ConvertTo-Json -Depth 10 | Out-File -FilePath $jsonDetailedOutputPath
+
+# Export the summary to CSV: a per-run snapshot file, plus a rolling file that is appended to on every run
+Write-Host "Saving results in CSV format at: $csvOutputPath"
+$usageStats | Export-Csv -Path $csvOutputPath -NoTypeInformation
+$usageStats | Export-Csv -Path $csvRollingOutputPath -NoTypeInformation -Append
+
+Write-Host "Done."
diff --git a/README.md b/README.md index d0e4b71..d42c25d 100644 --- a/README.md +++ b/README.md @@ -3,29 +3,38 @@ # GitHub Usage Stats ## Description -Scripts to capture GitHub repository and usage statistics daily. It is designed to get statistics for all repos under a GitHub Enterprise-licensed organization, but it should also work for personal repositories. -The PowerShell script was moved here from its previous location at: [https://github.com/DepressionCenter/MTC-Internal-Tools-and-Automation.](https://github.com/DepressionCenter/MTC-Internal-Tools-and-Automation/blob/main/PowerShell%20Scripts/ExportGitHubUsageStatsForOrganization.ps1) +Scripts to capture GitHub repository and usage statistics daily, for all repositories under an organization that uses GitHub Enterprise. Simply download the PowerShell script, edit the settings towards the top of the file (file paths, API key, Organization name, etc.), and run it. You can also schedule it as a Windows Task, or import into a database with an external ETL or ELT tool.
+![GitHub Usage Stats Sample Screenshot](https://github.com/DepressionCenter/GitHub-Usage-Stats/blob/main/images/GitHub-Usage-Stats-Output-Example.png?raw=true "Sample output from this GitHub Usage Stats script.") ## Quick Start Guide -+ Get an API key from the Organization you want to use (with read permissions). This must be done by an admin of the organization. If running this for personal repos, get an API key from yur own account -+ Install the PowerShellForGitHub module in PowerShell for the system, or for the user who will run the script -+ Download the PowerShell script (ExportGitHubUsageStatsForOrganization.ps1) -+ Edit the settings at the top of the script, including the Organization Name variable -+ Create a directory for the output files, c:\GitHubStats, or as configured in previous step -+ Run the script in PowerShell ++ Get a GitHub API key with read permissions to your organization. ++ Set GITHUB_USERNAME and GITHUB_API_KEY in the system environment variables ++ Install the PowerShellForGitHub module in PowerShell. ++ Download the PowerShell script (ExportGitHubUsageStatsForOrganization.ps1). ++ Edit the settings at the top of the script, including the Organization Name variable. ++ Create a directory for the output files, c:\GitHubStats, or as configured in previous step. ++ Run the script in PowerShell. + Grab the CSV or JSON files from the output directory. Files are replaced except for the "rolling" file which appends to previous days' data. -+ Optional step: Use Windows Scheduler to run the script daily, and make the output directory a file share with appropriate permissions. ## Documentation + +### General Information + The statistics will be dumped into both CSV and JSON files in the output directory, including: - + **{efdc}-github-stats.csv** - today's snapshot in CSV format; web traffic goes back 14 days. File is replaced at each run. - + **{efdc}-github-stats.json** - today's snapshot in JSON format; web traffic goes back 14 days. 
File is replaced at each run. - + **{efdc}-github-stats-rolling.csv** - stoday's snapshot added to the same CSV, without deleting previous data. -+ All the counts are 14-day totals, not for an individual day. + + **github-stats-{OrganizationName}.csv** - today's snapshot in CSV format. File is replaced at each run. Recommended for loading into a database. + + **github-stats-{OrganizationName}.json** - today's snapshot in JSON format. File is replaced at each run. Recommended for loading into a database. + + **github-stats-detailed-{OrganizationName}.json** - today's snapshot in JSON format, with all details included. File is replaced at each run. It can be used for debugging and troubleshooting. + + **github-stats-rolling-{OrganizationName}.csv** - today's snapshot added to the same CSV, without deleting previous data. This file can be used to create reports directly in Excel, Tableau, PowerBI, etc. without the need for a database. ++ All the counts not labeled "yesterday" are 14-day totals, not for an individual day. ++ Note that all dates and times are in Coordinated Universal Time (UTC, equivalent to GMT). + +### Loading Into a Database ++ The script(s) under the SQL-Database-Scripts folder can be used to create a table to host and accumulate the data. ++ + Currently, the only script(s) available are for Oracle databases. Some work may be required to use a different database engine. ++ The PowerShell script does not currently save to the database directly. A data pipeline is needed to load the data into a database.
## Additional Resources diff --git a/SQL-Database-Scripts/GITHUB_USAGE_RAW.sql b/SQL-Database-Scripts/GITHUB_USAGE_RAW.sql new file mode 100644 index 0000000..54254e2 --- /dev/null +++ b/SQL-Database-Scripts/GITHUB_USAGE_RAW.sql @@ -0,0 +1,121 @@
+-- USAGESTATS.GITHUB_USAGE_RAW definition (Oracle DDL). Columns mirror the fields exported by the PowerShell script; its "size" field is stored as REPOSITORY_SIZE.
+CREATE TABLE USAGESTATS.GITHUB_USAGE_RAW
+   ( "LOGID" NUMBER GENERATED BY DEFAULT AS IDENTITY MINVALUE 1 MAXVALUE 999999999999 INCREMENT BY 1 START WITH 1 CACHE 20 NOORDER NOCYCLE NOKEEP NOSCALE NOT NULL ENABLE,
+    DATECAPTURED DATE NOT NULL ENABLE,
+    NAME NVARCHAR2(64) NOT NULL ENABLE,
+    FULL_NAME NVARCHAR2(128) NOT NULL ENABLE,
+    OWNER NVARCHAR2(64) NOT NULL ENABLE,
+    DESCRIPTION NVARCHAR2(256),
+    URL NVARCHAR2(128) NOT NULL ENABLE,
+    CREATED DATE NOT NULL ENABLE,
+    UPDATED DATE,
+    PUSHED DATE,
+    REPOSITORY_SIZE NUMBER(*,0),
+    VISIBILITY NVARCHAR2(24),
+    IS_FORK NUMBER(1,0),
+    IS_ARCHIVED NUMBER(1,0),
+    IS_TEMPLATE NUMBER(1,0),
+    TOPICS_CSV NVARCHAR2(512),
+    EVENTS_COUNT NUMBER(*,0),
+    EVENTS_UNIQUES NUMBER(*,0),
+    EVENTS_COUNT_YESTERDAY NUMBER(*,0),
+    EVENTS_UNIQUES_YESTERDAY NUMBER(*,0),
+    EVENTS_CSV VARCHAR2(4000),
+    PUSHES_COUNT NUMBER(*,0),
+    PUSHES_UNIQUES NUMBER(*,0),
+    PUSHES_COUNT_YESTERDAY NUMBER(*,0),
+    PUSHES_UNIQUES_YESTERDAY NUMBER(*,0),
+    PUSHES_CSV VARCHAR2(4000),
+    PULLS_COUNT NUMBER(*,0),
+    HAS_DISCUSSIONS_ENABLED NUMBER(1,0),
+    HAS_ISSUES_ENABLED NUMBER(1,0),
+    ISSUES_COUNT NUMBER(*,0),
+    ISSUES_OPEN_COUNT NUMBER(*,0),
+    ISSUES_UNIQUES NUMBER(*,0),
+    ISSUES_COUNT_OPENED_YESTERDAY NUMBER(*,0),
+    ISSUES_COUNT_CLOSED_YESTERDAY NUMBER(*,0),
+    ISSUES_UNIQUES_OPENED_YESTERDAY NUMBER(*,0),
+    ISSUES_UNIQUES_CLOSED_YESTERDAY NUMBER(*,0),
+    FORKS_COUNT NUMBER(*,0),
+    FORKS_COUNT_YESTERDAY NUMBER(*,0),
+    FORKS_UNIQUES NUMBER(*,0),
+    FORKS_UNIQUES_YESTERDAY NUMBER(*,0),
+    FORKS_CSV VARCHAR2(4000),
+    STARGAZERS_COUNT NUMBER(*,0),
+    STARGAZERS_CSV VARCHAR2(4000),
+    WATCHERS_COUNT NUMBER(*,0),
+    WATCHERS_CSV VARCHAR2(4000),
+    CONTRIBUTORS_COUNT NUMBER(*,0),
+    CONTRIBUTORS_CSV VARCHAR2(4000),
+    CONTRIBUTIONS_COUNT NUMBER(*,0),
+    CONTRIBUTORS_DETAIL_CSV VARCHAR2(4000),
+    COLLABORATORS_COUNT NUMBER(*,0),
+    COLLABORATORS_CSV VARCHAR2(4000),
+    CLONE_TRAFFIC_COUNT NUMBER(*,0),
+    CLONE_TRAFFIC_COUNT_YESTERDAY NUMBER(*,0),
+    CLONE_TRAFFIC_UNIQUES NUMBER(*,0),
+    CLONE_TRAFFIC_UNIQUES_YESTERDAY NUMBER(*,0),
+    CLONE_TRAFFIC_CSV VARCHAR2(4000),
+    REFERRER_TRAFFIC_COUNT NUMBER(*,0),
+    REFERRER_TRAFFIC_UNIQUES NUMBER(*,0),
+    REFERRER_TRAFFIC_CSV VARCHAR2(4000),
+    PATH_TRAFFIC_COUNT NUMBER(*,0),
+    PATH_TRAFFIC_UNIQUES NUMBER(*,0),
+    PATH_TRAFFIC_CSV VARCHAR2(4000),
+    VIEW_TRAFFIC_COUNT NUMBER(*,0),
+    VIEW_TRAFFIC_COUNT_YESTERDAY NUMBER(*,0),
+    VIEW_TRAFFIC_UNIQUES NUMBER(*,0),
+    VIEW_TRAFFIC_UNIQUES_YESTERDAY NUMBER(*,0),
+    VIEW_TRAFFIC_CSV VARCHAR2(4000)
+   ) SEGMENT CREATION DEFERRED
+  PCTFREE 10 PCTUSED 40 INITRANS 1 MAXTRANS 255
+  NOCOMPRESS LOGGING
+  TABLESPACE "USERS_PDB" ;
+COMMIT;
+
+/* Grant Permissions */
+/*
+GRANT SELECT ON USAGESTATS.GITHUB_USAGE_RAW TO "USERID";
+  GRANT INSERT ON USAGESTATS.GITHUB_USAGE_RAW TO "USERID";
+  GRANT UPDATE ON USAGESTATS.GITHUB_USAGE_RAW TO "USERID";
+  GRANT DELETE ON USAGESTATS.GITHUB_USAGE_RAW TO "USERID";
+  GRANT REFERENCES ON USAGESTATS.GITHUB_USAGE_RAW TO "USERID";
+  GRANT SELECT ON USAGESTATS.GITHUB_USAGE_RAW TO "REPORTING_USERID";
+  GRANT REFERENCES ON USAGESTATS.GITHUB_USAGE_RAW TO "REPORTING_USERID";
+COMMIT;
+*/
+
+COMMENT ON TABLE USAGESTATS.GITHUB_USAGE_RAW IS 'Usage statistics from GitHub captured daily, in raw format (from GitHub''s API).';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.LOGID IS 'Auto-incremented numeric ID that uniquely identifies each entry.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.DateCaptured IS 'The date and time when the data was downloaded from the API, in UTC.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Name IS 'The name of the code repository.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Full_Name IS 'The full name of the code repository, which includes the name of the organization that owns it.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Owner IS 'The name of the organization that owns the repository.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Description IS 'The public short description of the repository, as shown in GitHub.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.URL IS 'The full HTTPS URL to the repository.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Created IS 'The date and time when the repository was created, in UTC.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Updated IS 'The date and time when the repository was last updated, in UTC.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Pushed IS 'The date and time of the last ''push'' (commit) to the repository, in UTC.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Repository_Size IS 'The current size of the repository, in kilobytes.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Visibility IS 'Repository visibility type (e.g. Public, Private, Internal).';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Is_Fork IS 'Boolean (1/0) value indicating whether this repository is a fork of another.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Is_Archived IS 'Boolean (1/0) value indicating whether this repository has been archived and is no longer active.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Is_Template IS 'Boolean (1/0) value indicating whether this repository is available to use as a template for future repositories.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Topics_CSV IS 'Topics (tags) for the repository itself (not to be confused with commit or branch tags).';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Events_Count IS 'The total number of events in this repository, including pushes, pulls, following, etc.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Events_Uniques IS 'The total number of unique users who performed an action on this repository, including pushes, pulls, following, etc.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Events_Count_Yesterday IS 'The number of events in this repository, including pushes, pulls, following, etc., that occurred one day before the capture date.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Events_Uniques_Yesterday IS 'The number of unique users who performed an action on this repository, including pushes, pulls, following, etc., on the day before the capture date.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Events_CSV IS 'Comma-separated list of events that occurred one day prior to the capture date. Fields inside each value are separated by pipes. Format: MM/dd/yyyy hh:mm:ss tt|EventType{/Action}|Actor_Login, ...';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Pushes_CSV IS 'Comma-separated list of pushes (commits) that occurred one day prior to the capture date. Fields inside each value are separated by pipes. Format: MM/dd/yyyy hh:mm:ss tt|EventType{/Action}|Actor_Login, ...';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Forks_CSV IS 'Comma-separated list of forks (copies) that occurred one day prior to the capture date. Fields inside each value are separated by pipes. Format: MM/dd/yyyy hh:mm:ss tt|RepositoryName|Owner_Login, ...';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Stargazers_CSV IS 'Comma-separated list of users who have starred (bookmarked) this repository.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Watchers_CSV IS 'Comma-separated list of users who have subscribed to (watching) activity in this repository.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Contributors_CSV IS 'Comma-separated list of users who have contributed to this repository. Note that this field pulls UserName while all others pull Login, but generally both are the same.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Contributors_detail_CSV IS 'Comma-separated list of users who have contributed to this repository and the number of contributions. Format: UserName|0,UserName|0,...';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Collaborators_CSV IS 'Comma-separated list of users who are internal or external collaborators, meaning their account is set up on this repo with write or admin rights.';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Clone_Traffic_CSV IS 'Comma-separated list of clones (downloads, that is, copies made to local repo with the git clone command) that occurred one day prior to the capture date. Fields inside each value are separated by pipes. Format: MM/dd/yyyy hh:mm:ss tt|Count#|Uniques#, ...';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Referrer_Traffic_CSV IS 'Comma-separated list of referrals that occurred one day prior to the capture date. Fields inside each value are separated by pipes. Format: MM/dd/yyyy hh:mm:ss tt|Count#|Uniques#, ...';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.Path_Traffic_CSV IS 'Comma-separated list of URLs visited (that had any views) that occurred in the past 14 days, including the capture date. Only partial URLs are returned (anything after /reponame/), and any blank paths or views of README.md are transformed to read Home (as in home page). Fields inside each value are separated by pipes. Format: PathText|Count#|Uniques#, ...';
+COMMENT ON COLUMN USAGESTATS.GITHUB_USAGE_RAW.View_Traffic_CSV IS 'Comma-separated list of views that occurred one day prior to the capture date. Fields inside each value are separated by pipes. Format: MM/dd/yyyy hh:mm:ss tt|Count#|Uniques#, ...';
+COMMIT;
diff --git a/images/GitHub-Usage-Stats-Output-Example.png b/images/GitHub-Usage-Stats-Output-Example.png new file mode 100644 index 0000000..bdd8318 Binary files /dev/null and b/images/GitHub-Usage-Stats-Output-Example.png differ