# Geography of Organisations

An analysis of the geographic distribution of the organisations behind OSS in sustainability shows that an overwhelming majority (64%) are based in Europe and North America. 28.7% of the projects are considered global, as no geographical affiliation could be identified. [Economic complexity](https://en.wikipedia.org/wiki/Economic_Complexity_Index) – the idea that the more diverse the knowledge accumulated in a population, the greater its productive capabilities – may partly explain the distribution of open source communities between continents.

In [21]:
import dateparser
import datetime
import handcalcs.render
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import pycountry
from pycountry_convert import (
    country_alpha2_to_continent_code,
    country_alpha3_to_country_alpha2,
)
from opensustainTemplate import *

In [22]:
# Load the organisations dataset from its CSV file into a DataFrame.
df_organizations = pd.read_csv("../csv/github_organizations.csv")

In [23]:
# Clean up the dataset
def name_to_iso3(x):
    """Resolve a free-text country name to an ISO 3166-1 alpha-3 code.

    The literal "UK" is expanded to "United Kingdom" before the lookup; the
    first hit of pycountry's fuzzy country search is then used.

    Arguments:
        x - a string with a country name

    Outputs:
        A string with the ISO 3166-1 alpha-3 code of the best match, or an
        empty string if x is not a string or no country matches

    """

    # Non-string input (e.g. the NaN pandas uses for a missing cell) cannot
    # be searched, so it is treated as "no country".
    if not isinstance(x, str):
        return ""
    if x == "UK":
        x = "United Kingdom"
    try:
        iso3 = pycountry.countries.search_fuzzy(x)[0].alpha_3
    except LookupError:
        # search_fuzzy reports "nothing matched" with LookupError; anything
        # else (including KeyboardInterrupt) is allowed to propagate.
        iso3 = ""
    return iso3


def alpha3_to_alpha2(x):
    """Convert an ISO 3166-1 alpha-3 country code to its alpha-2 code.

    Arguments:
        x - a string with a country code following ISO 3166-1 alpha-3

    Outputs:
        A string with the matching ISO 3166-1 alpha-2 country code, or an
        empty string if x is empty or not a known alpha-3 code

    """

    # An empty/missing code can never be valid, so skip the lookup.
    if not x:
        return ""
    try:
        alpha_2 = country_alpha3_to_country_alpha2(x)
    except KeyError:
        # pycountry_convert raises KeyError for codes it does not know.
        alpha_2 = ""
    return alpha_2


def alpha2_to_continent(x):
    """Convert an ISO 3166-1 alpha-2 country code to a continent code.

    Arguments:
        x - a string with a country code following ISO 3166-1 alpha-2

    Outputs:
        A string with the two-letter continent code returned by
        pycountry_convert (e.g. "EU", "NA"), or an empty string if x is
        empty or has no continent mapping

    """

    # An empty/missing code can never be valid, so skip the lookup.
    if not x:
        return ""
    try:
        continent = country_alpha2_to_continent_code(x)
    except KeyError:
        # pycountry_convert raises KeyError for codes without a mapping.
        continent = ""
    return continent


def upper_string(lower_string):
    """Return the given text in title case
    Arguments:
        lower_string - a string
    Outputs:
        The same string with the first letter of every word capitalised
        and the remaining letters lower-cased

    """

    titled = lower_string.title()
    return titled


def calc_age(start_date):
    """Return the time elapsed since start_date, expressed in years
    Arguments:
        start_date - a date, as text accepted by dateparser.parse
    Outputs:
        A float: the whole days between start_date and now, divided by 365

    """
    now = datetime.datetime.now()
    started = dateparser.parse(start_date, settings={"TIMEZONE": "CEST"})
    elapsed = now - started
    return elapsed.days / 365


def count_strings(comma_seperated_string):
    """Count number of delimiters (commas) in a string
    Arguments:
        comma_seperated_string - a string containing commas; any non-string
            value (e.g. None or NaN) is accepted and counts as zero
    Outputs:
        A number (int) of commas found in comma_seperated_string, or 0 when
        the argument is not a string

    """

    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses, which would otherwise be silently reported as 0.
    if isinstance(comma_seperated_string, str):
        return comma_seperated_string.count(",")
    return 0

In [24]:
# Reload the organisations table and derive three columns from the free-text
# country: ISO alpha-3 code, ISO alpha-2 code and continent code. Each later
# column is computed from the one created just before it.
df_organizations = pd.read_csv("../csv/github_organizations.csv").assign(
    ISO_3=lambda frame: frame["location_country"].apply(name_to_iso3),
    ISO_3_alpha2=lambda frame: frame["ISO_3"].apply(alpha3_to_alpha2),
    continent=lambda frame: frame["ISO_3_alpha2"].apply(alpha2_to_continent),
)

In [25]:
# Number of organisations per continent code, as a one-column frame indexed
# by "continent_name".
continent_his = (
    df_organizations["continent"]
    .value_counts()
    # pandas >= 2.0 names the value_counts result "count" instead of the
    # source column; pin the name so the "continent" column exists on every
    # pandas version.
    .rename("continent")
    .to_frame()
    .rename_axis("continent_name")
)
# Replace continent codes with readable labels; the empty code (no country
# could be resolved) is shown as "Global".
continent_his.rename(
    index={
        "EU": "Europe",
        "NA": "North America",
        "": "Global",
        "OC": "Oceania",
        "AS": "Asia",
        "SA": "South America",
        "AF": "Africa",
    },
    inplace=True,
)
fig = px.pie(
    continent_his.reset_index(),
    values="continent",
    names="continent_name",
    color_discrete_sequence=color_discrete_sequence,
    hole=0.2,
)

fig.update_layout(
    font_size=16,
    showlegend=False,
    hovermode=False,
)
fig.update_traces(
    textposition="auto",
    textinfo="label+percent",
    marker=dict(line=dict(color=boarder_color, width=1)),
)
fig["layout"].update(margin=dict(l=0, r=0, b=0, t=0))

# Plotly display options (the dict was previously defined twice in a row).
config = {
  'toImageButtonOptions': {
    'format': 'svg', # one of png, svg, jpeg, webp
  },
  'responsive':'true'
}
fig.show(config=config)

```{figure} data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7
:figclass: caption-hack
:name: distribution-of-organisations-between-continents

\- Distribution of organisations between continents
```

In [26]:
# Share of GitHub users per continent (percent), taken from
# https://octoverse.github.com/
# The shares are kept in a list, not a set: a set has no defined order (and
# pandas refuses to build a DataFrame from one), so values and labels could
# not be paired reliably. Both lists are in ascending order of share, so
# values[i] belongs to index_labels[i].
values = [1.7, 2.3, 5.9, 27.3, 31.2, 31.5]
index_labels = ["Oceania", "Africa", "South America", "Europe", "Asia", "North America"]
# reset_index() turns the labels into a column named "index"; the shares stay
# in column 0.
df_users_continent_cotoverse = pd.DataFrame(values, index=index_labels).reset_index()

However, if one compares the ratios with open source developer statistics, clear differences in origin become apparent. Here, baseline data from “[The State of the Octoverse](https://octoverse.github.com/)” is used, a study that provides the geographic distribution of millions of active GitHub users.

In [27]:
# similar pooling to the one in cell 53 could be done here for Africa + Oceania

# Donut chart of the GitHub user share per continent: slice labels come from
# the "index" column, slice sizes from column 0.
fig = px.pie(
    data_frame=df_users_continent_cotoverse,
    names="index",
    values=0,
    hole=0.2,
    color_discrete_sequence=color_discrete_sequence,
)

# Layout: larger font, no legend, no hover, no outer margins.
fig.update_layout(
    font_size=16,
    showlegend=False,
    hovermode=False,
    margin={"l": 0, "r": 0, "b": 0, "t": 0},
)

# Slices: label plus percentage, with a thin outline around each slice.
fig.update_traces(
    textposition="auto",
    textinfo="label+percent",
    marker={"line": {"color": boarder_color, "width": 1}},
)

# Plotly display options; the image-export format is one of png, svg, jpeg, webp.
config = {
    "toImageButtonOptions": {"format": "svg"},
    "responsive": "true",
}
fig.show(config=config)

```{figure} data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7
:figclass: caption-hack
:name: distribution-of-all-github-users

\- Distribution of all GitHub users between continents
```

**At a national level, the United States, Germany, France, and the United Kingdom stand out**. Despite having more GitHub users than Europe, Asia accounts for only 1.9% of organisations working in OSS for sustainability. Moreover, the absence of Indian communities is notable, with no large organisation or project identified, despite a high number of open source developers present. Similarly, very few organisations or projects originate from China, despite a high number of open source developers and a high volume of scientific publications. While there are likely open source developers from underrepresented regions associated with both foreign organisations and global projects with no geographical affiliation, this deviation requires further investigation.

In [28]:
# Number of organisations per ISO alpha-3 country code, as a frame with the
# columns "country" and "counts".
df_countries = (
    df_organizations["ISO_3"]
    .value_counts()
    .rename_axis("country")
    # Name the count column explicitly: pandas >= 2.0 calls the value_counts
    # result "count" rather than "ISO_3", so renaming "ISO_3" afterwards
    # would silently leave "counts" missing.
    .reset_index(name="counts")
)

# World map coloured by log2(counts) so that a few very large countries do
# not flatten the colour scale for everyone else.
fig = px.choropleth(
    df_countries,
    locations="country",
    locationmode="ISO-3",
    color=np.log2(df_countries["counts"]),
    color_continuous_scale=["#829EEA", "#169485"],
    hover_name="country", 
    hover_data={"country": False, "counts": True,}
)

fig.update_traces(hovertemplate="%{customdata[0]}<extra>%{customdata[1]}</extra>")


fig.update_layout(
    coloraxis_colorbar=dict(
        title="Organisations",
        orientation='h',
        # Ticks sit at log2 positions and are labelled with the actual
        # organisation count 2**tick, so position 0 is 1 organisation
        # (it was previously mislabelled '0').
        tickvals = [0, 1, 2, 3, 4, 5, 6, 7, 8],
        ticktext = ['1', '2', '4', '8', '16', '32', '64', '128', '256'],
        # NOTE(review): 'log' does not look like a documented dtick value and
        # explicit tickvals are given - confirm whether this can be dropped.
        dtick='log',
        y=-0.25
    ),
    autosize=False,
    dragmode=False,
)

fig["layout"].update(margin=dict(l=20, r=0, b=10, t=40))

# Plotly display options.
config = {
  'toImageButtonOptions': {
    'format': 'svg', # one of png, svg, jpeg, webp
  },
  'responsive':'true'
}
fig.show(config=config)

```{figure} data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7
:figclass: caption-hack
:name: national-distribution-of-organisations

\- Global distribution of organisations
```