# setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import re
import datetime as dt
#sns.set(style="darkgrid")
="{:,.1f}".format
pd.options.display.float_format= '/Users/davidleitch/Library/Mobile Documents/com~apple~CloudDocs/nem review/'
nem_path from matplotlib.ticker import FormatStrFormatter
#from scipy.optimize import minimize
#from scipy.optimize import Bounds
from scipy import stats
from matplotlib.ticker import FormatStrFormatter
import matplotlib.ticker as plticker
from matplotlib.dates import DateFormatter
import matplotlib.dates as mdates
import matplotx
import xlwings as xw
from xlwings import view
from xlwings import load
import string
import holoviews as hv
#import hvplot.pandas
'bokeh')
hv.extension(import panel as pn
import bokeh
import jupyter_bokeh
import plotly.graph_objects as go
'plotly')
pn.extension(=True, loading_indicator=True) pn.extension(defer_load
file ='/Users/davidleitch/Dropbox/the analyst/Price forecast current models/RenewMap-3.csv'
= pd.read_csv(file)
projects projects.head()
Project Name | Technology | Project Size (MW) | Project Storage Size (MWh) | Turbines | Developer | Project Status | DA Status | EPBC Code | EPBC Link | State | Owner | OpenNEM | Council Area | Network | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | North Star Junction Solar Farm | Solar PV | 100.0 | NaN | NaN | Fortescue Metals Group | Construction | Approved | NaN | NaN | WA | Fortescue Metals Group | NaN | Port Hedland | NaN |
1 | Port Hedland Solar Project | Solar PV | 45.0 | NaN | NaN | Alinta Energy | Construction | Approved | 2022/09241 | https://epbcpublicportal.awe.gov.au/all-referr... | WA | APA Group | NaN | Port Hedland | NWIS |
2 | Yuri Renewable Hydrogen to Ammonia Project (So... | Solar PV | 18.0 | NaN | NaN | Engie | Construction | Approved | 2020/8739 | https://epbcpublicportal.awe.gov.au/all-referr... | WA | ENGIE, Mitsui | NaN | Karratha | NWIS |
3 | Amrun Solar Farm | Solar PV | 12.4 | NaN | NaN | Rio Tinto | Construction | Approved | NaN | NaN | QLD | Rio Tinto | NaN | Cook | Off Grid |
4 | Bellevue Gold Hybrid (Solar) | Solar PV | NaN | NaN | NaN | Zenith Energy | Construction | Approved | NaN | NaN | WA | Zenith Energy | NaN | Leonora | Off Grid |
projects.columns
Index(['Project Name', 'Technology', 'Project Size (MW)',
'Project Storage Size (MWh)', 'Turbines', 'Developer', 'Project Status',
'DA Status', 'EPBC Code', 'EPBC Link', 'State', 'Owner', 'OpenNEM',
'Council Area', 'Network'],
dtype='object')
= ['project',"technology",'mw','mwh','turbines','developer','status','da','epbc','epbc_link','state','owner','open_nem','council','network']
project_names = project_names
projects.columns projects.columns
Index(['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status',
'da', 'epbc', 'epbc_link', 'state', 'owner', 'open_nem', 'council',
'network'],
dtype='object')
=list(range(1,100)) # has to be integers
temp=[x/100 for x in temp]
percentile= projects['mw'].quantile(q=percentile)
s1
s1.plot()
= 'mw',ascending = False, inplace = True)
projects.sort_values(by
= ['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status','da', 'state', 'owner', 'council']
keep_col = projects.copy()
projects2 = projects2[keep_col]
projects 'mw'] > 49) & (projects['mw'] < 3001) ]
projects.loc[(projects[ projects.describe()
mw | mwh | turbines | |
---|---|---|---|
count | 1,068.0 | 1.0 | 279.0 |
mean | 299.3 | 3,600.0 | 80.3 |
std | 1,564.2 | NaN | 214.6 |
min | 0.1 | 3,600.0 | 1.0 |
25% | 9.0 | 3,600.0 | 22.0 |
50% | 80.0 | 3,600.0 | 47.0 |
75% | 250.0 | 3,600.0 | 81.0 |
max | 30,000.0 | 3,600.0 | 3,000.0 |
'state'].unique() projects[
array(['WA', 'NT', 'SA', 'QLD', 'VIC', 'NSW', 'TAS', 'ACT'], dtype=object)
= ['SA', 'QLD', 'VIC', 'NSW', 'TAS']
nem =projects.copy()
projects3= projects3['state'].isin(nem)
kept = projects3[kept].copy()
projects projects.describe()
mw | mwh | turbines | |
---|---|---|---|
count | 935.0 | 1.0 | 243.0 |
mean | 205.8 | 3,600.0 | 69.8 |
std | 441.0 | NaN | 111.0 |
min | 0.1 | 3,600.0 | 1.0 |
25% | 10.0 | 3,600.0 | 27.5 |
50% | 90.0 | 3,600.0 | 48.0 |
75% | 262.5 | 3,600.0 | 89.0 |
max | 10,000.0 | 3,600.0 | 1,600.0 |
projects.head()
project | technology | mw | mwh | turbines | developer | status | da | state | owner | council | |
---|---|---|---|---|---|---|---|---|---|---|---|
1062 | Evergreen Hydrogen Production Hub (Wind) | Onshore Wind | 10,000.0 | NaN | 1,600.0 | Copenhagen Infrastructure Partners | Development | Not Submitted | SA | Copenhagen Infrastructure Partners | Unincorporated SA |
410 | Evergreen Hydrogen Production Hub (Solar) | Solar PV | 4,000.0 | NaN | NaN | Copenhagen Infrastructure Partners | Development | Not Submitted | SA | Copenhagen Infrastructure Partners | Unincorporated SA |
924 | Collinsville Green Energy Hub | Onshore Wind | 3,000.0 | NaN | NaN | Ark Energy | Development | Not Submitted | QLD | Ark Energy | Whitsunday |
964 | Warracknabeal Energy Park | Onshore Wind | 1,650.0 | NaN | 211.0 | WestWind Energy | Development | Assessment | VIC | WestWind Energy | Yarriambiack |
966 | The Plains Wind Farm | Onshore Wind | 1,500.0 | NaN | 201.0 | Engie | Development | Assessment | NSW | Engie | Edward River, Hay |
=list(range(1,100)) # has to be integers
temp=[x/100 for x in temp]
percentile= projects['mw'].quantile(q=percentile)
s1 s1.plot()
projects.status.unique()
array(['Development', 'Construction', 'Operating'], dtype=object)
= projects.copy() fp
"owner")
fp.sort_values(= fp["owner"] fp_owners
# pivot
= pd.pivot_table(projects,index = ['developer','technology','project'], columns =['state','status'],values='mw',aggfunc = 'sum')
piv #(pd.concat([piv,piv.query("'technology != 'total'").sum(level =0).assign('technology'='Total').set_index('technology',append=True)]).sort_index())
=pd.pivot_table(projects,index = ['developer','technology'], columns =['state','status'],values='mw',aggfunc = 'sum')
piv2 = pd.concat([piv,piv2],axis=0)
piv3 20) piv3.tail(
'total']=piv2.sum(axis=1) piv2[
piv2.head()
= piv2.groupby(['technology','developer','total']).sum().sort_values(['technology','total'],ascending=False)
piv4 piv4
30) piv4.head(
piv4
# try a second pivot to get sub totals
= pd.pivot_table(projects,index = ['developer','technology','project'], columns =['state','status'],values='mw',fill_value=0,dropna=True,aggfunc = "sum") p1
'total']=p1.sum(axis=1)
p1[ p1
sum().rename((k,'Total')))for k,d in p1.groupby(level=0)]).append(p1.sum().rename(('Grand','Total'))) pd.concat([d.append(d.
= piv.groupby(['developer','technology']).sum()
pivg pivg
piv2
state | NSW | QLD | SA | TAS | VIC | total | |||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
status | Construction | Development | Operating | Construction | Development | Operating | Construction | Development | Operating | Development | Operating | Construction | Development | Operating | |
technology | |||||||||||||||
Onshore Wind | 1,013.1 | 26,364.0 | 2,234.9 | 1,625.0 | 33,486.2 | 1,025.7 | 412.0 | 15,915.0 | 2,340.9 | 5,597.0 | 559.4 | 1,071.0 | 16,024.4 | 4,377.1 | 112,045.7 |
Solar PV | 2,102.9 | 26,332.7 | 3,688.5 | 206.4 | 23,133.7 | 3,519.0 | 10.9 | 8,443.8 | 797.9 | 621.0 | 5.0 | 278.9 | 9,918.2 | 1,311.7 | 80,370.6 |
total | 3,116.1 | 52,696.7 | 5,923.4 | 1,831.4 | 56,619.9 | 4,544.7 | 422.9 | 24,358.8 | 3,138.8 | 6,218.0 | 564.4 | 1,350.0 | 25,942.6 | 5,688.8 | 192,416.3 |
=pd.pivot_table(projects,index = ['owner','technology'], columns =['state','status'],values='mw',margins=True,margins_name = 'total',aggfunc = "sum")
byowner byowner
= byowner.groupby(level = 'status',axis=1).sum()
byowner_status = ['total', 'Construction', 'Development', 'Operating']
byowner_status.columns = 'total',ascending = False)#by = 'mw',ascending = False, inplace = True byowner_status.sort_values(by
/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/2632836213.py:1: FutureWarning: DataFrame.groupby with axis=1 is deprecated. Do `frame.T.groupby(...)` without axis instead.
byowner_status = byowner.groupby(level = 'status',axis=1).sum()
total | Construction | Development | Operating | ||
---|---|---|---|---|---|
owner | technology | ||||
total | 189,258.0 | 6,720.4 | 162,765.6 | 19,772.1 | |
Copenhagen Infrastructure Partners | Onshore Wind | 10,500.0 | 0.0 | 10,500.0 | 0.0 |
Ark Energy | Onshore Wind | 6,661.0 | 0.0 | 6,661.0 | 0.0 |
Windlab | Onshore Wind | 5,300.0 | 0.0 | 5,300.0 | 0.0 |
Neoen | Onshore Wind | 4,584.0 | 412.0 | 3,821.0 | 351.0 |
... | ... | ... | ... | ... | ... |
Forestry Corporation NSW | Onshore Wind | 0.0 | 0.0 | 0.0 | 0.0 |
Gippsland Water | Onshore Wind | 0.0 | 0.0 | 0.0 | 0.0 |
LP Renewable Projects | Onshore Wind | 0.0 | 0.0 | 0.0 | 0.0 |
Marble Energy | Onshore Wind | 0.0 | 0.0 | 0.0 | 0.0 |
NIFSAN Developments | Solar PV | 0.0 | 0.0 | 0.0 | 0.0 |
354 rows × 4 columns
= byowner_status.swaplevel('owner','technology')
bys 'technology','owner'],ascending=False,inplace=True)
bys.sort_values([ bys
total | Construction | Development | Operating | ||
---|---|---|---|---|---|
technology | owner | ||||
Solar PV | ib vogt GmbH | 433.0 | 0.0 | 433.0 | 0.0 |
getgreen, Hareon Solar | 50.0 | 0.0 | 50.0 | 0.0 | |
akaysha energy, Wind Prospect | 200.0 | 0.0 | 200.0 | 0.0 | |
ZEN Energy | 200.0 | 0.0 | 200.0 | 0.0 | |
Yancoal | 330.0 | 0.0 | 330.0 | 0.0 | |
... | ... | ... | ... | ... | ... |
Onshore Wind | AGL Energy | 450.8 | 0.0 | 360.0 | 90.8 |
ACEN | 3,489.0 | 0.0 | 3,489.0 | 0.0 | |
ACE Power | 150.0 | 0.0 | 150.0 | 0.0 | |
ACCIONA Energia (50%), EnergyAustralia (50%) | 64.0 | 0.0 | 0.0 | 64.0 | |
total | 189,258.0 | 6,720.4 | 162,765.6 | 19,772.1 |
354 rows × 4 columns
'technology','total'],ascending=False,inplace=True)
bys.sort_values([ bys
total | Construction | Development | Operating | ||
---|---|---|---|---|---|
technology | owner | ||||
Solar PV | Copenhagen Infrastructure Partners | 4,012.0 | 0.0 | 4,012.0 | 0.0 |
Lightsource BP | 3,240.0 | 0.0 | 3,240.0 | 0.0 | |
FRV Australia | 2,854.9 | 300.0 | 2,036.8 | 518.1 | |
ACEN | 2,540.0 | 720.0 | 1,420.0 | 400.0 | |
GENEX, J-POWER | 2,475.0 | 0.0 | 2,475.0 | 0.0 | |
... | ... | ... | ... | ... | ... |
Onshore Wind | LP Renewable Projects | 0.0 | 0.0 | 0.0 | 0.0 |
Gippsland Water | 0.0 | 0.0 | 0.0 | 0.0 | |
Forestry Corporation NSW | 0.0 | 0.0 | 0.0 | 0.0 | |
Dawn Renewables | 0.0 | 0.0 | 0.0 | 0.0 | |
total | 189,258.0 | 6,720.4 | 162,765.6 | 19,772.1 |
354 rows × 4 columns
= bys.loc['Onshore Wind']
by_w by_w
total | Construction | Development | Operating | |
---|---|---|---|---|
owner | ||||
Woolnorth Renewables | 400.0 | 0.0 | 400.0 | 0.0 |
Windlab, Eurus | 43.2 | 0.0 | 0.0 | 43.2 |
Windlab | 5,300.0 | 0.0 | 5,300.0 | 0.0 |
Wind Prospect, Engie | 350.0 | 0.0 | 350.0 | 0.0 |
Wind Prospect | 700.0 | 0.0 | 700.0 | 0.0 |
... | ... | ... | ... | ... |
Acciona | 3,584.0 | 0.0 | 3,056.0 | 528.0 |
AGL Energy | 450.8 | 0.0 | 360.0 | 90.8 |
ACEN | 3,489.0 | 0.0 | 3,489.0 | 0.0 |
ACE Power | 150.0 | 0.0 | 150.0 | 0.0 |
ACCIONA Energia (50%), EnergyAustralia (50%) | 64.0 | 0.0 | 0.0 | 64.0 |
115 rows × 4 columns
=bys.loc['Solar PV']
by_s by_s
total | Construction | Development | Operating | |
---|---|---|---|---|
owner | ||||
ib vogt GmbH | 433.0 | 0.0 | 433.0 | 0.0 |
getgreen, Hareon Solar | 50.0 | 0.0 | 50.0 | 0.0 |
akaysha energy, Wind Prospect | 200.0 | 0.0 | 200.0 | 0.0 |
ZEN Energy | 200.0 | 0.0 | 200.0 | 0.0 |
Yancoal | 330.0 | 0.0 | 330.0 | 0.0 |
... | ... | ... | ... | ... |
AMP Energy | 895.0 | 0.0 | 780.0 | 115.0 |
AGL Energy | 5.0 | 5.0 | 0.0 | 0.1 |
ACEnergy | 59.9 | 0.0 | 34.9 | 25.0 |
ACEN | 2,540.0 | 720.0 | 1,420.0 | 400.0 |
ACE Power | 631.0 | 0.0 | 631.0 | 0.0 |
238 rows × 4 columns
= by_s.index.sort_values()
test = set()
deduplicated_data = set()
duplicates for i in range(1,len(test)):
= test[i-1]
a = test[i]
b if a[0:7] == b[0:7]:
duplicates.add(a)
duplicates.add(b)
20) by_w.head(
20) by_s.head(
= by_w.sort_values('total',ascending = False).head(25)
big_w = by_s.sort_values('total', ascending = False).head(25)
big_s = pd.concat([big_w,big_s])
big big
= True)
big.sort_index(inplace big
= big.index.unique()
bigco bigco
Index(['ACEN', 'AMP Energy', 'Acciona', 'Ark Energy', 'BJEI', 'BayWa r.e.',
'Bison Energy', 'Canadian Solar', 'Copenhagen Infrastructure Partners',
'EIWA Energy', 'Edify Energy', 'Engie', 'European Energy',
'FRV Australia', 'GENEX, J-POWER', 'Global Power Generation Australia',
'Goldwind Australia', 'Iberdrola', 'Lightsource BP', 'Macorna Energy',
'Mainstream Renewables', 'Mytilineos', 'Neoen',
'Octopus Investments, CEFC', 'Origin Energy',
'Pacific Blue (Pacific Hydro)', 'RATCH-Australia', 'RES',
'RWE Renewables', 'Renewable Energy Partners', 'Samsung C&T',
'SkyLab Australia', 'Spark Renewables', 'Squadron Energy',
'Sunshine Energy', 'Tetris Energy', 'Tilt Renewables',
'Tilt Renewables, CleanSight', 'Vena Energy Services', 'Vestas',
'Virya Energy', 'WestWind Energy', 'Windlab', 'X-ELIO'],
dtype='object', name='owner')
= projects["owner"].isin(bigco)
bigmask = projects[bigmask]
leaders leaders
'project',inplace = True)
leaders.set_index( leaders
= True)
leaders.reset_index(inplace = pd.pivot_table(leaders,index = ['developer','technology','project'], columns =['state','status'],values='mw',aggfunc = 'sum',margins = True)
leadpiv leadpiv
0,inplace=True)
leadpiv.fillna( leadpiv
= leadpiv.groupby(level = 0).sum().sort_values("All",ascending = False)
lead_sort 15) lead_sort.head(
= lead_sort.head(15)
top_lead_sort top_lead_sort.index
Index(['All', 'Copenhagen Infrastructure Partners', 'Ark Energy', 'Neoen',
'ACEN', 'RES', 'Windlab', 'Spark Renewables', 'Acciona',
'WestWind Energy', 'Lightsource BP', 'Renewable Energy Partners',
'Virya Energy', 'Engie', 'BayWa r.e.'],
dtype='object', name='developer')
leadpiv.head()
= leadpiv.index.isin(top_lead_sort.index,level=0)
bigmask = leadpiv[bigmask]
top15 top15
= top_lead_sort.index.values.tolist()
bysize bysize
['All',
'Copenhagen Infrastructure Partners',
'Ark Energy',
'Neoen',
'ACEN',
'RES',
'Windlab',
'Spark Renewables',
'Acciona',
'WestWind Energy',
'Lightsource BP',
'Renewable Energy Partners',
'Virya Energy',
'Engie',
'BayWa r.e.']
= top15.reindex(bysize,level=0)
top2 top2
'Ark Energy'] top2.loc[
= top2.drop("All",level=0) topdev
def df_to_space(x):
= "%.0f" % x
x if len(x) > 3:
= x[:-3]+','+x[-3:]
x if x == '0':
= ''
x return x
= topdev.groupby(["developer",]).sum().sort_values(["All"],ascending = False)
td
td.to_clipboard()
#for col in td.columns:
#td[col]= td[col].loc[td[col].str.contains('.')].str.split('.').str[0]
format (formatter = df_to_space).set_caption("Major developer projects MW").set_table_styles(styles) td.style.
"status"],axis=1).sum() topdev.groupby([
/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/2712648979.py:1: FutureWarning: DataFrame.groupby with axis=1 is deprecated. Do `frame.T.groupby(...)` without axis instead.
topdev.groupby(["status"],axis=1).sum()
status | Construction | Development | Operating | |||
---|---|---|---|---|---|---|
developer | technology | project | ||||
Copenhagen Infrastructure Partners | Onshore Wind | Evergreen Hydrogen Production Hub (Wind) | 10,000.0 | 0.0 | 10,000.0 | 0.0 |
Solar PV | Evergreen Hydrogen Production Hub (Solar) | 4,000.0 | 0.0 | 4,000.0 | 0.0 | |
Ark Energy | Onshore Wind | Boomer Green Energy Hub | 1,100.0 | 0.0 | 1,100.0 | 0.0 |
Bowmans Creek Wind Farm | 335.0 | 0.0 | 335.0 | 0.0 | ||
Burrendong Wind Farm | 500.0 | 0.0 | 500.0 | 0.0 | ||
... | ... | ... | ... | ... | ... | ... |
BayWa r.e. | Solar PV | Jung Solar Farm | 13.0 | 0.0 | 13.0 | 0.0 |
Kelsey Creek Solar Farm | 50.0 | 0.0 | 50.0 | 0.0 | ||
Sandy Creek Solar Farm (BayWa) | 12.0 | 0.0 | 12.0 | 0.0 | ||
Yarren Hut Solar Farm | 28.0 | 0.0 | 28.0 | 0.0 | ||
Yatpool Solar Farm | 94.0 | 0.0 | 0.0 | 94.0 |
140 rows × 4 columns
td2.columns
Index(['NSW', 'QLD', 'SA', 'TAS', 'VIC', 'total'], dtype='object', name='state')
= topdev.xs("Development",level=1,axis=1)
td2 = ['NSW', 'QLD', 'SA', 'TAS', 'VIC']
states 'total'] = td2[states].sum(axis=1) td2[
/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/1427645528.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
td2['total'] = td2[states].sum(axis=1)
'total',ascending=False,inplace=True) td2.sort_values(
/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/2537107422.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
td2.sort_values('total',ascending=False,inplace=True)
= td2.loc[(td2!=0).any(axis=1)]
td3 = td3.loc[td3['total'] > 90]
td4 td4
state | NSW | QLD | SA | TAS | VIC | total | ||
---|---|---|---|---|---|---|---|---|
developer | technology | project | ||||||
Copenhagen Infrastructure Partners | Onshore Wind | Evergreen Hydrogen Production Hub (Wind) | 0.0 | 0.0 | 10,000.0 | 0.0 | 0.0 | 10,000.0 |
Solar PV | Evergreen Hydrogen Production Hub (Solar) | 0.0 | 0.0 | 4,000.0 | 0.0 | 0.0 | 4,000.0 | |
Ark Energy | Onshore Wind | Collinsville Green Energy Hub | 0.0 | 3,000.0 | 0.0 | 0.0 | 0.0 | 3,000.0 |
WestWind Energy | Onshore Wind | Warracknabeal Energy Park | 0.0 | 0.0 | 0.0 | 0.0 | 1,650.0 | 1,650.0 |
Engie | Onshore Wind | The Plains Wind Farm | 1,500.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1,500.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
WestWind Energy | Onshore Wind | Normanville Energy Park | 0.0 | 0.0 | 0.0 | 0.0 | 122.0 | 122.0 |
Engie | Solar PV | Silverleaf Solar Farm | 120.0 | 0.0 | 0.0 | 0.0 | 0.0 | 120.0 |
Lightsource BP | Solar PV | Mannum Solar Farm | 0.0 | 0.0 | 100.0 | 0.0 | 0.0 | 100.0 |
Engie | Solar PV | Yass Solar Energy Park | 100.0 | 0.0 | 0.0 | 0.0 | 0.0 | 100.0 |
Neoen | Solar PV | Lemnos Solar Farm | 0.0 | 0.0 | 0.0 | 0.0 | 100.0 | 100.0 |
96 rows × 6 columns
'Copenhagen Infrastructure Partners',level = 0, inplace = True)
td4.drop( td4
/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/1425947989.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
td4.drop('Copenhagen Infrastructure Partners',level = 0, inplace = True)
state | NSW | QLD | SA | TAS | VIC | total | ||
---|---|---|---|---|---|---|---|---|
developer | technology | project | ||||||
Ark Energy | Onshore Wind | Collinsville Green Energy Hub | 0.0 | 3,000.0 | 0.0 | 0.0 | 0.0 | 3,000.0 |
WestWind Energy | Onshore Wind | Warracknabeal Energy Park | 0.0 | 0.0 | 0.0 | 0.0 | 1,650.0 | 1,650.0 |
Engie | Onshore Wind | The Plains Wind Farm | 1,500.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1,500.0 |
Virya Energy | Onshore Wind | Yanco Delta Wind Farm | 1,500.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1,500.0 |
Windlab | Onshore Wind | Bungaban Wind Farm (Southern QLD Renewable Generation Hub) | 0.0 | 1,400.0 | 0.0 | 0.0 | 0.0 | 1,400.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
WestWind Energy | Onshore Wind | Normanville Energy Park | 0.0 | 0.0 | 0.0 | 0.0 | 122.0 | 122.0 |
Engie | Solar PV | Silverleaf Solar Farm | 120.0 | 0.0 | 0.0 | 0.0 | 0.0 | 120.0 |
Lightsource BP | Solar PV | Mannum Solar Farm | 0.0 | 0.0 | 100.0 | 0.0 | 0.0 | 100.0 |
Engie | Solar PV | Yass Solar Energy Park | 100.0 | 0.0 | 0.0 | 0.0 | 0.0 | 100.0 |
Neoen | Solar PV | Lemnos Solar Farm | 0.0 | 0.0 | 0.0 | 0.0 | 100.0 | 100.0 |
94 rows × 6 columns
td4.describe()
state | NSW | QLD | SA | TAS | VIC | total |
---|---|---|---|---|---|---|
count | 94.0 | 94.0 | 94.0 | 94.0 | 94.0 | 94.0 |
mean | 226.8 | 178.5 | 23.5 | 36.6 | 102.7 | 568.0 |
std | 341.5 | 429.4 | 113.2 | 171.9 | 298.4 | 450.6 |
min | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 100.0 |
25% | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 266.2 |
50% | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 400.0 |
75% | 400.0 | 172.5 | 0.0 | 0.0 | 0.0 | 794.0 |
max | 1,500.0 | 3,000.0 | 850.0 | 1,260.0 | 1,650.0 | 3,000.0 |
# itk teal style for data frame doesn't print the borders properly in vscode but does in jupyter
= [dict(selector="caption",
styles =[("text-align", "left"),
props"font-size", "150%"),
("color", 'white'),
("background-color", "teal"),
("caption-side", "top")]),
(dict(selector = "",props =[("color","grey"),("background-color",'white'),('border-bottom', '1px dotted grey'),('font-size','80%')]),
dict(selector = "th",props =[("background-color",'#00DCDC'),('border-bottom', '1px dotted grey'),('min_width','50px')]),
dict(selector = "tr",props =[("background-color",'white'),('border-bottom', '1px dotted grey'),("color","black")]),
dict(selector = ".blank",props =[("background-color",'#00DCDC')]),
dict(selector = "th.col_heading",props =[("color",'black'),('font-size','100%'),("background-color",'#00DCDC'),('width','60px')]),
dict( selector = "tr:last-child", props =[("color","black"),('border-bottom', '5px black')])]
projects.columns
Index(['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status',
'da', 'state', 'owner', 'council'],
dtype='object')
= projects.loc[projects['status'] == 'Development'] development
development.describe()
mw | mwh | turbines | |
---|---|---|---|
count | 616.0 | 1.0 | 137.0 |
mean | 269.2 | 3,600.0 | 91.7 |
std | 525.8 | NaN | 141.1 |
min | 1.2 | 3,600.0 | 2.0 |
25% | 29.8 | 3,600.0 | 41.0 |
50% | 120.0 | 3,600.0 | 63.0 |
75% | 350.0 | 3,600.0 | 117.0 |
max | 10,000.0 | 3,600.0 | 1,600.0 |
= pd.pivot_table(development, index = ['technology','developer'],columns=['state','da'],values='mw',aggfunc = 'sum', margins = True) da
'technology').sum() da.groupby(
state | NSW | QLD | SA | TAS | VIC | All | |||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
da | Approved | Assessment | Not Approved | Not Submitted | Withdrawn | Approved | Assessment | Not Submitted | Approved | Assessment | Not Submitted | Approved | Assessment | Not Submitted | Approved | Assessment | Not Approved | Not Submitted | |
technology | |||||||||||||||||||
All | 7,171.1 | 32,364.8 | 10.0 | 11,744.8 | 1,365.0 | 21,505.9 | 1,450.0 | 33,664.0 | 4,694.4 | 559.9 | 19,104.5 | 1,529.0 | 2,310.0 | 2,379.0 | 7,796.9 | 5,988.9 | 75.0 | 12,076.8 | 165,790.1 |
Onshore Wind | 2,220.0 | 19,092.0 | 0.0 | 5,052.0 | 0.0 | 8,163.0 | 0.0 | 25,323.2 | 1,935.0 | 270.0 | 13,710.0 | 1,196.0 | 2,310.0 | 2,091.0 | 1,914.4 | 4,824.0 | 0.0 | 9,286.0 | 97,386.6 |
Solar PV | 4,951.1 | 13,272.9 | 10.0 | 6,692.8 | 1,365.0 | 13,342.9 | 1,450.0 | 8,340.8 | 2,759.4 | 289.9 | 5,394.5 | 333.0 | 0.0 | 288.0 | 5,882.5 | 1,165.0 | 75.0 | 2,790.8 | 68,403.5 |
= pd.IndexSlice
idx = da.loc[:,idx[:,["Approved","Assessment"]]]
da da
state | NSW | QLD | SA | TAS | VIC | ||||||
---|---|---|---|---|---|---|---|---|---|---|---|
da | Approved | Assessment | Approved | Assessment | Approved | Assessment | Approved | Assessment | Approved | Assessment | |
technology | developer | ||||||||||
Onshore Wind | ACE Power | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
ACEN | NaN | 1,100.0 | NaN | NaN | NaN | NaN | 1,129.0 | 1,260.0 | NaN | NaN | |
AGL Energy | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
Acciona | NaN | 1,406.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
Ark Energy | 335.0 | 840.0 | 971.0 | NaN | NaN | NaN | NaN | 1,050.0 | NaN | NaN | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
Solar PV | akaysha energy, Wind Prospect | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
cogency | 5.7 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 500.0 | NaN | |
getgreen, Hareon Solar | NaN | NaN | 50.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
ib vogt GmbH | 127.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 74.0 | NaN | |
All | 7,171.1 | 32,364.8 | 21,505.9 | 1,450.0 | 4,694.4 | 559.9 | 1,529.0 | 2,310.0 | 7,796.9 | 5,988.9 |
287 rows × 10 columns
'technology']).sum() da.groupby([
state | NSW | QLD | SA | TAS | VIC | |||||
---|---|---|---|---|---|---|---|---|---|---|
da | Approved | Assessment | Approved | Assessment | Approved | Assessment | Approved | Assessment | Approved | Assessment |
technology | ||||||||||
All | 7,171.1 | 32,364.8 | 21,505.9 | 1,450.0 | 4,694.4 | 559.9 | 1,529.0 | 2,310.0 | 7,796.9 | 5,988.9 |
Onshore Wind | 2,220.0 | 19,092.0 | 8,163.0 | 0.0 | 1,935.0 | 270.0 | 1,196.0 | 2,310.0 | 1,914.4 | 4,824.0 |
Solar PV | 4,951.1 | 13,272.9 | 13,342.9 | 1,450.0 | 2,759.4 | 289.9 | 333.0 | 0.0 | 5,882.5 | 1,165.0 |
'All',inplace = True,level = 0)
da.drop( da
'technology']).sum() da.groupby([
state | NSW | QLD | SA | TAS | VIC | |||||
---|---|---|---|---|---|---|---|---|---|---|
da | Approved | Assessment | Approved | Assessment | Approved | Assessment | Approved | Assessment | Approved | Assessment |
technology | ||||||||||
Onshore Wind | 2,220.0 | 19,092.0 | 8,163.0 | 0.0 | 1,935.0 | 270.0 | 1,196.0 | 2,310.0 | 1,914.4 | 4,824.0 |
Solar PV | 4,951.1 | 13,272.9 | 13,342.9 | 1,450.0 | 2,759.4 | 289.9 | 333.0 | 0.0 | 5,882.5 | 1,165.0 |
= da.loc['Onshore Wind'].sum()
windcht =pd.DataFrame(data = windcht)
a #a.droplevel(level=0, axis=1)
a.shape= [''] * len(a.columns)
a.columns
a= ['mw'] a.columns
= a.unstack()
b b
m2 | ||
---|---|---|
da | Approved | Assessment |
state | ||
NSW | 2,220.0 | 19,092.0 |
QLD | 8,163.0 | 0.0 |
SA | 1,935.0 | 270.0 |
TAS | 1,196.0 | 2,310.0 |
VIC | 1,914.4 | 4,824.0 |
=b.droplevel(level=0, axis=1)
c c
da | Approved | Assessment |
---|---|---|
state | ||
NSW | 2,220.0 | 19,092.0 |
QLD | 8,163.0 | 0.0 |
SA | 1,935.0 | 270.0 |
TAS | 1,196.0 | 2,310.0 |
VIC | 1,914.4 | 4,824.0 |
=c.T
d= None
d.index.name d
state | NSW | QLD | SA | TAS | VIC |
---|---|---|---|---|---|
Approved | 2,220.0 | 8,163.0 | 1,935.0 | 1,196.0 | 1,914.4 |
Assessment | 19,092.0 | 0.0 | 270.0 | 2,310.0 | 4,824.0 |
'font.size'] = 10
plt.rcParams[
plt.style.use (matplotx.styles.dracula)
# need to account for battery charging
= "Wind farms"
titlestring = plt.subplots(figsize = (5,2))#1,5,sharey = True,
fig, ax ='bar',legend = True ,ylabel ="mw",title=titlestring, ax = ax,grid=False)#layout =(1,8)sharey = Truefigsize = (14,4)
d.T.plot(kind=True, right=True, left=True, bottom=True)
sns.despine(top plt.show()
windcht.dtype
dtype('float64')
projects.columns
Index(['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status',
'da', 'state', 'owner', 'council'],
dtype='object')
'state'].unique() projects[
= projects.loc[(projects['technology'] == 'Onshore Wind') & (projects['da'] == 'Approved') & (projects['mw']>90) & (projects['status'] == 'Development')]
approved_wind
= pd.pivot_table(approved_wind,index=['developer','project'],columns='state',values='mw',fill_value='',margins=True,aggfunc='sum')
wind_ready
wind_ready.sort_index()= wind_ready.drop('All')
wind_ready wind_ready
import matplotlib as mpl
= 8
label_size 'ytick.labelsize'] = label_size
mpl.rcParams[= plt.subplots()
fig, ax 6,6)
fig.set_size_inches(
'All').plot(kind = 'barh', title='Approved wind farms by size',ylabel = 'mw',legend = False, ax = ax)
wind_ready.sort_values(#ax.set_yticks(yticklabels,fontsize = 10)
'right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines[ plt.show()
= plt.subplots()
fig, ax 6,4)
fig.set_size_inches(= wind_ready.groupby(['developer'])['All'].sum().to_frame(name='mw').sort_values('mw',ascending = False)
wrd = 'bar', title='Developers with approved wind farms',ylabel = 'mw',legend = False, ax = ax)
wrd.plot(kind 'right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines[ plt.show()
# itk teal style for data frame doesn't print the borders properly in vscode but does in jupyter
= [dict(selector="caption",
styles =[("text-align", "left"),
props"font-size", "150%"),
("color", 'white'),
("background-color", "teal"),
("caption-side", "top")]),
(dict(selector = "",props =[("color","grey"),("background-color",'white'),('border-bottom', '1px dotted grey')]),
dict(selector = "th",props =[("background-color",'white'),('border-bottom', '1px dotted grey'),('min_width','300px')]),
dict(selector = "tr",props =[("background-color",'white'),('border-bottom', '1px dotted grey'),("color","black")]),
dict(selector = ".blank",props =[("background-color",'#00DCDC')]),
dict(selector = "th.col_heading",props =[("color",'black'),('font-size','110%'),("background-color",'#00DCDC')]),
dict( selector = "tr:last-child", props =[("color","black"),('border-bottom', '5px black')])]
= projects.loc[(projects['technology'] == 'Solar PV') & (projects['da'] == 'Approved') & (projects['mw']>90) & (projects['status'] == 'Development')]
approved_solar
= pd.pivot_table(approved_solar,index=['developer','project'],columns='state',values='mw',fill_value='',margins=True,aggfunc='sum')
solar_ready
solar_ready.sort_index()= solar_ready.drop('All')
solar_ready solar_ready
"All",ascending=False).head(20).to_clipboard() solar_ready.sort_values(
= plt.subplots()
fig, ax 6,4)
fig.set_size_inches(= solar_ready.groupby(['developer'])['All'].sum().to_frame(name='mw').sort_values('mw',ascending = False)
srd = len(srd)
sdt = srd.head(15)
srd = 'bar', title='Top 15 out of '+str(sdt)+' developers with approved solar farms',ylabel = 'mw',legend = False, ax = ax)
srd.plot(kind 'right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines[ plt.show()