# setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import re
import datetime as dt
#sns.set(style="darkgrid")
pd.options.display.float_format="{:,.1f}".format
nem_path = '/Users/davidleitch/Library/Mobile Documents/com~apple~CloudDocs/nem review/'
from matplotlib.ticker import FormatStrFormatter
#from scipy.optimize import minimize
#from scipy.optimize import Bounds
from scipy import stats
from matplotlib.ticker import FormatStrFormatter
import matplotlib.ticker as plticker
from matplotlib.dates import DateFormatter  
import matplotlib.dates as mdates
import matplotx
import xlwings as xw
from xlwings import view
from xlwings import load
import string
import holoviews as hv
#import hvplot.pandas
hv.extension('bokeh')
import panel as pn
import bokeh
import jupyter_bokeh
import plotly.graph_objects as go
pn.extension('plotly')
pn.extension(defer_load=True, loading_indicator=True)
file  ='/Users/davidleitch/Dropbox/the analyst/Price forecast current models/RenewMap-3.csv'

projects = pd.read_csv(file)
projects.head()
Project Name Technology Project Size (MW) Project Storage Size (MWh) Turbines Developer Project Status DA Status EPBC Code EPBC Link State Owner OpenNEM Council Area Network
0 North Star Junction Solar Farm Solar PV 100.0 NaN NaN Fortescue Metals Group Construction Approved NaN NaN WA Fortescue Metals Group NaN Port Hedland NaN
1 Port Hedland Solar Project Solar PV 45.0 NaN NaN Alinta Energy Construction Approved 2022/09241 https://epbcpublicportal.awe.gov.au/all-referr... WA APA Group NaN Port Hedland NWIS
2 Yuri Renewable Hydrogen to Ammonia Project (So... Solar PV 18.0 NaN NaN Engie Construction Approved 2020/8739 https://epbcpublicportal.awe.gov.au/all-referr... WA ENGIE, Mitsui NaN Karratha NWIS
3 Amrun Solar Farm Solar PV 12.4 NaN NaN Rio Tinto Construction Approved NaN NaN QLD Rio Tinto NaN Cook Off Grid
4 Bellevue Gold Hybrid (Solar) Solar PV NaN NaN NaN Zenith Energy Construction Approved NaN NaN WA Zenith Energy NaN Leonora Off Grid
projects.columns
Index(['Project Name', 'Technology', 'Project Size (MW)',
       'Project Storage Size (MWh)', 'Turbines', 'Developer', 'Project Status',
       'DA Status', 'EPBC Code', 'EPBC Link', 'State', 'Owner', 'OpenNEM',
       'Council Area', 'Network'],
      dtype='object')
project_names = ['project',"technology",'mw','mwh','turbines','developer','status','da','epbc','epbc_link','state','owner','open_nem','council','network']
projects.columns = project_names
projects.columns
Index(['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status',
       'da', 'epbc', 'epbc_link', 'state', 'owner', 'open_nem', 'council',
       'network'],
      dtype='object')
temp=list(range(1,100)) # has to be integers
percentile=[x/100 for x in temp]
s1 = projects['mw'].quantile(q=percentile)
s1.plot()
projects.sort_values(by = 'mw',ascending = False, inplace = True)
keep_col = ['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status','da',  'state', 'owner',  'council']
projects2 = projects.copy()
projects = projects2[keep_col]
projects.loc[(projects['mw'] > 49) &  (projects['mw'] < 3001) ]
projects.describe()
mw mwh turbines
count 1,068.0 1.0 279.0
mean 299.3 3,600.0 80.3
std 1,564.2 NaN 214.6
min 0.1 3,600.0 1.0
25% 9.0 3,600.0 22.0
50% 80.0 3,600.0 47.0
75% 250.0 3,600.0 81.0
max 30,000.0 3,600.0 3,000.0
projects['state'].unique()
array(['WA', 'NT', 'SA', 'QLD', 'VIC', 'NSW', 'TAS', 'ACT'], dtype=object)
nem = ['SA', 'QLD', 'VIC', 'NSW', 'TAS']
projects3=projects.copy()
kept = projects3['state'].isin(nem)
projects = projects3[kept].copy()
projects.describe()
mw mwh turbines
count 935.0 1.0 243.0
mean 205.8 3,600.0 69.8
std 441.0 NaN 111.0
min 0.1 3,600.0 1.0
25% 10.0 3,600.0 27.5
50% 90.0 3,600.0 48.0
75% 262.5 3,600.0 89.0
max 10,000.0 3,600.0 1,600.0
projects.head()
project technology mw mwh turbines developer status da state owner council
1062 Evergreen Hydrogen Production Hub (Wind) Onshore Wind 10,000.0 NaN 1,600.0 Copenhagen Infrastructure Partners Development Not Submitted SA Copenhagen Infrastructure Partners Unincorporated SA
410 Evergreen Hydrogen Production Hub (Solar) Solar PV 4,000.0 NaN NaN Copenhagen Infrastructure Partners Development Not Submitted SA Copenhagen Infrastructure Partners Unincorporated SA
924 Collinsville Green Energy Hub Onshore Wind 3,000.0 NaN NaN Ark Energy Development Not Submitted QLD Ark Energy Whitsunday
964 Warracknabeal Energy Park Onshore Wind 1,650.0 NaN 211.0 WestWind Energy Development Assessment VIC WestWind Energy Yarriambiack
966 The Plains Wind Farm Onshore Wind 1,500.0 NaN 201.0 Engie Development Assessment NSW Engie Edward River, Hay
temp=list(range(1,100)) # has to be integers
percentile=[x/100 for x in temp]
s1 = projects['mw'].quantile(q=percentile)
s1.plot()
projects.status.unique()
array(['Development', 'Construction', 'Operating'], dtype=object)
fp = projects.copy()
fp.sort_values("owner")
fp_owners = fp["owner"]
# pivot
piv = pd.pivot_table(projects,index = ['developer','technology','project'], columns =['state','status'],values='mw',aggfunc = 'sum')
#(pd.concat([piv,piv.query("'technology != 'total'").sum(level =0).assign('technology'='Total').set_index('technology',append=True)]).sort_index())

piv2 =pd.pivot_table(projects,index = ['developer','technology'], columns =['state','status'],values='mw',aggfunc = 'sum')
piv3 = pd.concat([piv,piv2],axis=0)
piv3.tail(20)
piv2['total']=piv2.sum(axis=1)
piv2.head()
piv4 = piv2.groupby(['technology','developer','total']).sum().sort_values(['technology','total'],ascending=False)
piv4
piv4.head(30)
piv4
# try a second pivot to get sub totals 
p1 = pd.pivot_table(projects,index = ['developer','technology','project'], columns =['state','status'],values='mw',fill_value=0,dropna=True,aggfunc = "sum")
p1['total']=p1.sum(axis=1)
p1
pd.concat([d.append(d.sum().rename((k,'Total')))for k,d in p1.groupby(level=0)]).append(p1.sum().rename(('Grand','Total')))
pivg = piv.groupby(['developer','technology']).sum()
pivg
piv2
state NSW QLD SA TAS VIC total
status Construction Development Operating Construction Development Operating Construction Development Operating Development Operating Construction Development Operating
technology
Onshore Wind 1,013.1 26,364.0 2,234.9 1,625.0 33,486.2 1,025.7 412.0 15,915.0 2,340.9 5,597.0 559.4 1,071.0 16,024.4 4,377.1 112,045.7
Solar PV 2,102.9 26,332.7 3,688.5 206.4 23,133.7 3,519.0 10.9 8,443.8 797.9 621.0 5.0 278.9 9,918.2 1,311.7 80,370.6
total 3,116.1 52,696.7 5,923.4 1,831.4 56,619.9 4,544.7 422.9 24,358.8 3,138.8 6,218.0 564.4 1,350.0 25,942.6 5,688.8 192,416.3
byowner =pd.pivot_table(projects,index = ['owner','technology'], columns =['state','status'],values='mw',margins=True,margins_name = 'total',aggfunc = "sum")
byowner
byowner_status = byowner.groupby(level = 'status',axis=1).sum()
byowner_status.columns = ['total', 'Construction', 'Development', 'Operating']
byowner_status.sort_values(by = 'total',ascending = False)#by = 'mw',ascending = False, inplace = True
/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/2632836213.py:1: FutureWarning: DataFrame.groupby with axis=1 is deprecated. Do `frame.T.groupby(...)` without axis instead.
  byowner_status = byowner.groupby(level = 'status',axis=1).sum()
total Construction Development Operating
owner technology
total 189,258.0 6,720.4 162,765.6 19,772.1
Copenhagen Infrastructure Partners Onshore Wind 10,500.0 0.0 10,500.0 0.0
Ark Energy Onshore Wind 6,661.0 0.0 6,661.0 0.0
Windlab Onshore Wind 5,300.0 0.0 5,300.0 0.0
Neoen Onshore Wind 4,584.0 412.0 3,821.0 351.0
... ... ... ... ... ...
Forestry Corporation NSW Onshore Wind 0.0 0.0 0.0 0.0
Gippsland Water Onshore Wind 0.0 0.0 0.0 0.0
LP Renewable Projects Onshore Wind 0.0 0.0 0.0 0.0
Marble Energy Onshore Wind 0.0 0.0 0.0 0.0
NIFSAN Developments Solar PV 0.0 0.0 0.0 0.0

354 rows × 4 columns

bys = byowner_status.swaplevel('owner','technology')
bys.sort_values(['technology','owner'],ascending=False,inplace=True)
bys
total Construction Development Operating
technology owner
Solar PV ib vogt GmbH 433.0 0.0 433.0 0.0
getgreen, Hareon Solar 50.0 0.0 50.0 0.0
akaysha energy, Wind Prospect 200.0 0.0 200.0 0.0
ZEN Energy 200.0 0.0 200.0 0.0
Yancoal 330.0 0.0 330.0 0.0
... ... ... ... ... ...
Onshore Wind AGL Energy 450.8 0.0 360.0 90.8
ACEN 3,489.0 0.0 3,489.0 0.0
ACE Power 150.0 0.0 150.0 0.0
ACCIONA Energia (50%), EnergyAustralia (50%) 64.0 0.0 0.0 64.0
total 189,258.0 6,720.4 162,765.6 19,772.1

354 rows × 4 columns

bys.sort_values(['technology','total'],ascending=False,inplace=True)
bys
total Construction Development Operating
technology owner
Solar PV Copenhagen Infrastructure Partners 4,012.0 0.0 4,012.0 0.0
Lightsource BP 3,240.0 0.0 3,240.0 0.0
FRV Australia 2,854.9 300.0 2,036.8 518.1
ACEN 2,540.0 720.0 1,420.0 400.0
GENEX, J-POWER 2,475.0 0.0 2,475.0 0.0
... ... ... ... ... ...
Onshore Wind LP Renewable Projects 0.0 0.0 0.0 0.0
Gippsland Water 0.0 0.0 0.0 0.0
Forestry Corporation NSW 0.0 0.0 0.0 0.0
Dawn Renewables 0.0 0.0 0.0 0.0
total 189,258.0 6,720.4 162,765.6 19,772.1

354 rows × 4 columns

by_w = bys.loc['Onshore Wind']
by_w
total Construction Development Operating
owner
Woolnorth Renewables 400.0 0.0 400.0 0.0
Windlab, Eurus 43.2 0.0 0.0 43.2
Windlab 5,300.0 0.0 5,300.0 0.0
Wind Prospect, Engie 350.0 0.0 350.0 0.0
Wind Prospect 700.0 0.0 700.0 0.0
... ... ... ... ...
Acciona 3,584.0 0.0 3,056.0 528.0
AGL Energy 450.8 0.0 360.0 90.8
ACEN 3,489.0 0.0 3,489.0 0.0
ACE Power 150.0 0.0 150.0 0.0
ACCIONA Energia (50%), EnergyAustralia (50%) 64.0 0.0 0.0 64.0

115 rows × 4 columns

by_s =bys.loc['Solar PV']
by_s
total Construction Development Operating
owner
ib vogt GmbH 433.0 0.0 433.0 0.0
getgreen, Hareon Solar 50.0 0.0 50.0 0.0
akaysha energy, Wind Prospect 200.0 0.0 200.0 0.0
ZEN Energy 200.0 0.0 200.0 0.0
Yancoal 330.0 0.0 330.0 0.0
... ... ... ... ...
AMP Energy 895.0 0.0 780.0 115.0
AGL Energy 5.0 5.0 0.0 0.1
ACEnergy 59.9 0.0 34.9 25.0
ACEN 2,540.0 720.0 1,420.0 400.0
ACE Power 631.0 0.0 631.0 0.0

238 rows × 4 columns

test = by_s.index.sort_values()
deduplicated_data = set()
duplicates = set()
for i in range(1,len(test)):
    a = test[i-1]
    b = test[i]
    if a[0:7] == b[0:7]:
            duplicates.add(a)
            duplicates.add(b)
    
by_w.head(20)
by_s.head(20)
big_w = by_w.sort_values('total',ascending = False).head(25)
big_s = by_s.sort_values('total', ascending = False).head(25)
big = pd.concat([big_w,big_s])
big
big.sort_index(inplace = True)
big
bigco = big.index.unique()
bigco
Index(['ACEN', 'AMP Energy', 'Acciona', 'Ark Energy', 'BJEI', 'BayWa r.e.',
       'Bison Energy', 'Canadian Solar', 'Copenhagen Infrastructure Partners',
       'EIWA Energy', 'Edify Energy', 'Engie', 'European Energy',
       'FRV Australia', 'GENEX, J-POWER', 'Global Power Generation Australia',
       'Goldwind Australia', 'Iberdrola', 'Lightsource BP', 'Macorna Energy',
       'Mainstream Renewables', 'Mytilineos', 'Neoen',
       'Octopus Investments, CEFC', 'Origin Energy',
       'Pacific Blue (Pacific Hydro)', 'RATCH-Australia', 'RES',
       'RWE Renewables', 'Renewable Energy Partners', 'Samsung C&T',
       'SkyLab Australia', 'Spark Renewables', 'Squadron Energy',
       'Sunshine Energy', 'Tetris Energy', 'Tilt Renewables',
       'Tilt Renewables, CleanSight', 'Vena Energy Services', 'Vestas',
       'Virya Energy', 'WestWind Energy', 'Windlab', 'X-ELIO'],
      dtype='object', name='owner')
bigmask = projects["owner"].isin(bigco)
leaders = projects[bigmask]
leaders
leaders.set_index('project',inplace = True)
leaders
leaders.reset_index(inplace = True)
leadpiv = pd.pivot_table(leaders,index = ['developer','technology','project'], columns =['state','status'],values='mw',aggfunc = 'sum',margins = True)
leadpiv
leadpiv.fillna(0,inplace=True)
leadpiv
lead_sort = leadpiv.groupby(level = 0).sum().sort_values("All",ascending = False)
lead_sort.head(15)
top_lead_sort = lead_sort.head(15)
top_lead_sort.index
Index(['All', 'Copenhagen Infrastructure Partners', 'Ark Energy', 'Neoen',
       'ACEN', 'RES', 'Windlab', 'Spark Renewables', 'Acciona',
       'WestWind Energy', 'Lightsource BP', 'Renewable Energy Partners',
       'Virya Energy', 'Engie', 'BayWa r.e.'],
      dtype='object', name='developer')
leadpiv.head()
bigmask = leadpiv.index.isin(top_lead_sort.index,level=0)
top15 = leadpiv[bigmask]
top15
bysize = top_lead_sort.index.values.tolist()
bysize
['All',
 'Copenhagen Infrastructure Partners',
 'Ark Energy',
 'Neoen',
 'ACEN',
 'RES',
 'Windlab',
 'Spark Renewables',
 'Acciona',
 'WestWind Energy',
 'Lightsource BP',
 'Renewable Energy Partners',
 'Virya Energy',
 'Engie',
 'BayWa r.e.']
top2 = top15.reindex(bysize,level=0)
top2
top2.loc['Ark Energy']
topdev = top2.drop("All",level=0)
def df_to_space(x):
    x = "%.0f" % x
    if len(x) > 3:
        x = x[:-3]+','+x[-3:]
    if x == '0':
        x = ''
    return x
td = topdev.groupby(["developer",]).sum().sort_values(["All"],ascending = False)
td.to_clipboard()


#for col in td.columns:
    #td[col]= td[col].loc[td[col].str.contains('.')].str.split('.').str[0]
td.style.format (formatter = df_to_space).set_caption("Major developer projects MW").set_table_styles(styles)
topdev.groupby(["status"],axis=1).sum()
/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/2712648979.py:1: FutureWarning: DataFrame.groupby with axis=1 is deprecated. Do `frame.T.groupby(...)` without axis instead.
  topdev.groupby(["status"],axis=1).sum()
status Construction Development Operating
developer technology project
Copenhagen Infrastructure Partners Onshore Wind Evergreen Hydrogen Production Hub (Wind) 10,000.0 0.0 10,000.0 0.0
Solar PV Evergreen Hydrogen Production Hub (Solar) 4,000.0 0.0 4,000.0 0.0
Ark Energy Onshore Wind Boomer Green Energy Hub 1,100.0 0.0 1,100.0 0.0
Bowmans Creek Wind Farm 335.0 0.0 335.0 0.0
Burrendong Wind Farm 500.0 0.0 500.0 0.0
... ... ... ... ... ... ...
BayWa r.e. Solar PV Jung Solar Farm 13.0 0.0 13.0 0.0
Kelsey Creek Solar Farm 50.0 0.0 50.0 0.0
Sandy Creek Solar Farm (BayWa) 12.0 0.0 12.0 0.0
Yarren Hut Solar Farm 28.0 0.0 28.0 0.0
Yatpool Solar Farm 94.0 0.0 0.0 94.0

140 rows × 4 columns

td2.columns
Index(['NSW', 'QLD', 'SA', 'TAS', 'VIC', 'total'], dtype='object', name='state')
td2 = topdev.xs("Development",level=1,axis=1)
states = ['NSW', 'QLD', 'SA', 'TAS', 'VIC']
td2['total'] = td2[states].sum(axis=1)
/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/1427645528.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  td2['total'] = td2[states].sum(axis=1)
td2.sort_values('total',ascending=False,inplace=True)
/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/2537107422.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  td2.sort_values('total',ascending=False,inplace=True)
td3 = td2.loc[(td2!=0).any(axis=1)]
td4 = td3.loc[td3['total'] > 90]
td4
state NSW QLD SA TAS VIC total
developer technology project
Copenhagen Infrastructure Partners Onshore Wind Evergreen Hydrogen Production Hub (Wind) 0.0 0.0 10,000.0 0.0 0.0 10,000.0
Solar PV Evergreen Hydrogen Production Hub (Solar) 0.0 0.0 4,000.0 0.0 0.0 4,000.0
Ark Energy Onshore Wind Collinsville Green Energy Hub 0.0 3,000.0 0.0 0.0 0.0 3,000.0
WestWind Energy Onshore Wind Warracknabeal Energy Park 0.0 0.0 0.0 0.0 1,650.0 1,650.0
Engie Onshore Wind The Plains Wind Farm 1,500.0 0.0 0.0 0.0 0.0 1,500.0
... ... ... ... ... ... ... ... ...
WestWind Energy Onshore Wind Normanville Energy Park 0.0 0.0 0.0 0.0 122.0 122.0
Engie Solar PV Silverleaf Solar Farm 120.0 0.0 0.0 0.0 0.0 120.0
Lightsource BP Solar PV Mannum Solar Farm 0.0 0.0 100.0 0.0 0.0 100.0
Engie Solar PV Yass Solar Energy Park 100.0 0.0 0.0 0.0 0.0 100.0
Neoen Solar PV Lemnos Solar Farm 0.0 0.0 0.0 0.0 100.0 100.0

96 rows × 6 columns

td4.drop('Copenhagen Infrastructure Partners',level = 0, inplace = True)
td4
/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/1425947989.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  td4.drop('Copenhagen Infrastructure Partners',level = 0, inplace = True)
state NSW QLD SA TAS VIC total
developer technology project
Ark Energy Onshore Wind Collinsville Green Energy Hub 0.0 3,000.0 0.0 0.0 0.0 3,000.0
WestWind Energy Onshore Wind Warracknabeal Energy Park 0.0 0.0 0.0 0.0 1,650.0 1,650.0
Engie Onshore Wind The Plains Wind Farm 1,500.0 0.0 0.0 0.0 0.0 1,500.0
Virya Energy Onshore Wind Yanco Delta Wind Farm 1,500.0 0.0 0.0 0.0 0.0 1,500.0
Windlab Onshore Wind Bungaban Wind Farm (Southern QLD Renewable Generation Hub) 0.0 1,400.0 0.0 0.0 0.0 1,400.0
... ... ... ... ... ... ... ... ...
WestWind Energy Onshore Wind Normanville Energy Park 0.0 0.0 0.0 0.0 122.0 122.0
Engie Solar PV Silverleaf Solar Farm 120.0 0.0 0.0 0.0 0.0 120.0
Lightsource BP Solar PV Mannum Solar Farm 0.0 0.0 100.0 0.0 0.0 100.0
Engie Solar PV Yass Solar Energy Park 100.0 0.0 0.0 0.0 0.0 100.0
Neoen Solar PV Lemnos Solar Farm 0.0 0.0 0.0 0.0 100.0 100.0

94 rows × 6 columns

td4.describe()
state NSW QLD SA TAS VIC total
count 94.0 94.0 94.0 94.0 94.0 94.0
mean 226.8 178.5 23.5 36.6 102.7 568.0
std 341.5 429.4 113.2 171.9 298.4 450.6
min 0.0 0.0 0.0 0.0 0.0 100.0
25% 0.0 0.0 0.0 0.0 0.0 266.2
50% 0.0 0.0 0.0 0.0 0.0 400.0
75% 400.0 172.5 0.0 0.0 0.0 794.0
max 1,500.0 3,000.0 850.0 1,260.0 1,650.0 3,000.0
# itk teal style for data frame doesn't print the borders properly in vscode but does in jupyter
styles  = [dict(selector="caption",
       props=[("text-align", "left"),
              ("font-size", "150%"),
              ("color", 'white'),
              ("background-color", "teal"),
              ("caption-side", "top")]),
              dict(selector = "",props =[("color","grey"),("background-color",'white'),('border-bottom', '1px dotted grey'),('font-size','80%')]),
              dict(selector = "th",props =[("background-color",'#00DCDC'),('border-bottom', '1px dotted grey'),('min_width','50px')]),
              dict(selector = "tr",props =[("background-color",'white'),('border-bottom', '1px dotted grey'),("color","black")]),
              dict(selector = ".blank",props =[("background-color",'#00DCDC')]),
              dict(selector = "th.col_heading",props =[("color",'black'),('font-size','100%'),("background-color",'#00DCDC'),('width','60px')]),
              dict( selector = "tr:last-child", props =[("color","black"),('border-bottom', '5px black')])]
projects.columns
Index(['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status',
       'da', 'state', 'owner', 'council'],
      dtype='object')
development = projects.loc[projects['status'] == 'Development']
development.describe()
mw mwh turbines
count 616.0 1.0 137.0
mean 269.2 3,600.0 91.7
std 525.8 NaN 141.1
min 1.2 3,600.0 2.0
25% 29.8 3,600.0 41.0
50% 120.0 3,600.0 63.0
75% 350.0 3,600.0 117.0
max 10,000.0 3,600.0 1,600.0
da = pd.pivot_table(development, index = ['technology','developer'],columns=['state','da'],values='mw',aggfunc = 'sum', margins = True)
da.groupby('technology').sum()
state NSW QLD SA TAS VIC All
da Approved Assessment Not Approved Not Submitted Withdrawn Approved Assessment Not Submitted Approved Assessment Not Submitted Approved Assessment Not Submitted Approved Assessment Not Approved Not Submitted
technology
All 7,171.1 32,364.8 10.0 11,744.8 1,365.0 21,505.9 1,450.0 33,664.0 4,694.4 559.9 19,104.5 1,529.0 2,310.0 2,379.0 7,796.9 5,988.9 75.0 12,076.8 165,790.1
Onshore Wind 2,220.0 19,092.0 0.0 5,052.0 0.0 8,163.0 0.0 25,323.2 1,935.0 270.0 13,710.0 1,196.0 2,310.0 2,091.0 1,914.4 4,824.0 0.0 9,286.0 97,386.6
Solar PV 4,951.1 13,272.9 10.0 6,692.8 1,365.0 13,342.9 1,450.0 8,340.8 2,759.4 289.9 5,394.5 333.0 0.0 288.0 5,882.5 1,165.0 75.0 2,790.8 68,403.5

idx  = pd.IndexSlice
da = da.loc[:,idx[:,["Approved","Assessment"]]]
da
state NSW QLD SA TAS VIC
da Approved Assessment Approved Assessment Approved Assessment Approved Assessment Approved Assessment
technology developer
Onshore Wind ACE Power NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
ACEN NaN 1,100.0 NaN NaN NaN NaN 1,129.0 1,260.0 NaN NaN
AGL Energy NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Acciona NaN 1,406.0 NaN NaN NaN NaN NaN NaN NaN NaN
Ark Energy 335.0 840.0 971.0 NaN NaN NaN NaN 1,050.0 NaN NaN
... ... ... ... ... ... ... ... ... ... ... ...
Solar PV akaysha energy, Wind Prospect NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
cogency 5.7 NaN NaN NaN NaN NaN NaN NaN 500.0 NaN
getgreen, Hareon Solar NaN NaN 50.0 NaN NaN NaN NaN NaN NaN NaN
ib vogt GmbH 127.0 NaN NaN NaN NaN NaN NaN NaN 74.0 NaN
All 7,171.1 32,364.8 21,505.9 1,450.0 4,694.4 559.9 1,529.0 2,310.0 7,796.9 5,988.9

287 rows × 10 columns

da.groupby(['technology']).sum()
state NSW QLD SA TAS VIC
da Approved Assessment Approved Assessment Approved Assessment Approved Assessment Approved Assessment
technology
All 7,171.1 32,364.8 21,505.9 1,450.0 4,694.4 559.9 1,529.0 2,310.0 7,796.9 5,988.9
Onshore Wind 2,220.0 19,092.0 8,163.0 0.0 1,935.0 270.0 1,196.0 2,310.0 1,914.4 4,824.0
Solar PV 4,951.1 13,272.9 13,342.9 1,450.0 2,759.4 289.9 333.0 0.0 5,882.5 1,165.0
da.drop('All',inplace = True,level = 0)
da
da.groupby(['technology']).sum()
state NSW QLD SA TAS VIC
da Approved Assessment Approved Assessment Approved Assessment Approved Assessment Approved Assessment
technology
Onshore Wind 2,220.0 19,092.0 8,163.0 0.0 1,935.0 270.0 1,196.0 2,310.0 1,914.4 4,824.0
Solar PV 4,951.1 13,272.9 13,342.9 1,450.0 2,759.4 289.9 333.0 0.0 5,882.5 1,165.0
windcht = da.loc['Onshore Wind'].sum()
a =pd.DataFrame(data = windcht)
#a.droplevel(level=0, axis=1)
a.shape
a.columns = [''] * len(a.columns)
a
a.columns = ['mw']
b= a.unstack()
b
m2
da Approved Assessment
state
NSW 2,220.0 19,092.0
QLD 8,163.0 0.0
SA 1,935.0 270.0
TAS 1,196.0 2,310.0
VIC 1,914.4 4,824.0
c=b.droplevel(level=0, axis=1)
c
da Approved Assessment
state
NSW 2,220.0 19,092.0
QLD 8,163.0 0.0
SA 1,935.0 270.0
TAS 1,196.0 2,310.0
VIC 1,914.4 4,824.0
d=c.T
d.index.name = None
d
state NSW QLD SA TAS VIC
Approved 2,220.0 8,163.0 1,935.0 1,196.0 1,914.4
Assessment 19,092.0 0.0 270.0 2,310.0 4,824.0
plt.rcParams['font.size'] = 10
plt.style.use (matplotx.styles.dracula)

# need to account for battery charging
titlestring = "Wind farms"
fig, ax = plt.subplots(figsize = (5,2))#1,5,sharey = True,
d.T.plot(kind='bar',legend  = True ,ylabel ="mw",title=titlestring, ax = ax,grid=False)#layout =(1,8)sharey = Truefigsize = (14,4)
sns.despine(top=True, right=True, left=True, bottom=True)
plt.show()

windcht.dtype
dtype('float64')
projects.columns
Index(['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status',
       'da', 'state', 'owner', 'council'],
      dtype='object')
projects['state'].unique()
approved_wind = projects.loc[(projects['technology'] == 'Onshore Wind') & (projects['da'] == 'Approved') & (projects['mw']>90) & (projects['status'] == 'Development')]

wind_ready = pd.pivot_table(approved_wind,index=['developer','project'],columns='state',values='mw',fill_value='',margins=True,aggfunc='sum')
wind_ready.sort_index()
wind_ready = wind_ready.drop('All')
wind_ready
import matplotlib as mpl
label_size = 8
mpl.rcParams['ytick.labelsize'] = label_size
fig, ax = plt.subplots()
fig.set_size_inches(6,6)

wind_ready.sort_values('All').plot(kind = 'barh', title='Approved wind farms by size',ylabel = 'mw',legend = False, ax = ax)
#ax.set_yticks(yticklabels,fontsize = 10)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
plt.show()

fig, ax = plt.subplots()
fig.set_size_inches(6,4)
wrd = wind_ready.groupby(['developer'])['All'].sum().to_frame(name='mw').sort_values('mw',ascending = False)
wrd.plot(kind = 'bar', title='Developers with approved wind farms',ylabel = 'mw',legend = False, ax = ax)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
plt.show()

# itk teal style for data frame doesn't print the borders properly in vscode but does in jupyter
styles  = [dict(selector="caption",
       props=[("text-align", "left"),
              ("font-size", "150%"),
              ("color", 'white'),
              ("background-color", "teal"),
              ("caption-side", "top")]),
              dict(selector = "",props =[("color","grey"),("background-color",'white'),('border-bottom', '1px dotted grey')]),
              dict(selector = "th",props =[("background-color",'white'),('border-bottom', '1px dotted grey'),('min_width','300px')]),
              dict(selector = "tr",props =[("background-color",'white'),('border-bottom', '1px dotted grey'),("color","black")]),
              dict(selector = ".blank",props =[("background-color",'#00DCDC')]),
              dict(selector = "th.col_heading",props =[("color",'black'),('font-size','110%'),("background-color",'#00DCDC')]),
              dict( selector = "tr:last-child", props =[("color","black"),('border-bottom', '5px black')])]
approved_solar = projects.loc[(projects['technology'] == 'Solar PV') & (projects['da'] == 'Approved') & (projects['mw']>90) & (projects['status'] == 'Development')]

solar_ready = pd.pivot_table(approved_solar,index=['developer','project'],columns='state',values='mw',fill_value='',margins=True,aggfunc='sum')
solar_ready.sort_index()
solar_ready = solar_ready.drop('All')
solar_ready
solar_ready.sort_values("All",ascending=False).head(20).to_clipboard()
fig, ax = plt.subplots()
fig.set_size_inches(6,4)
srd = solar_ready.groupby(['developer'])['All'].sum().to_frame(name='mw').sort_values('mw',ascending = False)
sdt = len(srd)
srd = srd.head(15)
srd.plot(kind = 'bar', title='Top 15 out of '+str(sdt)+' developers with approved solar farms',ylabel = 'mw',legend = False, ax = ax)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
plt.show()