ITK Research

# setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import re
import datetime as dt
#sns.set(style="darkgrid")
pd.options.display.float_format="{:,.1f}".format
nem_path = '/Users/davidleitch/Library/Mobile Documents/com~apple~CloudDocs/nem review/'
from matplotlib.ticker import FormatStrFormatter
#from scipy.optimize import minimize
#from scipy.optimize import Bounds
from scipy import stats
from matplotlib.ticker import FormatStrFormatter
import matplotlib.ticker as plticker
from matplotlib.dates import DateFormatter  
import matplotlib.dates as mdates
import matplotx
import xlwings as xw
from xlwings import view
from xlwings import load
import string
import holoviews as hv
#import hvplot.pandas
hv.extension('bokeh')
import panel as pn
import bokeh
import jupyter_bokeh
import plotly.graph_objects as go
pn.extension('plotly')
pn.extension(defer_load=True, loading_indicator=True)

file  ='/Users/davidleitch/Dropbox/the analyst/Price forecast current models/RenewMap-3.csv'

projects = pd.read_csv(file)
projects.head()

	Project Name	Technology	Project Size (MW)	Project Storage Size (MWh)	Turbines	Developer	Project Status	DA Status	EPBC Code	EPBC Link	State	Owner	OpenNEM	Council Area	Network
0	North Star Junction Solar Farm	Solar PV	100.0	NaN	NaN	Fortescue Metals Group	Construction	Approved	NaN	NaN	WA	Fortescue Metals Group	NaN	Port Hedland	NaN
1	Port Hedland Solar Project	Solar PV	45.0	NaN	NaN	Alinta Energy	Construction	Approved	2022/09241	https://epbcpublicportal.awe.gov.au/all-referr...	WA	APA Group	NaN	Port Hedland	NWIS
2	Yuri Renewable Hydrogen to Ammonia Project (So...	Solar PV	18.0	NaN	NaN	Engie	Construction	Approved	2020/8739	https://epbcpublicportal.awe.gov.au/all-referr...	WA	ENGIE, Mitsui	NaN	Karratha	NWIS
3	Amrun Solar Farm	Solar PV	12.4	NaN	NaN	Rio Tinto	Construction	Approved	NaN	NaN	QLD	Rio Tinto	NaN	Cook	Off Grid
4	Bellevue Gold Hybrid (Solar)	Solar PV	NaN	NaN	NaN	Zenith Energy	Construction	Approved	NaN	NaN	WA	Zenith Energy	NaN	Leonora	Off Grid

projects.columns

Index(['Project Name', 'Technology', 'Project Size (MW)',
       'Project Storage Size (MWh)', 'Turbines', 'Developer', 'Project Status',
       'DA Status', 'EPBC Code', 'EPBC Link', 'State', 'Owner', 'OpenNEM',
       'Council Area', 'Network'],
      dtype='object')

project_names = ['project',"technology",'mw','mwh','turbines','developer','status','da','epbc','epbc_link','state','owner','open_nem','council','network']
projects.columns = project_names
projects.columns

Index(['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status',
       'da', 'epbc', 'epbc_link', 'state', 'owner', 'open_nem', 'council',
       'network'],
      dtype='object')

temp=list(range(1,100)) # has to be integers
percentile=[x/100 for x in temp]
s1 = projects['mw'].quantile(q=percentile)
s1.plot()

projects.sort_values(by = 'mw',ascending = False, inplace = True)

keep_col = ['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status','da',  'state', 'owner',  'council']
projects2 = projects.copy()
projects = projects2[keep_col]
projects.loc[(projects['mw'] > 49) &  (projects['mw'] < 3001) ]
projects.describe()

	mw	mwh	turbines
count	1,068.0	1.0	279.0
mean	299.3	3,600.0	80.3
std	1,564.2	NaN	214.6
min	0.1	3,600.0	1.0
25%	9.0	3,600.0	22.0
50%	80.0	3,600.0	47.0
75%	250.0	3,600.0	81.0
max	30,000.0	3,600.0	3,000.0

projects['state'].unique()

array(['WA', 'NT', 'SA', 'QLD', 'VIC', 'NSW', 'TAS', 'ACT'], dtype=object)

nem = ['SA', 'QLD', 'VIC', 'NSW', 'TAS']
projects3=projects.copy()
kept = projects3['state'].isin(nem)
projects = projects3[kept].copy()
projects.describe()

	mw	mwh	turbines
count	935.0	1.0	243.0
mean	205.8	3,600.0	69.8
std	441.0	NaN	111.0
min	0.1	3,600.0	1.0
25%	10.0	3,600.0	27.5
50%	90.0	3,600.0	48.0
75%	262.5	3,600.0	89.0
max	10,000.0	3,600.0	1,600.0

projects.head()

	project	technology	mw	mwh	turbines	developer	status	da	state	owner	council
1062	Evergreen Hydrogen Production Hub (Wind)	Onshore Wind	10,000.0	NaN	1,600.0	Copenhagen Infrastructure Partners	Development	Not Submitted	SA	Copenhagen Infrastructure Partners	Unincorporated SA
410	Evergreen Hydrogen Production Hub (Solar)	Solar PV	4,000.0	NaN	NaN	Copenhagen Infrastructure Partners	Development	Not Submitted	SA	Copenhagen Infrastructure Partners	Unincorporated SA
924	Collinsville Green Energy Hub	Onshore Wind	3,000.0	NaN	NaN	Ark Energy	Development	Not Submitted	QLD	Ark Energy	Whitsunday
964	Warracknabeal Energy Park	Onshore Wind	1,650.0	NaN	211.0	WestWind Energy	Development	Assessment	VIC	WestWind Energy	Yarriambiack
966	The Plains Wind Farm	Onshore Wind	1,500.0	NaN	201.0	Engie	Development	Assessment	NSW	Engie	Edward River, Hay

temp=list(range(1,100)) # has to be integers
percentile=[x/100 for x in temp]
s1 = projects['mw'].quantile(q=percentile)
s1.plot()

projects.status.unique()

array(['Development', 'Construction', 'Operating'], dtype=object)

fp = projects.copy()

fp.sort_values("owner")
fp_owners = fp["owner"]

# pivot
piv = pd.pivot_table(projects,index = ['developer','technology','project'], columns =['state','status'],values='mw',aggfunc = 'sum')
#(pd.concat([piv,piv.query("'technology != 'total'").sum(level =0).assign('technology'='Total').set_index('technology',append=True)]).sort_index())

piv2 =pd.pivot_table(projects,index = ['developer','technology'], columns =['state','status'],values='mw',aggfunc = 'sum')
piv3 = pd.concat([piv,piv2],axis=0)
piv3.tail(20)

piv2['total']=piv2.sum(axis=1)

piv2.head()

piv4 = piv2.groupby(['technology','developer','total']).sum().sort_values(['technology','total'],ascending=False)
piv4

piv4.head(30)

piv4

# try a second pivot to get sub totals 
p1 = pd.pivot_table(projects,index = ['developer','technology','project'], columns =['state','status'],values='mw',fill_value=0,dropna=True,aggfunc = "sum")

p1['total']=p1.sum(axis=1)
p1

pd.concat([d.append(d.sum().rename((k,'Total')))for k,d in p1.groupby(level=0)]).append(p1.sum().rename(('Grand','Total')))

pivg = piv.groupby(['developer','technology']).sum()
pivg

piv2

state	NSW			QLD			SA			TAS		VIC			total
status	Construction	Development	Operating	Construction	Development	Operating	Construction	Development	Operating	Development	Operating	Construction	Development	Operating
technology
Onshore Wind	1,013.1	26,364.0	2,234.9	1,625.0	33,486.2	1,025.7	412.0	15,915.0	2,340.9	5,597.0	559.4	1,071.0	16,024.4	4,377.1	112,045.7
Solar PV	2,102.9	26,332.7	3,688.5	206.4	23,133.7	3,519.0	10.9	8,443.8	797.9	621.0	5.0	278.9	9,918.2	1,311.7	80,370.6
total	3,116.1	52,696.7	5,923.4	1,831.4	56,619.9	4,544.7	422.9	24,358.8	3,138.8	6,218.0	564.4	1,350.0	25,942.6	5,688.8	192,416.3

byowner =pd.pivot_table(projects,index = ['owner','technology'], columns =['state','status'],values='mw',margins=True,margins_name = 'total',aggfunc = "sum")
byowner

byowner_status = byowner.groupby(level = 'status',axis=1).sum()
byowner_status.columns = ['total', 'Construction', 'Development', 'Operating']
byowner_status.sort_values(by = 'total',ascending = False)#by = 'mw',ascending = False, inplace = True

/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/2632836213.py:1: FutureWarning: DataFrame.groupby with axis=1 is deprecated. Do `frame.T.groupby(...)` without axis instead.
  byowner_status = byowner.groupby(level = 'status',axis=1).sum()

		total	Construction	Development	Operating
owner	technology
total		189,258.0	6,720.4	162,765.6	19,772.1
Copenhagen Infrastructure Partners	Onshore Wind	10,500.0	0.0	10,500.0	0.0
Ark Energy	Onshore Wind	6,661.0	0.0	6,661.0	0.0
Windlab	Onshore Wind	5,300.0	0.0	5,300.0	0.0
Neoen	Onshore Wind	4,584.0	412.0	3,821.0	351.0
...	...	...	...	...	...
Forestry Corporation NSW	Onshore Wind	0.0	0.0	0.0	0.0
Gippsland Water	Onshore Wind	0.0	0.0	0.0	0.0
LP Renewable Projects	Onshore Wind	0.0	0.0	0.0	0.0
Marble Energy	Onshore Wind	0.0	0.0	0.0	0.0
NIFSAN Developments	Solar PV	0.0	0.0	0.0	0.0

354 rows × 4 columns

bys = byowner_status.swaplevel('owner','technology')
bys.sort_values(['technology','owner'],ascending=False,inplace=True)
bys

		total	Construction	Development	Operating
technology	owner
Solar PV	ib vogt GmbH	433.0	0.0	433.0	0.0
	getgreen, Hareon Solar	50.0	0.0	50.0	0.0
	akaysha energy, Wind Prospect	200.0	0.0	200.0	0.0
	ZEN Energy	200.0	0.0	200.0	0.0
	Yancoal	330.0	0.0	330.0	0.0
...	...	...	...	...	...
Onshore Wind	AGL Energy	450.8	0.0	360.0	90.8
	ACEN	3,489.0	0.0	3,489.0	0.0
	ACE Power	150.0	0.0	150.0	0.0
	ACCIONA Energia (50%), EnergyAustralia (50%)	64.0	0.0	0.0	64.0
	total	189,258.0	6,720.4	162,765.6	19,772.1

354 rows × 4 columns

bys.sort_values(['technology','total'],ascending=False,inplace=True)
bys

		total	Construction	Development	Operating
technology	owner
Solar PV	Copenhagen Infrastructure Partners	4,012.0	0.0	4,012.0	0.0
	Lightsource BP	3,240.0	0.0	3,240.0	0.0
	FRV Australia	2,854.9	300.0	2,036.8	518.1
	ACEN	2,540.0	720.0	1,420.0	400.0
	GENEX, J-POWER	2,475.0	0.0	2,475.0	0.0
...	...	...	...	...	...
Onshore Wind	LP Renewable Projects	0.0	0.0	0.0	0.0
	Gippsland Water	0.0	0.0	0.0	0.0
	Forestry Corporation NSW	0.0	0.0	0.0	0.0
	Dawn Renewables	0.0	0.0	0.0	0.0
	total	189,258.0	6,720.4	162,765.6	19,772.1

354 rows × 4 columns

by_w = bys.loc['Onshore Wind']
by_w

	total	Construction	Development	Operating
owner
Woolnorth Renewables	400.0	0.0	400.0	0.0
Windlab, Eurus	43.2	0.0	0.0	43.2
Windlab	5,300.0	0.0	5,300.0	0.0
Wind Prospect, Engie	350.0	0.0	350.0	0.0
Wind Prospect	700.0	0.0	700.0	0.0
...	...	...	...	...
Acciona	3,584.0	0.0	3,056.0	528.0
AGL Energy	450.8	0.0	360.0	90.8
ACEN	3,489.0	0.0	3,489.0	0.0
ACE Power	150.0	0.0	150.0	0.0
ACCIONA Energia (50%), EnergyAustralia (50%)	64.0	0.0	0.0	64.0

115 rows × 4 columns

by_s =bys.loc['Solar PV']
by_s

	total	Construction	Development	Operating
owner
ib vogt GmbH	433.0	0.0	433.0	0.0
getgreen, Hareon Solar	50.0	0.0	50.0	0.0
akaysha energy, Wind Prospect	200.0	0.0	200.0	0.0
ZEN Energy	200.0	0.0	200.0	0.0
Yancoal	330.0	0.0	330.0	0.0
...	...	...	...	...
AMP Energy	895.0	0.0	780.0	115.0
AGL Energy	5.0	5.0	0.0	0.1
ACEnergy	59.9	0.0	34.9	25.0
ACEN	2,540.0	720.0	1,420.0	400.0
ACE Power	631.0	0.0	631.0	0.0

238 rows × 4 columns

test = by_s.index.sort_values()
deduplicated_data = set()
duplicates = set()
for i in range(1,len(test)):
    a = test[i-1]
    b = test[i]
    if a[0:7] == b[0:7]:
            duplicates.add(a)
            duplicates.add(b)

by_w.head(20)

by_s.head(20)

big_w = by_w.sort_values('total',ascending = False).head(25)
big_s = by_s.sort_values('total', ascending = False).head(25)
big = pd.concat([big_w,big_s])
big

big.sort_index(inplace = True)
big

bigco = big.index.unique()
bigco

Index(['ACEN', 'AMP Energy', 'Acciona', 'Ark Energy', 'BJEI', 'BayWa r.e.',
       'Bison Energy', 'Canadian Solar', 'Copenhagen Infrastructure Partners',
       'EIWA Energy', 'Edify Energy', 'Engie', 'European Energy',
       'FRV Australia', 'GENEX, J-POWER', 'Global Power Generation Australia',
       'Goldwind Australia', 'Iberdrola', 'Lightsource BP', 'Macorna Energy',
       'Mainstream Renewables', 'Mytilineos', 'Neoen',
       'Octopus Investments, CEFC', 'Origin Energy',
       'Pacific Blue (Pacific Hydro)', 'RATCH-Australia', 'RES',
       'RWE Renewables', 'Renewable Energy Partners', 'Samsung C&T',
       'SkyLab Australia', 'Spark Renewables', 'Squadron Energy',
       'Sunshine Energy', 'Tetris Energy', 'Tilt Renewables',
       'Tilt Renewables, CleanSight', 'Vena Energy Services', 'Vestas',
       'Virya Energy', 'WestWind Energy', 'Windlab', 'X-ELIO'],
      dtype='object', name='owner')

bigmask = projects["owner"].isin(bigco)
leaders = projects[bigmask]
leaders

leaders.set_index('project',inplace = True)
leaders

leaders.reset_index(inplace = True)
leadpiv = pd.pivot_table(leaders,index = ['developer','technology','project'], columns =['state','status'],values='mw',aggfunc = 'sum',margins = True)
leadpiv

leadpiv.fillna(0,inplace=True)
leadpiv

lead_sort = leadpiv.groupby(level = 0).sum().sort_values("All",ascending = False)
lead_sort.head(15)

top_lead_sort = lead_sort.head(15)
top_lead_sort.index

Index(['All', 'Copenhagen Infrastructure Partners', 'Ark Energy', 'Neoen',
       'ACEN', 'RES', 'Windlab', 'Spark Renewables', 'Acciona',
       'WestWind Energy', 'Lightsource BP', 'Renewable Energy Partners',
       'Virya Energy', 'Engie', 'BayWa r.e.'],
      dtype='object', name='developer')

leadpiv.head()

bigmask = leadpiv.index.isin(top_lead_sort.index,level=0)
top15 = leadpiv[bigmask]
top15

bysize = top_lead_sort.index.values.tolist()
bysize

['All',
 'Copenhagen Infrastructure Partners',
 'Ark Energy',
 'Neoen',
 'ACEN',
 'RES',
 'Windlab',
 'Spark Renewables',
 'Acciona',
 'WestWind Energy',
 'Lightsource BP',
 'Renewable Energy Partners',
 'Virya Energy',
 'Engie',
 'BayWa r.e.']

top2 = top15.reindex(bysize,level=0)
top2

top2.loc['Ark Energy']

topdev = top2.drop("All",level=0)

def df_to_space(x):
    x = "%.0f" % x
    if len(x) > 3:
        x = x[:-3]+','+x[-3:]
    if x == '0':
        x = ''
    return x

td = topdev.groupby(["developer",]).sum().sort_values(["All"],ascending = False)
td.to_clipboard()


#for col in td.columns:
    #td[col]= td[col].loc[td[col].str.contains('.')].str.split('.').str[0]
td.style.format (formatter = df_to_space).set_caption("Major developer projects MW").set_table_styles(styles)

topdev.groupby(["status"],axis=1).sum()

/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/2712648979.py:1: FutureWarning: DataFrame.groupby with axis=1 is deprecated. Do `frame.T.groupby(...)` without axis instead.
  topdev.groupby(["status"],axis=1).sum()

		status		Construction	Development	Operating
developer	technology	project
Copenhagen Infrastructure Partners	Onshore Wind	Evergreen Hydrogen Production Hub (Wind)	10,000.0	0.0	10,000.0	0.0
Copenhagen Infrastructure Partners	Solar PV	Evergreen Hydrogen Production Hub (Solar)	4,000.0	0.0	4,000.0	0.0
Ark Energy	Onshore Wind	Boomer Green Energy Hub	1,100.0	0.0	1,100.0	0.0
		Bowmans Creek Wind Farm	335.0	0.0	335.0	0.0
		Burrendong Wind Farm	500.0	0.0	500.0	0.0
...	...	...	...	...	...	...
BayWa r.e.	Solar PV	Jung Solar Farm	13.0	0.0	13.0	0.0
		Kelsey Creek Solar Farm	50.0	0.0	50.0	0.0
		Sandy Creek Solar Farm (BayWa)	12.0	0.0	12.0	0.0
		Yarren Hut Solar Farm	28.0	0.0	28.0	0.0
		Yatpool Solar Farm	94.0	0.0	0.0	94.0

140 rows × 4 columns

td2.columns

Index(['NSW', 'QLD', 'SA', 'TAS', 'VIC', 'total'], dtype='object', name='state')

td2 = topdev.xs("Development",level=1,axis=1)
states = ['NSW', 'QLD', 'SA', 'TAS', 'VIC']
td2['total'] = td2[states].sum(axis=1)

/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/1427645528.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  td2['total'] = td2[states].sum(axis=1)

td2.sort_values('total',ascending=False,inplace=True)

/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/2537107422.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  td2.sort_values('total',ascending=False,inplace=True)

td3 = td2.loc[(td2!=0).any(axis=1)]
td4 = td3.loc[td3['total'] > 90]
td4

		state	NSW	QLD	SA	TAS	VIC	total
developer	technology	project
Copenhagen Infrastructure Partners	Onshore Wind	Evergreen Hydrogen Production Hub (Wind)	0.0	0.0	10,000.0	0.0	0.0	10,000.0
Copenhagen Infrastructure Partners	Solar PV	Evergreen Hydrogen Production Hub (Solar)	0.0	0.0	4,000.0	0.0	0.0	4,000.0
Ark Energy	Onshore Wind	Collinsville Green Energy Hub	0.0	3,000.0	0.0	0.0	0.0	3,000.0
WestWind Energy	Onshore Wind	Warracknabeal Energy Park	0.0	0.0	0.0	0.0	1,650.0	1,650.0
Engie	Onshore Wind	The Plains Wind Farm	1,500.0	0.0	0.0	0.0	0.0	1,500.0
...	...	...	...	...	...	...	...	...
WestWind Energy	Onshore Wind	Normanville Energy Park	0.0	0.0	0.0	0.0	122.0	122.0
Engie	Solar PV	Silverleaf Solar Farm	120.0	0.0	0.0	0.0	0.0	120.0
Lightsource BP	Solar PV	Mannum Solar Farm	0.0	0.0	100.0	0.0	0.0	100.0
Engie	Solar PV	Yass Solar Energy Park	100.0	0.0	0.0	0.0	0.0	100.0
Neoen	Solar PV	Lemnos Solar Farm	0.0	0.0	0.0	0.0	100.0	100.0

96 rows × 6 columns

td4.drop('Copenhagen Infrastructure Partners',level = 0, inplace = True)
td4

/var/folders/n_/ndsz23n10w355k9ys04xltkm0000gp/T/ipykernel_11569/1425947989.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  td4.drop('Copenhagen Infrastructure Partners',level = 0, inplace = True)

		state	NSW	QLD	SA	TAS	VIC	total
developer	technology	project
Ark Energy	Onshore Wind	Collinsville Green Energy Hub	0.0	3,000.0	0.0	0.0	0.0	3,000.0
WestWind Energy	Onshore Wind	Warracknabeal Energy Park	0.0	0.0	0.0	0.0	1,650.0	1,650.0
Engie	Onshore Wind	The Plains Wind Farm	1,500.0	0.0	0.0	0.0	0.0	1,500.0
Virya Energy	Onshore Wind	Yanco Delta Wind Farm	1,500.0	0.0	0.0	0.0	0.0	1,500.0
Windlab	Onshore Wind	Bungaban Wind Farm (Southern QLD Renewable Generation Hub)	0.0	1,400.0	0.0	0.0	0.0	1,400.0
...	...	...	...	...	...	...	...	...
WestWind Energy	Onshore Wind	Normanville Energy Park	0.0	0.0	0.0	0.0	122.0	122.0
Engie	Solar PV	Silverleaf Solar Farm	120.0	0.0	0.0	0.0	0.0	120.0
Lightsource BP	Solar PV	Mannum Solar Farm	0.0	0.0	100.0	0.0	0.0	100.0
Engie	Solar PV	Yass Solar Energy Park	100.0	0.0	0.0	0.0	0.0	100.0
Neoen	Solar PV	Lemnos Solar Farm	0.0	0.0	0.0	0.0	100.0	100.0

94 rows × 6 columns

td4.describe()

state	NSW	QLD	SA	TAS	VIC	total
count	94.0	94.0	94.0	94.0	94.0	94.0
mean	226.8	178.5	23.5	36.6	102.7	568.0
std	341.5	429.4	113.2	171.9	298.4	450.6
min	0.0	0.0	0.0	0.0	0.0	100.0
25%	0.0	0.0	0.0	0.0	0.0	266.2
50%	0.0	0.0	0.0	0.0	0.0	400.0
75%	400.0	172.5	0.0	0.0	0.0	794.0
max	1,500.0	3,000.0	850.0	1,260.0	1,650.0	3,000.0

# itk teal style for data frame doesn't print the borders properly in vscode but does in jupyter
styles  = [dict(selector="caption",
       props=[("text-align", "left"),
              ("font-size", "150%"),
              ("color", 'white'),
              ("background-color", "teal"),
              ("caption-side", "top")]),
              dict(selector = "",props =[("color","grey"),("background-color",'white'),('border-bottom', '1px dotted grey'),('font-size','80%')]),
              dict(selector = "th",props =[("background-color",'#00DCDC'),('border-bottom', '1px dotted grey'),('min_width','50px')]),
              dict(selector = "tr",props =[("background-color",'white'),('border-bottom', '1px dotted grey'),("color","black")]),
              dict(selector = ".blank",props =[("background-color",'#00DCDC')]),
              dict(selector = "th.col_heading",props =[("color",'black'),('font-size','100%'),("background-color",'#00DCDC'),('width','60px')]),
              dict( selector = "tr:last-child", props =[("color","black"),('border-bottom', '5px black')])]

projects.columns

Index(['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status',
       'da', 'state', 'owner', 'council'],
      dtype='object')

development = projects.loc[projects['status'] == 'Development']

development.describe()

	mw	mwh	turbines
count	616.0	1.0	137.0
mean	269.2	3,600.0	91.7
std	525.8	NaN	141.1
min	1.2	3,600.0	2.0
25%	29.8	3,600.0	41.0
50%	120.0	3,600.0	63.0
75%	350.0	3,600.0	117.0
max	10,000.0	3,600.0	1,600.0

da = pd.pivot_table(development, index = ['technology','developer'],columns=['state','da'],values='mw',aggfunc = 'sum', margins = True)

da.groupby('technology').sum()

state	NSW					QLD			SA			TAS			VIC				All
da	Approved	Assessment	Not Approved	Not Submitted	Withdrawn	Approved	Assessment	Not Submitted	Approved	Assessment	Not Submitted	Approved	Assessment	Not Submitted	Approved	Assessment	Not Approved	Not Submitted
technology
All	7,171.1	32,364.8	10.0	11,744.8	1,365.0	21,505.9	1,450.0	33,664.0	4,694.4	559.9	19,104.5	1,529.0	2,310.0	2,379.0	7,796.9	5,988.9	75.0	12,076.8	165,790.1
Onshore Wind	2,220.0	19,092.0	0.0	5,052.0	0.0	8,163.0	0.0	25,323.2	1,935.0	270.0	13,710.0	1,196.0	2,310.0	2,091.0	1,914.4	4,824.0	0.0	9,286.0	97,386.6
Solar PV	4,951.1	13,272.9	10.0	6,692.8	1,365.0	13,342.9	1,450.0	8,340.8	2,759.4	289.9	5,394.5	333.0	0.0	288.0	5,882.5	1,165.0	75.0	2,790.8	68,403.5


idx  = pd.IndexSlice
da = da.loc[:,idx[:,["Approved","Assessment"]]]
da

	state	NSW		QLD		SA		TAS		VIC
	da	Approved	Assessment	Approved	Assessment	Approved	Assessment	Approved	Assessment	Approved	Assessment
technology	developer
Onshore Wind	ACE Power	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	ACEN	NaN	1,100.0	NaN	NaN	NaN	NaN	1,129.0	1,260.0	NaN	NaN
	AGL Energy	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	Acciona	NaN	1,406.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	Ark Energy	335.0	840.0	971.0	NaN	NaN	NaN	NaN	1,050.0	NaN	NaN
...	...	...	...	...	...	...	...	...	...	...	...
Solar PV	akaysha energy, Wind Prospect	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	cogency	5.7	NaN	NaN	NaN	NaN	NaN	NaN	NaN	500.0	NaN
	getgreen, Hareon Solar	NaN	NaN	50.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	ib vogt GmbH	127.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	74.0	NaN
All		7,171.1	32,364.8	21,505.9	1,450.0	4,694.4	559.9	1,529.0	2,310.0	7,796.9	5,988.9

287 rows × 10 columns

da.groupby(['technology']).sum()

state	NSW		QLD		SA		TAS		VIC
da	Approved	Assessment	Approved	Assessment	Approved	Assessment	Approved	Assessment	Approved	Assessment
technology
All	7,171.1	32,364.8	21,505.9	1,450.0	4,694.4	559.9	1,529.0	2,310.0	7,796.9	5,988.9
Onshore Wind	2,220.0	19,092.0	8,163.0	0.0	1,935.0	270.0	1,196.0	2,310.0	1,914.4	4,824.0
Solar PV	4,951.1	13,272.9	13,342.9	1,450.0	2,759.4	289.9	333.0	0.0	5,882.5	1,165.0

da.drop('All',inplace = True,level = 0)
da

da.groupby(['technology']).sum()

state	NSW		QLD		SA		TAS		VIC
da	Approved	Assessment	Approved	Assessment	Approved	Assessment	Approved	Assessment	Approved	Assessment
technology
Onshore Wind	2,220.0	19,092.0	8,163.0	0.0	1,935.0	270.0	1,196.0	2,310.0	1,914.4	4,824.0
Solar PV	4,951.1	13,272.9	13,342.9	1,450.0	2,759.4	289.9	333.0	0.0	5,882.5	1,165.0

windcht = da.loc['Onshore Wind'].sum()
a =pd.DataFrame(data = windcht)
#a.droplevel(level=0, axis=1)
a.shape
a.columns = [''] * len(a.columns)
a
a.columns = ['mw']

b= a.unstack()
b

	m2
da	Approved	Assessment
state
NSW	2,220.0	19,092.0
QLD	8,163.0	0.0
SA	1,935.0	270.0
TAS	1,196.0	2,310.0
VIC	1,914.4	4,824.0

c=b.droplevel(level=0, axis=1)
c

da	Approved	Assessment
state
NSW	2,220.0	19,092.0
QLD	8,163.0	0.0
SA	1,935.0	270.0
TAS	1,196.0	2,310.0
VIC	1,914.4	4,824.0

d=c.T
d.index.name = None
d

state	NSW	QLD	SA	TAS	VIC
Approved	2,220.0	8,163.0	1,935.0	1,196.0	1,914.4
Assessment	19,092.0	0.0	270.0	2,310.0	4,824.0

plt.rcParams['font.size'] = 10
plt.style.use (matplotx.styles.dracula)

# need to account for battery charging
titlestring = "Wind farms"
fig, ax = plt.subplots(figsize = (5,2))#1,5,sharey = True,
d.T.plot(kind='bar',legend  = True ,ylabel ="mw",title=titlestring, ax = ax,grid=False)#layout =(1,8)sharey = Truefigsize = (14,4)
sns.despine(top=True, right=True, left=True, bottom=True)
plt.show()

windcht.dtype

dtype('float64')

projects.columns

Index(['project', 'technology', 'mw', 'mwh', 'turbines', 'developer', 'status',
       'da', 'state', 'owner', 'council'],
      dtype='object')

projects['state'].unique()

approved_wind = projects.loc[(projects['technology'] == 'Onshore Wind') & (projects['da'] == 'Approved') & (projects['mw']>90) & (projects['status'] == 'Development')]

wind_ready = pd.pivot_table(approved_wind,index=['developer','project'],columns='state',values='mw',fill_value='',margins=True,aggfunc='sum')
wind_ready.sort_index()
wind_ready = wind_ready.drop('All')
wind_ready

import matplotlib as mpl
label_size = 8
mpl.rcParams['ytick.labelsize'] = label_size
fig, ax = plt.subplots()
fig.set_size_inches(6,6)

wind_ready.sort_values('All').plot(kind = 'barh', title='Approved wind farms by size',ylabel = 'mw',legend = False, ax = ax)
#ax.set_yticks(yticklabels,fontsize = 10)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
plt.show()

fig, ax = plt.subplots()
fig.set_size_inches(6,4)
wrd = wind_ready.groupby(['developer'])['All'].sum().to_frame(name='mw').sort_values('mw',ascending = False)
wrd.plot(kind = 'bar', title='Developers with approved wind farms',ylabel = 'mw',legend = False, ax = ax)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
plt.show()

# itk teal style for data frame doesn't print the borders properly in vscode but does in jupyter
styles  = [dict(selector="caption",
       props=[("text-align", "left"),
              ("font-size", "150%"),
              ("color", 'white'),
              ("background-color", "teal"),
              ("caption-side", "top")]),
              dict(selector = "",props =[("color","grey"),("background-color",'white'),('border-bottom', '1px dotted grey')]),
              dict(selector = "th",props =[("background-color",'white'),('border-bottom', '1px dotted grey'),('min_width','300px')]),
              dict(selector = "tr",props =[("background-color",'white'),('border-bottom', '1px dotted grey'),("color","black")]),
              dict(selector = ".blank",props =[("background-color",'#00DCDC')]),
              dict(selector = "th.col_heading",props =[("color",'black'),('font-size','110%'),("background-color",'#00DCDC')]),
              dict( selector = "tr:last-child", props =[("color","black"),('border-bottom', '5px black')])]

approved_solar = projects.loc[(projects['technology'] == 'Solar PV') & (projects['da'] == 'Approved') & (projects['mw']>90) & (projects['status'] == 'Development')]

solar_ready = pd.pivot_table(approved_solar,index=['developer','project'],columns='state',values='mw',fill_value='',margins=True,aggfunc='sum')
solar_ready.sort_index()
solar_ready = solar_ready.drop('All')
solar_ready

solar_ready.sort_values("All",ascending=False).head(20).to_clipboard()

fig, ax = plt.subplots()
fig.set_size_inches(6,4)
srd = solar_ready.groupby(['developer'])['All'].sum().to_frame(name='mw').sort_values('mw',ascending = False)
sdt = len(srd)
srd = srd.head(15)
srd.plot(kind = 'bar', title='Top 15 out of '+str(sdt)+' developers with approved solar farms',ylabel = 'mw',legend = False, ax = ax)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
plt.show()