ECB Statistical Data Warehouse
The European central bank provides a lot of public data in their Statistical Data Warehouse. In this notebook we will show how to access data and metadata from Python.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import quandl
from pandasdmx import Request
plt.rcParams['figure.figsize'] = 14, 6
sns.set()
Get resource metadata¶
ECB provides multiple resources in the Statistical Data Warehouse. For any resource acronym we can find the associated full name.
ecb = Request('ECB')
def get_resource_name(this_resource_id):
exr_flow = ecb.dataflow()
resource_types = exr_flow.write().dataflow
this_resource_name = exr_flow.write().dataflow.loc[this_resource_id, 'name']
return this_resource_name
this_resource_id = 'IRS'
get_resource_name(this_resource_id)
In order to translate these acronyms into names we can query the ECB warehouse to get a list of all related metadata dimensions:
def get_resource_metadata_decodes(this_resource_id):
this_resource_decodes = ecb.dataflow(this_resource_id).write().codelist
return this_resource_decodes
decodes_df = get_resource_metadata_decodes(this_resource_id)
decodes_df.head(2)
decodes_df.shape
Download data¶
Download data with metadata as pandas
:
def get_ecb_long_term_yields(resource_id, ts_key, params=None):
"""
Download data from ECB
"""
ecb = Request('ECB')
if params is None:
data_response = ecb.data(resource_id = resource_id,
key={'REF_AREA': ts_key,
'IR_TYPE': 'L'})
else:
data_response = ecb.data(resource_id = resource_id,
key={'REF_AREA': ts_key,
'IR_TYPE': 'L'},
params=params
)
raw_data = data_response.data
series_tuple = (s for s in raw_data.series)
data = data_response.write(series_tuple)
return data
# define list of countries
list_of_areas_no_euro = ['BG', 'CZ', 'DK', 'GB', 'HR', 'HU', 'PL', 'RO', 'SE']
# download data
data_df = get_ecb_long_term_yields('IRS', list_of_areas_no_euro, params={'startPeriod': '2016'})
data_df.head(3)
Interpret metadata¶
For all data there exist multiple dimensions of metadata, called concepts
. We can extract the acronyms of all dimensions from the data columns:
def get_concept_list_from_data(data_df):
return list(data_df.columns.names)
list_of_data_concepts = get_concept_list_from_data(data_df)
list_of_data_concepts
So we can come up with a function that maps the dimension acronyms to meaningful names:
def get_concept_decodes(decodes_df, concepts):
concepts_labels = decodes_df.loc[(concepts, concepts), ['name']]
concepts_labels.index = concepts_labels.index.get_level_values(0)
return concepts_labels
concept_decode_mapping = get_concept_decodes(decodes_df, list_of_data_concepts)
concept_decode_mapping
The same way we can also translate all occurring dimension values into meaningful labels:
all_maps = None
for this_concept in list_of_data_concepts:
these_occurring_vals = data_df.columns.get_level_values(this_concept).unique().values
# get occurring values
this_map = decodes_df.loc[this_concept, :].loc[these_occurring_vals, 'name'].to_frame().reset_index()
this_map.rename({'index': 'code'}, axis=1, inplace=True)
this_map['concept'] = this_concept
if all_maps is None:
all_maps = this_map
else:
all_maps = pd.concat([all_maps, this_map], axis=0)
df_merge = concept_decode_mapping.reset_index()
df_merge.columns = ['concept', 'concept_name']
all_maps.merge(df_merge).loc[:, ['concept', 'concept_name', 'code', 'name']]
Or, if we want to look up some particular values for some given dimension (where the values will be translated and the original order will be preserved):
def decode_concept_values(this_concept, these_values):
if type(these_values) == 'str':
map_ss = all_maps.query('concept == @this_concept & code == @these_values')
else:
map_ss = all_maps.query('concept == @this_concept & code in @these_values')
return map_ss.set_index('code').loc[these_values, 'name']
this_concept = 'REF_AREA'
this_concept_values = data_df.columns.get_level_values(this_concept).values
decoded_vals = decode_concept_values('REF_AREA', this_concept_values)
decoded_vals
this_concept = 'MATURITY_CAT'
this_concept_values = data_df.columns.get_level_values(this_concept).values
decode_concept_values(this_concept, this_concept_values)
That way, we can easily relabel the data with regards to the most relevant dimension only:
data_labeled = data_df.copy()
data_labeled.columns = decoded_vals.values
data_labeled.head(3)
data_labeled.plot()
plt.show()