ArcticDB_demo_equity_options
在 Github 中查看 | 在 Google Colab 中打开使用 ArcticDB 处理股指期权数据:一个实践示例
股指期权样本工作流
在此 Notebook 中,我们将
- 下载(从 github)一组科技股期权市场数据
- 使用增量时间序列更新工作流将数据存储在 ArcticDB 中
- 创建一系列有用的期权查询
- 使用查询驱动交互式图表
感谢 optiondata.org
我们要感谢 optiondata.org 允许我们在本演示中使用他们的一小部分免费数据。
这些数据是免费的,可在其网站上自行下载。
安装和导入¶
输入 [ ]
已复制!
!pip install arcticdb
!pip install arcticdb
输入 [ ]
已复制!
%matplotlib inline
import arcticdb as adb
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import ipywidgets as widgets
import functools
%matplotlib inline import arcticdb as adb import pandas as pd from datetime import datetime import matplotlib.pyplot as plt import ipywidgets as widgets import functools
Notebook 参数¶
输入 [3]
已复制!
branch = 'master'
all_dates = ['2013-06-03', '2013-06-10', '2013-06-17', '2013-06-24']
delta_low = 0.05
delta_high = 0.55
branch = 'master' all_dates = ['2013-06-03', '2013-06-10', '2013-06-17', '2013-06-24'] delta_low = 0.05 delta_high = 0.55
ArcticDB 设置¶
输入 [4]
已复制!
arctic = adb.Arctic("lmdb://arcticdb_equity_options")
lib = arctic.get_library('demo_options', create_if_missing=True)
arctic = adb.Arctic("lmdb://arcticdb_equity_options") lib = arctic.get_library('demo_options', create_if_missing=True)
用于读取 Github 和 ArcticDB 查询的函数¶
输入 [12]
已复制!
def gitub_url(as_of, branch):
return f"https://raw.githubusercontent.com/man-group/ArcticDB/{branch}/docs/mkdocs/docs/notebooks/data/{as_of}tech-options.csv"
# read the csv from github
def read_github_options_file(as_of, branch='master'):
try:
raw_df = pd.read_csv(gitub_url(as_of, branch))
except Exception as e:
raise Exception(f"Github access error: {e}")
return raw_df.set_index(pd.DatetimeIndex(raw_df['quote_date'])).drop(columns=['Unnamed: 0'])
def uly_symbol(uly):
return f"options/{uly}"
# query to get option expiries for an underlying
def options_expiries_query(lazy_df, as_of, underlying):
# exclude options expiring on as_of - no time value left
filter = (lazy_df['expiration'] != as_of)
lazy_df = lazy_df[filter].groupby('expiration').agg({'volume': 'sum'})
return lazy_df
def read_expiries(as_of, underlying):
read_date = pd.Timestamp(as_of)
sym = uly_symbol(underlying)
lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True)
lazy_df = options_expiries_query(lazy_df, as_of, underlying)
return lazy_df.collect().data.sort_index().index.values
def read_all_underlyings():
# use the symbol list to get all underlyings
return sorted([s.split('/')[1] for s in lib.list_symbols()])
# query to get all options for an expiry
# as_of via date_range, uly via symbol
def options_curve_single_expiry_query(lazy_df, expiry):
filter = (lazy_df['expiration'] == expiry)
return lazy_df[filter]
# query to get all options for an expiry with delta in a specified interval
# calls have delta >= 0, puts have delta <= 0
def options_curve_single_expiry_delta_bracket_query(lazy_df, expiry, delta_low, delta_high):
filter = ((lazy_df['expiration'] == expiry) &
(abs(lazy_df['delta']) >= delta_low) &
(abs(lazy_df['delta']) <= delta_high)
)
return lazy_df[filter].groupby('strike').agg({'implied_volatility': 'mean'})
def read_vol_curve_single_expiry(as_of, underlying, expiry):
read_date = pd.Timestamp(as_of)
sym = uly_symbol(underlying)
lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True)
lazy_df = options_curve_single_expiry_query(lazy_df, expiry)
return lazy_df.collect().data
@functools.cache
def read_vol_curve_single_expiry_exclude_itm_otm(as_of, underlying, expiry, delta_low, delta_high):
read_date = pd.Timestamp(as_of)
sym = uly_symbol(underlying)
lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True)
lazy_df = options_curve_single_expiry_delta_bracket_query(lazy_df, expiry, delta_low, delta_high)
vol_curve_raw = lazy_df.collect().data
return vol_curve_raw.sort_index()*100
def gitub_url(as_of, branch): return f"https://raw.githubusercontent.com/man-group/ArcticDB/{branch}/docs/mkdocs/docs/notebooks/data/{as_of}tech-options.csv" # read the csv from github def read_github_options_file(as_of, branch='master'): try: raw_df = pd.read_csv(gitub_url(as_of, branch)) except Exception as e: raise Exception(f"Github access error: {e}") return raw_df.set_index(pd.DatetimeIndex(raw_df['quote_date'])).drop(columns=['Unnamed: 0']) def uly_symbol(uly): return f"options/{uly}" # query to get option expiries for an underlying def options_expiries_query(lazy_df, as_of, underlying): # exclude options expiring on as_of - no time value left filter = (lazy_df['expiration'] != as_of) lazy_df = lazy_df[filter].groupby('expiration').agg({'volume': 'sum'}) return lazy_df def read_expiries(as_of, underlying): read_date = pd.Timestamp(as_of) sym = uly_symbol(underlying) lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True) lazy_df = options_expiries_query(lazy_df, as_of, underlying) return lazy_df.collect().data.sort_index().index.values def read_all_underlyings(): # use the symbol list to get all underlyings return sorted([s.split('/')[1] for s in lib.list_symbols()]) # query to get all options for an expiry # as_of via date_range, uly via symbol def options_curve_single_expiry_query(lazy_df, expiry): filter = (lazy_df['expiration'] == expiry) return lazy_df[filter] # query to get all options for an expiry with delta in a specified interval # calls have delta >= 0, puts have delta <= 0 def options_curve_single_expiry_delta_bracket_query(lazy_df, expiry, delta_low, delta_high): filter = ((lazy_df['expiration'] == expiry) & (abs(lazy_df['delta']) >= delta_low) & (abs(lazy_df['delta']) <= delta_high) ) return lazy_df[filter].groupby('strike').agg({'implied_volatility': 'mean'}) def read_vol_curve_single_expiry(as_of, underlying, expiry): read_date = pd.Timestamp(as_of) sym = uly_symbol(underlying) lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True) lazy_df = options_curve_single_expiry_query(lazy_df, expiry) return lazy_df.collect().data @functools.cache def read_vol_curve_single_expiry_exclude_itm_otm(as_of, underlying, expiry, delta_low, delta_high): read_date = pd.Timestamp(as_of) sym = uly_symbol(underlying) lazy_df = lib.read(sym, date_range=(read_date, read_date), lazy=True) lazy_df = options_curve_single_expiry_delta_bracket_query(lazy_df, expiry, delta_low, delta_high) vol_curve_raw = lazy_df.collect().data return vol_curve_raw.sort_index()*100
从 Github 读取数据并存储到 ArcticDB¶
输入 [13]
已复制!
for d in all_dates:
df = read_github_options_file(d, branch)
underlyings = df['underlying'].unique()
print(f"Date: {d} - {len(df)} records, {len(underlyings)} underlyings")
for u in underlyings:
uly_df = df[df['underlying']==u]
sym = uly_symbol(u)
# upsert option creates the symbol if it doesn't already exist
lib.update(sym, uly_df, upsert=True)
for d in all_dates: df = read_github_options_file(d, branch) underlyings = df['underlying'].unique() print(f"Date: {d} - {len(df)} records, {len(underlyings)} underlyings") for u in underlyings: uly_df = df[df['underlying']==u] sym = uly_symbol(u) # upsert option creates the symbol if it doesn't already exist lib.update(sym, uly_df, upsert=True)
Date: 2013-06-03 - 6792 records, 11 underlyings Date: 2013-06-10 - 6622 records, 11 underlyings Date: 2013-06-17 - 6442 records, 11 underlyings Date: 2013-06-24 - 6134 records, 11 underlyings
输入 [14]
已复制!
# lets take a look at the symbols we have created - one for each underlying
lib.list_symbols()
# 让我们看看我们创建的符号 - 每个底层资产一个 lib.list_symbols()
输出 [14]
['options/IBM', 'options/NVDA', 'options/MSFT', 'options/AAPL', 'options/CSCO', 'options/ASML', 'options/TSM', 'options/ADBE', 'options/AMZN', 'options/ORCL', 'options/TSLA']
在 [15]
已复制!
all_uly = read_all_underlyings()
all_uly
all_uly = read_all_underlyings() all_uly
输出 [15]
['AAPL', 'ADBE', 'AMZN', 'ASML', 'CSCO', 'IBM', 'MSFT', 'NVDA', 'ORCL', 'TSLA', 'TSM']
用于创建图表和简单交互式控件的函数¶
用于创建图表和简单交互式控件的函数¶
已复制!
def create_vol_curve_chart_single(ax, as_of, uly, delta_low, delta_high):
exp = read_expiries(as_of, uly)
cmap = plt.get_cmap('rainbow', len(exp))
format_kw = {'linewidth': 2, 'alpha': 0.85}
for i, e in enumerate(exp):
vol_curve = read_vol_curve_single_expiry_exclude_itm_otm(as_of, uly, e, delta_low, delta_high)
vol_curve.plot(ax=ax, y='implied_volatility', label=e, grid=True, color=cmap(i), **format_kw)
ax.set_title(f"Option Volatility Curves for {uly}, as of {as_of}")
ax.set_ylabel("implied volatility")
ax.legend(loc='upper right', framealpha=0.7)
ax.set_facecolor('whitesmoke')
def create_vol_curve_chart(as_of1, uly1, as_of2, uly2, delta_low, delta_high):
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))
create_vol_curve_chart_single(ax1, as_of1, uly1, delta_low, delta_high)
create_vol_curve_chart_single(ax2, as_of2, uly2, delta_low, delta_high)
class input_widgets(object):
def __init__(self):
self.container = widgets.VBox()
self.create_widgets()
self.redraw_chart()
@property
def as_of1(self):
return self.container.children[0].children[0].value
@property
def uly1(self):
return self.container.children[0].children[1].value
@property
def as_of2(self):
return self.container.children[1].children[0].value
@property
def uly2(self):
return self.container.children[1].children[1].value
@property
def out(self):
return self.container.children[2]
def create_widgets(self):
self.as_of1_dd = widgets.Dropdown(
options=all_dates,
value=all_dates[0],
description='Date1:',
disabled=False,
)
self.as_of1_dd.observe(self._on_change, ['value'])
self.as_of2_dd = widgets.Dropdown(
options=all_dates,
value=all_dates[0],
description='Date2:',
disabled=False,
)
self.as_of2_dd.observe(self._on_change, ['value'])
self.uly1_dd = widgets.Dropdown(
options=all_uly,
value=all_uly[0],
description='Underlying1:',
disabled=False,
)
self.uly1_dd.observe(self._on_change, ['value'])
self.uly2_dd = widgets.Dropdown(
options=all_uly,
value=all_uly[1],
description='Underlying2:',
disabled=False,
)
self.uly2_dd.observe(self._on_change, ['value'])
self.output_widget = widgets.Output(layout=widgets.Layout(height='900px'))
self.container.children = [
widgets.HBox([self.as_of1_dd, self.uly1_dd]),
widgets.HBox([self.as_of2_dd, self.uly2_dd]),
self.output_widget
]
def _on_change(self, _):
self.redraw_chart()
def redraw_chart(self):
with self.output_widget:
self.output_widget.clear_output(wait=True)
create_vol_curve_chart(self.as_of1, self.uly1, self.as_of2, self.uly2, delta_low, delta_high)
plt.show()
def create_vol_curve_chart_single(ax, as_of, uly, delta_low, delta_high): exp = read_expiries(as_of, uly) cmap = plt.get_cmap('rainbow', len(exp)) format_kw = {'linewidth': 2, 'alpha': 0.85} for i, e in enumerate(exp): vol_curve = read_vol_curve_single_expiry_exclude_itm_otm(as_of, uly, e, delta_low, delta_high) vol_curve.plot(ax=ax, y='implied_volatility', label=e, grid=True, color=cmap(i), **format_kw) ax.set_title(f"{uly} 的期权波动率曲线,截至 {as_of}") ax.set_ylabel("隐含波动率") ax.legend(loc='upper right', framealpha=0.7) ax.set_facecolor('whitesmoke') def create_vol_curve_chart(as_of1, uly1, as_of2, uly2, delta_low, delta_high): fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10)) create_vol_curve_chart_single(ax1, as_of1, uly1, delta_low, delta_high) create_vol_curve_chart_single(ax2, as_of2, uly2, delta_low, delta_high) class input_widgets(object): def __init__(self): self.container = widgets.VBox() self.create_widgets() self.redraw_chart() @property def as_of1(self): return self.container.children[0].children[0].value @property def uly1(self): return self.container.children[0].children[1].value @property def as_of2(self): return self.container.children[1].children[0].value @property def uly2(self): return self.container.children[1].children[1].value @property def out(self): return self.container.children[2] def create_widgets(self): self.as_of1_dd = widgets.Dropdown( options=all_dates, value=all_dates[0], description='日期1:', disabled=False, ) self.as_of1_dd.observe(self._on_change, ['value']) self.as_of2_dd = widgets.Dropdown( options=all_dates, value=all_dates[0], description='日期2:', disabled=False, ) self.as_of2_dd.observe(self._on_change, ['value']) self.uly1_dd = widgets.Dropdown( options=all_uly, value=all_uly[0], description='底层资产1:', disabled=False, ) self.uly1_dd.observe(self._on_change, ['value']) self.uly2_dd = widgets.Dropdown( options=all_uly, value=all_uly[1], description='底层资产2:', disabled=False, ) self.uly2_dd.observe(self._on_change, ['value']) self.output_widget = widgets.Output(layout=widgets.Layout(height='900px')) self.container.children = [ widgets.HBox([self.as_of1_dd, self.uly1_dd]), widgets.HBox([self.as_of2_dd, self.uly2_dd]), self.output_widget ] def _on_change(self, _): self.redraw_chart() def redraw_chart(self): with self.output_widget: self.output_widget.clear_output(wait=True) create_vol_curve_chart(self.as_of1, self.uly1, self.as_of2, self.uly2, delta_low, delta_high) plt.show()
用于并排查看 2 个日期/股票的交互式波动率曲线¶
- 点击下拉菜单,图表将更新
- 数据是在图表创建过程中从 ArcticDB 读取的
输入 [17]
已复制!
w = input_widgets()
w.container
w = input_widgets() w.container
输出 [17]
VBox(children=(HBox(children=(Dropdown(description='Date1:', options=('2013-06-03', '2013-06-10', '2013-06-17'…
对于 Notebook 预览,图表和控件将如下图所示
结论¶
- 我们展示了一个简单的期权数据捕获和存储数据管道
- 我们创建了一系列有用的数据查询
- 这些查询用于构建交互式图表
- 结果是一个简单的期权波动率曲线可视化浏览器
- 在实际系统中,数据收集和存储可能与可视化是独立的过程
- ArcticDB 的简洁性和灵活性使得数据处理变得容易,相比之下,让图表和交互式控件看起来美观反而更具挑战性。