请问,如何通过 expression filter 对计算指标进行筛选并调出数据?(貌似现在只有 raw data 可以通过 D.instruments 筛选调出;计算指标只能先通过 fetch 按时间、股票名称筛选,然后再用 df 的普通方法筛选,效率低。)
from qlib.data.filter import ExpressionDFilter
filters = [
ExpressionDFilter("Mean(Ref($close, 1)/$close - 1, 30) > 0.01"),
]
inst = D.instruments("all", filter_pipe=filters)
df = D.features(inst, ["$close", "$volume"])
import qlib
from qlib.data import D
qlib.init(provider_uri ='C:/stockbase/Astocks')
instruments = ['sh600030', 'sh600000']
fields = ['$preclose']
features_df = D.features(instruments, fields, start_time='2016-12-08', end_time='2016-12-18', freq='day')
File "C:\Python39\lib\runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Python39\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "c:\Users\kinger\.vscode\extensions\ms-python.python-2021.12.1559732655\pythonFiles\lib\python\debugpy\__main__.py", line 45, in <module>
cli.main()
File "c:\Users\kinger\.vscode\extensions\ms-python.python-2021.12.1559732655\pythonFiles\lib\python\debugpy/..\debugpy\server\cli.py", line 444, in main
run()
File "c:\Users\kinger\.vscode\extensions\ms-python.python-2021.12.1559732655\pythonFiles\lib\python\debugpy/..\debugpy\server\cli.py", line 285, in run_file
runpy.run_path(target_as_str, run_name=compat.force_str("__main__"))
File "C:\Python39\lib\runpy.py", line 268, in run_path
return _run_module_code(code, init_globals, run_name,
File "C:\Python39\lib\site-packages\qlib\data\data.py", line 1036, in features
return DatasetD.dataset(instruments, fields, start_time, end_time, freq, inst_processors=inst_processors)
File "C:\Python39\lib\site-packages\qlib\data\data.py", line 771, in dataset
data = self.dataset_processor(
File "C:\Python39\lib\site-packages\qlib\data\data.py", line 554, in dataset_processor
ParallelExt(n_jobs=workers, backend=C.joblib_backend, maxtasksperchild=C.maxtasksperchild)(task_l),
File "C:\Python39\lib\site-packages\joblib\parallel.py", line 966, in __call__
n_jobs = self._initialize_backend()
File "C:\Python39\lib\site-packages\joblib\parallel.py", line 733, in _initialize_backend
n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
File "C:\Python39\lib\site-packages\joblib\_parallel_backends.py", line 470, in configure
self._pool = MemmappingPool(n_jobs, **memmappingpool_args)
File "C:\Python39\lib\site-packages\joblib\pool.py", line 303, in __init__
manager = TemporaryResourcesManager(temp_folder)
File "C:\Python39\lib\site-packages\joblib\_memmapping_reducer.py", line 531, in __init__
self.set_current_context(context_id)
File "C:\Python39\lib\site-packages\joblib\_memmapping_reducer.py", line 535, in set_current_context
self.register_new_context(context_id)
File "C:\Python39\lib\site-packages\joblib\_memmapping_reducer.py", line 560, in register_new_context
self.register_folder_finalizer(new_folder_path, context_id)
File "C:\Python39\lib\site-packages\joblib\_memmapping_reducer.py", line 590, in register_folder_finalizer
resource_tracker.register(pool_subfolder, "folder")
File "C:\Python39\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py", line 191, in register
self._send('REGISTER', name, rtype)
File "C:\Python39\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py", line 204, in _send
msg = '{0}:{1}:{2}\n'.format(cmd, name, rtype).encode('ascii')
UnicodeEncodeError: 'ascii' codec can't encode characters in position 18-19: ordinal not in range(128)
Exception ignored in: <function Pool.__del__ at 0x00000245E12561F0>
Traceback (most recent call last):
File "C:\Python39\lib\multiprocessing\pool.py", line 264, in __del__
if self._state == RUN:
AttributeError: 'MemmappingPool' object has no attribute '_state'
qlib 初始化是正常的,单个股票也能读取数据,但是当读取多个股票时就报错,问题可能出在哪里?
import qlib
from qlib.data import D
qlib.init_from_yaml_conf("qlib_clinet_config.yaml")
D.calendar(start_time='2010-01-01', end_time='2021-12-30', freq='day') # 报类型错误异常
Newbie here!
Dumb question ;-;
I’m running the Google Colab example using the LGBModel. As a result, I obtained several charts like those presented in the documentation, but I couldn’t fully understand them. For the first chart, shown above, I have two questions.
1) According to the documentation, “cum return wo cost” stands for “Cumulative returns series of portfolio without cost”, but what is this “cost”? Is it the cost of purchasing the stock?
2) Are the orange and green curves the cumulative return series produced using the LGBModel? If so, is the idea of this chart to compare the portfolio created with the LGBModel against the benchmark shown in blue?
dump_bin 在处理日期的时候可能有bug,csv文件中存储的是周线数据,数据文件的日期时间字段也是按周记录的。如下:
4,2021-12-10,4909.43,5055.12,5114.50,4888.69,924423634,3.14,154.10,000300,沪深300,5055.12
3,2021-12-17,5090.02,4954.76,5143.84,4954.76,814498404,-1.99,-100.36,000300,沪深300,4954.76
2,2021-12-24,4934.97,4921.34,4971.67,4872.68,728501059,-0.67,-33.42,000300,沪深300,4921.34
1,2021-12-31,4918.15,4940.37,4959.14,4878.65,611076626,0.39,19.03,000300,沪深300,4940.37
0,2022-01-04,4957.98,4917.77,4961.45,4874.53,151534776,-0.46,-22.60,000300,沪深300,4917.77
生成的 calendar 数据如下:
2021-12-27 00:00:00
2021-12-28 00:00:00
2021-12-29 00:00:00
2021-12-30 00:00:00
2021-12-31 00:00:00
2022-01-04 00:00:00
执行查询时
instruments = ['SH000300']
fields = ['$close', '$volume', 'Ref($close, 1)', 'Mean($close, 3)', '$high-$low']
D.features(instruments, fields, start_time='2021-12-25', end_time='2022-01-05',freq='week')
返回的数据出现很多空值记录行,处理起来不方便
dump_bin 在处理日期的时候可能有bug,csv文件中存储的是周线数据,数据文件的日期时间字段也是按周记录的。如下:
4,2021-12-10,4909.43,5055.12,5114.50,4888.69,924423634,3.14,154.10,000300,沪深300,5055.12
3,2021-12-17,5090.02,4954.76,5143.84,4954.76,814498404,-1.99,-100.36,000300,沪深300,4954.76
2,2021-12-24,4934.97,4921.34,4971.67,4872.68,728501059,-0.67,-33.42,000300,沪深300,4921.34
1,2021-12-31,4918.15,4940.37,4959.14,4878.65,611076626,0.39,19.03,000300,沪深300,4940.37
0,2022-01-04,4957.98,4917.77,4961.45,4874.53,151534776,-0.46,-22.60,000300,沪深300,4917.77
生成的 calendar 数据如下:
2021-12-27 00:00:00
2021-12-28 00:00:00
2021-12-29 00:00:00
2021-12-30 00:00:00
2021-12-31 00:00:00
2022-01-04 00:00:00
执行查询时
instruments = ['SH000300']
fields = ['$close', '$volume', 'Ref($close, 1)', 'Mean($close, 3)', '$high-$low']
D.features(instruments, fields, start_time='2021-12-25', end_time='2022-01-05',freq='week')
返回的数据出现很多空值记录行,处理起来不方便
@zhupr 这个问题请老师看下
@zhupr 请老师看下
执行 python collector.py --index_name CSI100 --qlib_dir K:/stock/qlib-data/cn_data_dump --method parse_instruments 时报错:
2022-01-06 10:21:43.554 | INFO | data_collector.cn_index.collector:_read_change_from_url:259 - get 2015-06-15 00:00:00 changes from excel, title=关于调整沪深300和中证香港100等指数样本股的公告, excel_url=http://www.csindex.com.cn/file/20150529ch.xls
2022-01-06 10:21:43.989 | WARNING | data_collector.cn_index.collector:_read_change_from_url:263 - error downloading file: http://www.csindex.com.cn/file/20150529ch.xls, will parse the table from the content
Traceback (most recent call last):
File "K:\stock\qlib\scripts\data_collector\cn_index\collector.py", line 261, in _read_change_from_url
df = self._parse_excel(excel_url, add_date, remove_date)
File "K:\stock\qlib\scripts\data_collector\cn_index\collector.py", line 163, in _parse_excel
df_map = pd.read_excel(_io, sheet_name=None)
File "K:\stock\qlib-venv\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "K:\stock\qlib-venv\lib\site-packages\pandas\io\excel\_base.py", line 364, in read_excel
io = ExcelFile(io, storage_options=storage_options, engine=engine)
File "K:\stock\qlib-venv\lib\site-packages\pandas\io\excel\_base.py", line 1195, in __init__
raise ValueError(
ValueError: Excel file format cannot be determined, you must specify an engine manually.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "collector.py", line 386, in <module>
fire.Fire(get_instruments)
File "K:\stock\qlib-venv\lib\site-packages\fire\core.py", line 141, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "K:\stock\qlib-venv\lib\site-packages\fire\core.py", line 466, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "K:\stock\qlib-venv\lib\site-packages\fire\core.py", line 681, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "collector.py", line 382, in get_instruments
getattr(obj, method)()
File "K:\stock\qlib\scripts\data_collector\index.py", line 213, in parse_instruments
changers_df = self.get_changes()
File "K:\stock\qlib\scripts\data_collector\cn_index\collector.py", line 138, in get_changes
_df = self._read_change_from_url(_url)
File "K:\stock\qlib\scripts\data_collector\cn_index\collector.py", line 264, in _read_change_from_url
df = self._parse_table(_text, add_date, remove_date)
File "K:\stock\qlib\scripts\data_collector\cn_index\collector.py", line 195, in _parse_table
_tmp_df[self.SYMBOL_FIELD_NAME] = _s.map(self.normalize_symbol)
File "K:\stock\qlib-venv\lib\site-packages\pandas\core\series.py", line 4161, in map
new_values = super()._map_values(arg, na_action=na_action)
File "K:\stock\qlib-venv\lib\site-packages\pandas\core\base.py", line 870, in _map_values
new_values = map_f(values, mapper)
File "pandas\_libs\lib.pyx", line 2859, in pandas._libs.lib.map_infer
File "K:\stock\qlib\scripts\data_collector\cn_index\collector.py", line 157, in normalize_symbol
symbol = f"{int(symbol):06}"
ValueError: invalid literal for int() with base 10: '0069.HK'