[ts-gen] Database for tick data -- part II
R P Herrold
herrold at owlriver.com
Tue Oct 9 16:44:36 EDT 2007
> On Tue, 9 Oct 2007, an anonymous poster wrote:
>
>> Anyhow, I was hoping that the historybar could store the
>> current flow of incoming ticks.
Attached is code which I last used and maintained six months
ago, to 'eat' raw 'tick' Market Data output, as then emitted
by the version of the shim then released by us. It did
parsing of the output of the shim, in real time, and produced
some summarization of tick data into a more useful form to our
research needs, including an insert into a MySQL database (the
table layout of which is documented inline).
Once we had History retrieval working, I abandoned the code.
But it _did_ do tick inserts, with a little bit of 'cooking'
into a MySQL database.
-- Russ Herrold
#!/usr/bin/php -qc/etc
<?php
//
// eat_raw-database.php
//
// Copyright 2005 R P Herrold
// License: GPL v2
// info#owlriver.com
//
// master at: centos-4:~/shim/eat_raw-database.php
//
// experiment with adding summarized ticks to 'ticker'
// database in table digest
//
$version = "0.08-070201";
//
// 0.08-070201 - support directory relocations
// 0.07 060412 - support new pipe seperator
// 0.06 060220 - doco t or p and schema
// 0.05-060216 - support index, futures
// 0.04 - forks eat_raw.php at 0.03-050922
// 0.03-050922 add a test for non-rolling seconds,
// start doco on layout
// 0.02-050912 accomodate new seconds format
//
set_time_limit(0);
//
include("/home/herrold/shim/php/database_keying.php");
include("/home/herrold/shim/php/database_open.php");
//
// $inf = "/var/log/messages";
$inf = "/var/tmp/pipe";
// $inf = "/tmp/tester.txt";
//
$fh = fopen($inf,"r");
//
$first = array();
//
$valid_sec = array();
$valid_sec[] = "STK";
$valid_sec[] = "FUT";
$valid_sec[] = "IND";
//
$prior_minute = 0 ;
$exch_open = "n";
$year = date("Y");
$month = date("m");
$day = date("d");
$doyear = date("z");
$domonth = $day ;
$doweek = date("w");
//
//
while($str = fgets($fh,4096)) {
//
// Note that the following sample is NOT a price record
//
// one Sep 22 12
// two :11
// three :24 xeon
// shim : shim
// five :tws
// six :0.02
// shim_pid : 3692
// seconds :43883
// nine :191323950
// ten :2
// eleven :2
// twelve :3
// thirteen :0
// fourteen :1000000005
// price : 1
// volume :0
// seventeen :tick.size.ask.
// transtype :STK.SMART.DIS.
// tuple :
//
// Sep 22 12:11:24 xeon : shim:tws:0.02: 3692:43884:
// 191327670:1:1:2:1:1000000020: 17.74:
// 51:0:tick.price.outcry.ask. :STK.SMART.CSCO.:
//
////////////////////////////////////////////////////////////
//
// this IS a price record
//
// one Sep 22 12
// two :18
// three :48 xeon
// shim : shim
// five :tws
// six :0.02
// shim_pid : 3692
// seconds :44327
// nine :634729770
// ten :1
// eleven :1
// twelve :2
// thirteen :1
// fourteen :1000000007
// price : 38.06
// volume : 10
// seventeen :0
// transtype :tick.price.outcry.ask.
// tuple :STK.SMART.HD.
// (spare) :
//
// Sep 22 12:18:48 xeon : shim:tws:0.02: 3692:44327:
// 634729770:1:1:2:1:1000000007: 38.06:
// 10:0:tick.price.outcry.ask. :STK.SMART.HD.:
//
list($one,$two,$three,$rest) = explode(":",$str);
list($shim,$five,$six,$shim_pid,$seconds,$nine,
$ten,$eleven,$twelve,$thirteen,$fourteen,$price,$volume,
$seventeen,$transtype,$tuple) = explode("|",$rest);
//
// validate that we are looking at a data record
$shim = trim($shim);
list($sec_type,$exchange,$symbol) = explode(".",$tuple);
$sec_type = trim($sec_type);
$exchange = trim($exchange);
$symbol = trim($symbol);
if ("$shim" == "shim" && ( in_array($sec_type,$valid_sec))) {
//
$doy = substr($one,0,6);
$volume = trim($volume);
//
//
// we deal in minutes rather than thousandth-seconds
// $seconds = substr($seconds,0,(strlen($seconds)-3));
//
if (6 == 9 ) {
$print_line = "$str \n";
$print_line .= "Seconds: \t $seconds \n";
$formatted = sprintf("%01.2f", $price);
$print_line .= "Price: \t $formatted \n";
$print_line .= "Volume: \t $volume \n";
$transtype = trim($transtype);
$print_line .= "Transtype: \t -|$transtype|- \n";
$print_line .= "Tuple: \t $tuple \n";
$print_line .= "Symbol: \t $symbol \n";
$print_line .= "\n";
print $print_line ;
}
//
// Dump the prior minute when it rolls
//
// we roll it out as a $symbol, and add it in as a $stock
//
$minute = (int) ( $seconds / 60 ) ;
if ($minute != $prior_minute) {
$hour = (int) ($prior_minute / 60) ;
$minute_of_hour = $prior_minute - ($hour * 60) ;
$print_line = "# eat_raw-database.php " . " " . $version . " ";
$print_line .= $year . " " . $month . " " . $day . " ";
$print_line .= $hour . " " . $minute_of_hour . " \n";
print $print_line ;
$print_decade = "";
if ($minute_of_hour < 10) {
$print_decade = "0";
}
asort($first);
//
$insert = "";
$insert .= "insert into digest (date,time,year,month,doyear,";
$insert .= "domonth,doweek,moday,";
$insert .= "summarization_type,modulo,mkt_open,";
$insert .= "symbol,sec_type,ticks,";
$insert .= "price_open,price_high,price_low,price_close,volume,";
$insert .= "price_vwap,price_prior_close,t_p,active) ";
//
// as of 060220
$schema = "
mysql> describe digest ;
+--------------------+------------+------+-----+------------+----------------+
| Field | Type | Null | Key | Default | Extra |
+--------------------+------------+------+-----+------------+----------------+
| uid | int(11) | NO | PRI | NULL | auto_increment |
| date | date | NO | | 0000-00-00 | |
| time | time | NO | | 00:00:00 | |
| year | int(11) | NO | | 0 | |
| month | int(11) | NO | | 0 | |
| doyear | int(11) | NO | | 0 | |
- day of year
| domonth | int(11) | NO | | 0 | |
- day of month
| doweek | int(11) | NO | | 0 | |
- day of week
| moday | int(11) | NO | | 0 | |
- minute of the day
| summarization_type | text | NO | | | |
- 1m 5m 15m .... alter to conform to IB data farms
| modulo | int(11) | YES | | NULL | |
- the 'phase' of the summarization_type modulo N
| mkt_open | tinyint(1) | YES | | NULL | |
- 1 or 0
| symbol | text | NO | | | |
| sec_type | text | YES | | NULL | |
| ticks | int(11) | YES | | NULL | |
- number of ticks observed
| price_open | float | YES | | NULL | |
| price_high | float | YES | | NULL | |
| price_low | float | YES | | NULL | |
| price_close | float | YES | | NULL | |
| volume | float | YES | | NULL | |
| price_vwap | float | YES | | NULL | |
| price_prior_close | float | YES | | NULL | |
| t_p | text | YES | | NULL | |
- temporary or permanent data 't' or 'p'
| active | tinyint(1) | YES | | NULL | |
+--------------------+------------+------+-----+------------+----------------+
";
//
$line = "";
foreach($first as $tuple => $nonce) {
list($sec_type,$exchange,$symbol) = explode(".",$tuple);
$sec_type = trim($sec_type);
$exchange = trim($exchange);
$symbol = trim($symbol);
//
// all but the FIRST appearance need a comma separator
if ("$line" != "") {
$line .= ",";
} else {
$line .= "VALUES";
}
$line .= "(";
$line .= "'" . $year . "-" . $month . "-" . $day . "',";
$line .= "'" . $hour . ":" . $print_decade . $minute_of_hour. ":00',";
$line .= "'" . $year . "',";
$line .= "'" . $month . "',";
$line .= "'" . $doyear . "',";
$line .= "'" . $domonth . "',";
$line .= "'" . $doweek . "',";
$line .= "'" . $prior_minute . "',";
$line .= "'1min',";
$line .= "'0',";
if ("$exch_open" == "y") {
$line .= "'1',";
} else {
$line .= "'0',";
}
$line .= "'" . $symbol . "',";
$line .= "'" . $sec_type . "',";
//
$ticks = $cluster[$tuple];
$line .= "'" . $ticks . "',";
//
$pad ="";
if (strlen($symbol) < 6) {
$pad .= " ";
}
if (strlen($symbol) < 5) {
$pad .= " ";
}
if (strlen($symbol) < 4) {
$pad .= " ";
}
if (strlen($symbol) < 3) {
$pad .= " ";
}
if (strlen($symbol) < 2) {
$pad .= " ";
}
//
//
$print_line = "$doy $hour:$print_decade$minute_of_hour ";
$print_line .= "$prior_minute $exch_open $symbol$pad $ticks ";
//
$formatted = sprintf("%01.2f", $open[$tuple]);
$print_line .= "$formatted ";
$line .= "'" . $formatted . "',";
$formatted = sprintf("%01.2f", $high[$tuple]);
$print_line .= "$formatted ";
$line .= "'" . $formatted . "',";
$formatted = sprintf("%01.2f", $low[$tuple]);
$print_line .= "$formatted ";
$line .= "'" . $formatted . "',";
$formatted = sprintf("%01.2f", $close[$tuple]);
$print_line .= "$formatted ";
$line .= "'" . $formatted . "',";
//
$tv = $tick_volume[$tuple];
$pad ="";
if (strlen($tv) < 6) {
$pad .= " ";
}
if (strlen($tv) < 5) {
$pad .= " ";
}
if (strlen($tv) < 4) {
$pad .= " ";
}
if (strlen($tv) < 3) {
$pad .= " ";
}
if (strlen($tv) < 2) {
$pad .= " ";
}
//
$print_line .= $pad . $tv . " ";
$line .= "'" . $tv . "',";
if ($tv > 0) {
$symbol_vwap = (int) ($vwap[$tuple] * 100 / $tv) ;
} else {
$symbol_vwap = 0;
}
$symbol_vwap = $symbol_vwap / 100 ;
$formatted = sprintf("%01.2f", $symbol_vwap);
if ($formatted == 0.00) {
$formatted = sprintf("%01.2f", $open[$tuple]);
}
$print_line .= "$formatted ";
$line .= "'" . $formatted . "',";
//
// todo: TBD: prior_close
$line .= "'" . "',";
// t_p
$line .= "'" . "t" ."',";
// active
$line .= "'" . "1" ."')";
//
// all done with the per symbol print part
print $print_line . "\n" ;
//
}
if ("$line" != "") {
$line .= ";";
$insert .= $line ;
// print "$insert \n";
$result = mysql_query($insert);
if (!$result) {
die('Invalid query: ' . mysql_error());
}
}
//
// update AFTER we do each insert as we are less time criical
// and also accurate then
$year = date("Y");
$month = date("m");
$day = date("d");
//
$doyear = date("z");
$domonth = $day ;
$doweek = date("w");
//
//////////////////////////////////////////////////////////////////
//
$prior_minute = $minute ;
unset($first, $cluster, $open, $high, $low, $close, $tick_volume, $vwap);
// first is the first occurrence of a stock
// cluster is the non-zero volume appearances
$first = array();
$cluster = array();
$open = array();
$high = array();
$low = array();
$close = array();
$tick_volume = array();
$vwap = array();
}
//
// Done dumping the prior minute and resetting the counters
$exch_open = "n";
// 9:30 is 570 minutes; 1600 is 960 minutes
if ($minute > 569 && $minute < 961) {
$exch_open = "y";
}
$first[$tuple] += 1 ;
if ($first[$tuple] < 2) {
$cluster[$tuple] = 0 ;
$open[$tuple] = (float) $price ;
$high[$tuple] = (float) $price ;
$low[$tuple] = (float) $price ;
$close[$tuple] = (float) $price ;
$tick_volume[$tuple] = (int) 0 ;
$vwap[$tuple] = (float) 0.00 ;
}
//
if ($volume > 0) {
$cluster[$tuple] += 1;
if ($price > $high[$tuple]) {
$high[$tuple] = (float) $price ;
}
if ($price < $low[$tuple]) {
$low[$tuple] = (float) $price ;
}
$close[$tuple] = (float) $price ;
$tick_volume[$tuple] += $volume ;
$vwap[$tuple] += $price * $volume ;
}
//
//
}
}
fclose($fh);
?>
-------------- next part --------------
#!/usr/bin/php -qc/etc
<?php
//
// eat_raw-database.php
//
// Copyright 2005 R P Herrold
// License: GPL v2
// info#owlriver.com
//
// master at: centos-4:~/shim/eat_raw-database.php
//
// experiment with adding summarized ticks to 'ticker'
// database in table digest
//
$version = "0.08-070201";
//
// 0.08-070201 - support directory relocations
// 0.07 060412 - support new pipe seperator
// 0.06 060220 - doco t or p and schema
// 0.05-060216 - support index, futures
// 0.04 - forks eat_raw.php at 0.03-050922
// 0.03-050922 add a test for non-rolling seconds,
// start doco on layout
// 0.02-050912 accomodate new seconds format
//
set_time_limit(0);
//
include("/home/herrold/shim/php/database_keying.php");
include("/home/herrold/shim/php/database_open.php");
//
// $inf = "/var/log/messages";
$inf = "/var/tmp/pipe";
// $inf = "/tmp/tester.txt";
//
$fh = fopen($inf,"r");
//
$first = array();
//
$valid_sec = array();
$valid_sec[] = "STK";
$valid_sec[] = "FUT";
$valid_sec[] = "IND";
//
$prior_minute = 0 ;
$exch_open = "n";
$year = date("Y");
$month = date("m");
$day = date("d");
$doyear = date("z");
$domonth = $day ;
$doweek = date("w");
//
//
while($str = fgets($fh,4096)) {
//
// Note that the following sample is NOT a price record
//
// one Sep 22 12
// two :11
// three :24 xeon
// shim : shim
// five :tws
// six :0.02
// shim_pid : 3692
// seconds :43883
// nine :191323950
// ten :2
// eleven :2
// twelve :3
// thirteen :0
// fourteen :1000000005
// price : 1
// volume :0
// seventeen :tick.size.ask.
// transtype :STK.SMART.DIS.
// tuple :
//
// Sep 22 12:11:24 xeon : shim:tws:0.02: 3692:43884:
// 191327670:1:1:2:1:1000000020: 17.74:
// 51:0:tick.price.outcry.ask. :STK.SMART.CSCO.:
//
////////////////////////////////////////////////////////////
//
// this IS a price record
//
// one Sep 22 12
// two :18
// three :48 xeon
// shim : shim
// five :tws
// six :0.02
// shim_pid : 3692
// seconds :44327
// nine :634729770
// ten :1
// eleven :1
// twelve :2
// thirteen :1
// fourteen :1000000007
// price : 38.06
// volume : 10
// seventeen :0
// transtype :tick.price.outcry.ask.
// tuple :STK.SMART.HD.
// (spare) :
//
// Sep 22 12:18:48 xeon : shim:tws:0.02: 3692:44327:
// 634729770:1:1:2:1:1000000007: 38.06:
// 10:0:tick.price.outcry.ask. :STK.SMART.HD.:
//
list($one,$two,$three,$rest) = explode(":",$str);
list($shim,$five,$six,$shim_pid,$seconds,$nine,
$ten,$eleven,$twelve,$thirteen,$fourteen,$price,$volume,
$seventeen,$transtype,$tuple) = explode("|",$rest);
//
// validate that we are looking at a data record
$shim = trim($shim);
list($sec_type,$exchange,$symbol) = explode(".",$tuple);
$sec_type = trim($sec_type);
$exchange = trim($exchange);
$symbol = trim($symbol);
if ("$shim" == "shim" && ( in_array($sec_type,$valid_sec))) {
//
$doy = substr($one,0,6);
$volume = trim($volume);
//
//
// we deal in minutes rather than thousandth-seconds
// $seconds = substr($seconds,0,(strlen($seconds)-3));
//
if (6 == 9 ) {
$print_line = "$str \n";
$print_line .= "Seconds: \t $seconds \n";
$formatted = sprintf("%01.2f", $price);
$print_line .= "Price: \t $formatted \n";
$print_line .= "Volume: \t $volume \n";
$transtype = trim($transtype);
$print_line .= "Transtype: \t -|$transtype|- \n";
$print_line .= "Tuple: \t $tuple \n";
$print_line .= "Symbol: \t $symbol \n";
$print_line .= "\n";
print $print_line ;
}
//
// Dump the prior minute when it rolls
//
// we roll it out as a $symbol, and add it in as a $stock
//
$minute = (int) ( $seconds / 60 ) ;
if ($minute != $prior_minute) {
$hour = (int) ($prior_minute / 60) ;
$minute_of_hour = $prior_minute - ($hour * 60) ;
$print_line = "# eat_raw-database.php " . " " . $version . " ";
$print_line .= $year . " " . $month . " " . $day . " ";
$print_line .= $hour . " " . $minute_of_hour . " \n";
print $print_line ;
$print_decade = "";
if ($minute_of_hour < 10) {
$print_decade = "0";
}
asort($first);
//
$insert = "";
$insert .= "insert into digest (date,time,year,month,doyear,";
$insert .= "domonth,doweek,moday,";
$insert .= "summarization_type,modulo,mkt_open,";
$insert .= "symbol,sec_type,ticks,";
$insert .= "price_open,price_high,price_low,price_close,volume,";
$insert .= "price_vwap,price_prior_close,t_p,active) ";
//
// as of 060220
$schema = "
mysql> describe digest ;
+--------------------+------------+------+-----+------------+----------------+
| Field | Type | Null | Key | Default | Extra |
+--------------------+------------+------+-----+------------+----------------+
| uid | int(11) | NO | PRI | NULL | auto_increment |
| date | date | NO | | 0000-00-00 | |
| time | time | NO | | 00:00:00 | |
| year | int(11) | NO | | 0 | |
| month | int(11) | NO | | 0 | |
| doyear | int(11) | NO | | 0 | |
- day of year
| domonth | int(11) | NO | | 0 | |
- day of month
| doweek | int(11) | NO | | 0 | |
- day of week
| moday | int(11) | NO | | 0 | |
- minute of the day
| summarization_type | text | NO | | | |
- 1m 5m 15m .... alter to conform to IB data farms
| modulo | int(11) | YES | | NULL | |
- the 'phase' of the summarization_type modulo N
| mkt_open | tinyint(1) | YES | | NULL | |
- 1 or 0
| symbol | text | NO | | | |
| sec_type | text | YES | | NULL | |
| ticks | int(11) | YES | | NULL | |
- number of ticks observed
| price_open | float | YES | | NULL | |
| price_high | float | YES | | NULL | |
| price_low | float | YES | | NULL | |
| price_close | float | YES | | NULL | |
| volume | float | YES | | NULL | |
| price_vwap | float | YES | | NULL | |
| price_prior_close | float | YES | | NULL | |
| t_p | text | YES | | NULL | |
- temporary or permanent data 't' or 'p'
| active | tinyint(1) | YES | | NULL | |
+--------------------+------------+------+-----+------------+----------------+
";
//
$line = "";
foreach($first as $tuple => $nonce) {
list($sec_type,$exchange,$symbol) = explode(".",$tuple);
$sec_type = trim($sec_type);
$exchange = trim($exchange);
$symbol = trim($symbol);
//
// all but the FIRST appearance need a comma separator
if ("$line" != "") {
$line .= ",";
} else {
$line .= "VALUES";
}
$line .= "(";
$line .= "'" . $year . "-" . $month . "-" . $day . "',";
$line .= "'" . $hour . ":" . $print_decade . $minute_of_hour. ":00',";
$line .= "'" . $year . "',";
$line .= "'" . $month . "',";
$line .= "'" . $doyear . "',";
$line .= "'" . $domonth . "',";
$line .= "'" . $doweek . "',";
$line .= "'" . $prior_minute . "',";
$line .= "'1min',";
$line .= "'0',";
if ("$exch_open" == "y") {
$line .= "'1',";
} else {
$line .= "'0',";
}
$line .= "'" . $symbol . "',";
$line .= "'" . $sec_type . "',";
//
$ticks = $cluster[$tuple];
$line .= "'" . $ticks . "',";
//
$pad ="";
if (strlen($symbol) < 6) {
$pad .= " ";
}
if (strlen($symbol) < 5) {
$pad .= " ";
}
if (strlen($symbol) < 4) {
$pad .= " ";
}
if (strlen($symbol) < 3) {
$pad .= " ";
}
if (strlen($symbol) < 2) {
$pad .= " ";
}
//
//
$print_line = "$doy $hour:$print_decade$minute_of_hour ";
$print_line .= "$prior_minute $exch_open $symbol$pad $ticks ";
//
$formatted = sprintf("%01.2f", $open[$tuple]);
$print_line .= "$formatted ";
$line .= "'" . $formatted . "',";
$formatted = sprintf("%01.2f", $high[$tuple]);
$print_line .= "$formatted ";
$line .= "'" . $formatted . "',";
$formatted = sprintf("%01.2f", $low[$tuple]);
$print_line .= "$formatted ";
$line .= "'" . $formatted . "',";
$formatted = sprintf("%01.2f", $close[$tuple]);
$print_line .= "$formatted ";
$line .= "'" . $formatted . "',";
//
$tv = $tick_volume[$tuple];
$pad ="";
if (strlen($tv) < 6) {
$pad .= " ";
}
if (strlen($tv) < 5) {
$pad .= " ";
}
if (strlen($tv) < 4) {
$pad .= " ";
}
if (strlen($tv) < 3) {
$pad .= " ";
}
if (strlen($tv) < 2) {
$pad .= " ";
}
//
$print_line .= $pad . $tv . " ";
$line .= "'" . $tv . "',";
if ($tv > 0) {
$symbol_vwap = (int) ($vwap[$tuple] * 100 / $tv) ;
} else {
$symbol_vwap = 0;
}
$symbol_vwap = $symbol_vwap / 100 ;
$formatted = sprintf("%01.2f", $symbol_vwap);
if ($formatted == 0.00) {
$formatted = sprintf("%01.2f", $open[$tuple]);
}
$print_line .= "$formatted ";
$line .= "'" . $formatted . "',";
//
// todo: TBD: prior_close
$line .= "'" . "',";
// t_p
$line .= "'" . "t" ."',";
// active
$line .= "'" . "1" ."')";
//
// all done with the per symbol print part
print $print_line . "\n" ;
//
}
if ("$line" != "") {
$line .= ";";
$insert .= $line ;
// print "$insert \n";
$result = mysql_query($insert);
if (!$result) {
die('Invalid query: ' . mysql_error());
}
}
//
// update AFTER we do each insert as we are less time criical
// and also accurate then
$year = date("Y");
$month = date("m");
$day = date("d");
//
$doyear = date("z");
$domonth = $day ;
$doweek = date("w");
//
//////////////////////////////////////////////////////////////////
//
$prior_minute = $minute ;
unset($first, $cluster, $open, $high, $low, $close, $tick_volume, $vwap);
// first is the first occurrence of a stock
// cluster is the non-zero volume appearances
$first = array();
$cluster = array();
$open = array();
$high = array();
$low = array();
$close = array();
$tick_volume = array();
$vwap = array();
}
//
// Done dumping the prior minute and resetting the counters
$exch_open = "n";
// 9:30 is 570 minutes; 1600 is 960 minutes
if ($minute > 569 && $minute < 961) {
$exch_open = "y";
}
$first[$tuple] += 1 ;
if ($first[$tuple] < 2) {
$cluster[$tuple] = 0 ;
$open[$tuple] = (float) $price ;
$high[$tuple] = (float) $price ;
$low[$tuple] = (float) $price ;
$close[$tuple] = (float) $price ;
$tick_volume[$tuple] = (int) 0 ;
$vwap[$tuple] = (float) 0.00 ;
}
//
if ($volume > 0) {
$cluster[$tuple] += 1;
if ($price > $high[$tuple]) {
$high[$tuple] = (float) $price ;
}
if ($price < $low[$tuple]) {
$low[$tuple] = (float) $price ;
}
$close[$tuple] = (float) $price ;
$tick_volume[$tuple] += $volume ;
$vwap[$tuple] += $price * $volume ;
}
//
//
}
}
fclose($fh);
?>
More information about the ts-general
mailing list