diff options
author | Jay Vyas <bigpetstore@Jays-MacBook-Air.local> | 2014-05-31 17:51:43 -0400 |
---|---|---|
committer | Jay Vyas <jay@apache.org> | 2014-06-03 16:53:44 -0700 |
commit | ab12e7b800cddef088049cf5652673908356896e (patch) | |
tree | ad5a99b95b94385daa141ce2eb5e94b810558b78 /bigtop-bigpetstore | |
parent | 71b61d899d42b04e17966528026ca287143c9a7b (diff) |
BIGTOP-1327: Update architecture to reflect the new pig, mahout architecture
Signed-off-by: Jay Vyas <jay@apache.org>
Diffstat (limited to 'bigtop-bigpetstore')
-rw-r--r-- | bigtop-bigpetstore/arch.dot | 24 |
1 files changed, 11 insertions, 13 deletions
diff --git a/bigtop-bigpetstore/arch.dot b/bigtop-bigpetstore/arch.dot index 4eb8ac4d..0f3f4041 100644 --- a/bigtop-bigpetstore/arch.dot +++ b/bigtop-bigpetstore/arch.dot @@ -17,28 +17,26 @@ digraph bigpetstore { node [shape=record]; - PIG_ANALYTICS [label="PIG_ANALYTICS|Unstructured-unsupported-pigscripts| pig_ad_hoc(0-n)"]; + PROD_And_USER_HASH_FUNC [label="python or datafu udf" ,style="rounded,filled", shape=diamond]; CUSTOMER_PAGE [label="CUSTOMER_PAGE|json|CUSTOMER_PAGE/part*"]; DIRTY_CSV [label="DIRTY_CSV|fname lname -prod , price ,prod,..|generated/part*"]; CSV [label="CSV|fname,lname,prod,price,date,xcoord,ycoord,...|cleaned/part*"]; MAHOUT_VIEW_INPUT [label="MAHOUT_VIEW | (hashed name) 10001, (hashed purchases) 203 | <hive_warehouse>/mahout_cf_in/part*" ]; - MAHOUT_CF [label="MAHOUT_CF | (hashed name) 10001, (hashed product) 201, .6 | mahout_cf_out/part*" ]; + MAHOUT_CF [label="MAHOUT collaborative filter output | (hashed name) 10001, (hashed product) 201, .6 | mahout_cf_out/part*" ]; Generate -> DIRTY_CSV [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.generator.BPSGenerator 100 bps/generated/"] ; DIRTY_CSV -> pig [label=""]; - pig -> CSV [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.etl.PigCSVCleaner bps/generated/ bps/cleaned/"]; - pig -> PIG_ANALYTICS [label="same as CSV job, but add your scripts to end... 
p1.pig p2.pig ..."]; - PIG_ANALYTICS -> CSV; - PROD_HASH -> hive [label="hive hash udf"]; - USER_HASH -> hive [label="hive hash udf"]; + pig -> CSV [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.etl.PigCSVCleaner bps/generated/ bps/cleaned/ "]; + CSV -> MAHOUT_VIEW_INPUT [label="BPS_Mahout_Viewbuilder.pig"]; + PROD_And_USER_HASH_FUNC -> MAHOUT_VIEW_INPUT [label="used in BPS_MAHOUT_Viewbuilder.pig script"] ; - CSV -> hive ; - hive -> MAHOUT_VIEW_INPUT [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.etl.HiveViewCreator bps/pig_out mahout_cf_in"]; - MAHOUT_VIEW_INPUT -> mahout_collab_filter_recomender -> MAHOUT_CF; - MAHOUT_CF -> crunch ; - CSV -> crunch ; - crunch -> CUSTOMER_PAGE [label="high performance joining"]; + MAHOUT_VIEW_INPUT -> mahout; + mahout -> MAHOUT_CF [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.analytics.BPSRecommender bps/mahout_cf_in/part* bps/mahout_cf_out/"]; + CSV -> pig_job2; + MAHOUT_CF -> pig_job2 ; + PROD_And_USER_HASH_FUNC -> pig_job2; + pig_job2 -> CUSTOMER_PAGE [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.analytics.BPSRecommender bpg/cleaned/ bps/mahout_cf_out/"]; } |