about | summary | refs | log | tree | commit | diff
path: root/bigtop-bigpetstore
diff options
context:
space:
mode:
author: Jay Vyas <bigpetstore@Jays-MacBook-Air.local> 2014-05-31 17:51:43 -0400
committer: Jay Vyas <jay@apache.org> 2014-06-03 16:53:44 -0700
commit: ab12e7b800cddef088049cf5652673908356896e (patch)
tree: ad5a99b95b94385daa141ce2eb5e94b810558b78 /bigtop-bigpetstore
parent: 71b61d899d42b04e17966528026ca287143c9a7b (diff)
BIGTOP-1327: Update architecture to reflect the new pig, mahout architecture
Signed-off-by: Jay Vyas <jay@apache.org>
Diffstat (limited to 'bigtop-bigpetstore')
-rw-r--r-- bigtop-bigpetstore/arch.dot 24
1 file changed, 11 insertions, 13 deletions
diff --git a/bigtop-bigpetstore/arch.dot b/bigtop-bigpetstore/arch.dot
index 4eb8ac4d..0f3f4041 100644
--- a/bigtop-bigpetstore/arch.dot
+++ b/bigtop-bigpetstore/arch.dot
@@ -17,28 +17,26 @@ digraph bigpetstore {
node [shape=record];
- PIG_ANALYTICS [label="PIG_ANALYTICS|Unstructured-unsupported-pigscripts| pig_ad_hoc(0-n)"];
+ PROD_And_USER_HASH_FUNC [label="python or datafu udf" ,style="rounded,filled", shape=diamond];
CUSTOMER_PAGE [label="CUSTOMER_PAGE|json|CUSTOMER_PAGE/part*"];
DIRTY_CSV [label="DIRTY_CSV|fname lname -prod , price ,prod,..|generated/part*"];
CSV [label="CSV|fname,lname,prod,price,date,xcoord,ycoord,...|cleaned/part*"];
MAHOUT_VIEW_INPUT [label="MAHOUT_VIEW | (hashed name) 10001, (hashed purchases) 203 | <hive_warehouse>/mahout_cf_in/part*" ];
- MAHOUT_CF [label="MAHOUT_CF | (hashed name) 10001, (hashed product) 201, .6 | mahout_cf_out/part*" ];
+ MAHOUT_CF [label="MAHOUT collaborative filter output | (hashed name) 10001, (hashed product) 201, .6 | mahout_cf_out/part*" ];
Generate -> DIRTY_CSV [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.generator.BPSGenerator 100 bps/generated/"] ;
DIRTY_CSV -> pig [label=""];
- pig -> CSV [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.etl.PigCSVCleaner bps/generated/ bps/cleaned/"];
- pig -> PIG_ANALYTICS [label="same as CSV job, but add your scripts to end... p1.pig p2.pig ..."];
- PIG_ANALYTICS -> CSV;
- PROD_HASH -> hive [label="hive hash udf"];
- USER_HASH -> hive [label="hive hash udf"];
+ pig -> CSV [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.etl.PigCSVCleaner bps/generated/ bps/cleaned/ "];
+ CSV -> MAHOUT_VIEW_INPUT [label="BPS_Mahout_Viewbuilder.pig"];
+ PROD_And_USER_HASH_FUNC -> MAHOUT_VIEW_INPUT [label="used in BPS_MAHOUT_Viewbuilder.pig script"] ;
- CSV -> hive ;
- hive -> MAHOUT_VIEW_INPUT [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.etl.HiveViewCreator bps/pig_out mahout_cf_in"];
- MAHOUT_VIEW_INPUT -> mahout_collab_filter_recomender -> MAHOUT_CF;
- MAHOUT_CF -> crunch ;
- CSV -> crunch ;
- crunch -> CUSTOMER_PAGE [label="high performance joining"];
+ MAHOUT_VIEW_INPUT -> mahout;
+ mahout -> MAHOUT_CF [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.analytics.BPSRecommender bps/mahout_cf_in/part* bps/mahout_cf_out/"];
+ CSV -> pig_job2;
+ MAHOUT_CF -> pig_job2 ;
+ PROD_And_USER_HASH_FUNC -> pig_job2;
+ pig_job2 -> CUSTOMER_PAGE [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.analytics.BPSRecommender bpg/cleaned/ bps/mahout_cf_out/"];
}