aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author RJ Nowling <rnowling@gmail.com> 2015-08-26 08:53:19 -0500
committer RJ Nowling <rnowling@gmail.com> 2015-08-26 08:53:19 -0500
commit 74a66b1e8e0a3721f834bd9c36b826a68b494d34 (patch)
tree 34efea732ef8b6103ccbc582229c1d9badcd1b97
parent caae595712381706a4ed4e938c393ede3b1311b2 (diff)
BIGTOP-1986. Extract location dataset from BigPetStore data generator
-rw-r--r--bigtop-data-generators/bigtop-location-data/README.md50
-rw-r--r--bigtop-data-generators/bigtop-location-data/build.gradle63
-rw-r--r--bigtop-data-generators/bigtop-location-data/settings.gradle16
-rw-r--r--bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/ZipcodeRecord.java)8
-rw-r--r--bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java25
-rw-r--r--bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/ZipcodeReader.java)41
-rwxr-xr-xbigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt)0
-rwxr-xr-xbigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv)0
-rwxr-xr-xbigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv)0
-rw-r--r--bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/population_data.csv (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/population_data.csv)0
-rw-r--r--bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/zips.csv (renamed from bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/zips.csv)0
11 files changed, 173 insertions, 30 deletions
diff --git a/bigtop-data-generators/bigtop-location-data/README.md b/bigtop-data-generators/bigtop-location-data/README.md
new file mode 100644
index 00000000..9aff06f9
--- /dev/null
+++ b/bigtop-data-generators/bigtop-location-data/README.md
@@ -0,0 +1,50 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+BigTop Location Data
+====================
+
+U.S. zipcode data including GPS coordinates, median household incomes,
+and population sizes from the U.S. Census along with a reader and
+data model.
+
+Building and Testing
+--------------------
+We use the Gradle build system for the location data library, so you'll need
+to install Gradle on your system.
+Once that's done, you can use Gradle to run the included unit tests
+and build the location data jar.
+
+To build:
+
+ $ gradle build
+
+This will create several directories and a jar located at:
+
+ build/libs/bigtop-location-data-1.1.0-SNAPSHOT.jar
+
+Building automatically runs the included unit tests. If you would prefer
+to just run the unit tests, you can do so by running:
+
+ $ gradle test
+
+To clean up the build files, run:
+
+ $ gradle clean
+
+To install a jar into your local maven repository:
+
+ $ gradle install
diff --git a/bigtop-data-generators/bigtop-location-data/build.gradle b/bigtop-data-generators/bigtop-location-data/build.gradle
new file mode 100644
index 00000000..9eb91e39
--- /dev/null
+++ b/bigtop-data-generators/bigtop-location-data/build.gradle
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+apply plugin: 'eclipse'
+apply plugin: 'groovy'
+apply plugin: 'java'
+apply plugin: 'maven'
+group = 'org.apache.bigtop'
+version = '1.1.0-SNAPSHOT'
+
+jar {
+ // Bundle every runtime dependency into the jar so it is self-contained.
+ from {
+ configurations.runtime.collect {
+ it.isDirectory() ? it : zipTree(it)
+ }
+ }
+ // Manifest metadata; the title names this project (bigtop-location-data).
+ manifest {
+ attributes 'Title': 'BigTop Location Data', 'Version': version
+ }
+}
+
+repositories {
+ mavenLocal()
+ mavenCentral()
+}
+
+test {
+ // show standard out and error on console
+ testLogging.showStandardStreams = true
+
+ // listen to events in the test execution lifecycle
+ beforeTest { descriptor ->
+ logger.lifecycle("Running test: " + descriptor)
+ }
+
+ // listen to standard out and standard error of the test JVM(s)
+ onOutput { descriptor, event ->
+ logger.lifecycle("Test: " + descriptor + " produced standard out/err: " + event.message )
+ }
+
+}
+
+dependencies {
+ compile 'com.google.guava:guava:18.0'
+
+ compile 'org.apache.commons:commons-lang3:3.4'
+
+ testCompile 'junit:junit:4.+'
+}
diff --git a/bigtop-data-generators/bigtop-location-data/settings.gradle b/bigtop-data-generators/bigtop-location-data/settings.gradle
new file mode 100644
index 00000000..f66bfdbc
--- /dev/null
+++ b/bigtop-data-generators/bigtop-location-data/settings.gradle
@@ -0,0 +1,16 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+rootProject.name = "bigtop-location-data"
\ No newline at end of file
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/ZipcodeRecord.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java
index e5eeb600..62afc0a8 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/ZipcodeRecord.java
+++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java
@@ -13,13 +13,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs;
+package org.apache.bigtop.datagenerators.locations;
import java.io.Serializable;
import org.apache.commons.lang3.tuple.Pair;
-public class ZipcodeRecord implements Serializable
+public class Location implements Serializable
{
private static final long serialVersionUID = 1769986686070108470L;
@@ -30,7 +30,7 @@ public class ZipcodeRecord implements Serializable
final double medianHouseholdIncome;
final long population;
- public ZipcodeRecord(String zipcode, Pair<Double, Double> coordinates,
+ public Location(String zipcode, Pair<Double, Double> coordinates,
String city, String state, double medianHouseholdIncome, long population)
{
this.city = city;
@@ -61,7 +61,7 @@ public class ZipcodeRecord implements Serializable
return population;
}
- public double distance(ZipcodeRecord other)
+ public double distance(Location other)
{
if(other.getZipcode().equals(zipcode))
return 0.0;
diff --git a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java
new file mode 100644
index 00000000..4140bfd4
--- /dev/null
+++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.datagenerators.locations;
+
+import java.io.File;
+/** Bundled data file names; LocationReader prefixes them with "/input_data/" for classpath lookup. NOTE(review): File uses the platform separator, so the nested path may not resolve on Windows - confirm. */
+public class LocationConstants
+{
+ public static final File COORDINATES_FILE = new File("zips.csv");
+ public static final File INCOMES_FILE = new File("ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv");
+ public static final File POPULATION_FILE = new File("population_data.csv");
+}
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/ZipcodeReader.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java
index 2478c8e1..5be4d59d 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/ZipcodeReader.java
+++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java
@@ -13,8 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.bigtop.datagenerators.bigpetstore.datareaders;
+package org.apache.bigtop.datagenerators.locations;
+import java.io.BufferedInputStream;
+import java.io.File;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.util.HashSet;
@@ -24,14 +26,13 @@ import java.util.Scanner;
import java.util.Set;
import java.util.Vector;
-import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord;
import org.apache.commons.lang3.tuple.Pair;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
-public class ZipcodeReader
+public class LocationReader
{
private static class ZipcodeLocationRecord
{
@@ -47,24 +48,11 @@ public class ZipcodeReader
this.state = state;
}
}
-
- InputStream zipcodeIncomesFile = null;
- InputStream zipcodePopulationFile = null;
- InputStream zipcodeCoordinatesFile = null;
-
- public void setIncomesFile(InputStream path)
- {
- this.zipcodeIncomesFile = path;
- }
-
- public void setPopulationFile(InputStream path)
- {
- this.zipcodePopulationFile = path;
- }
-
- public void setCoordinatesFile(InputStream path)
+ // NOTE(review): getResourceAsStream() returns null for a missing resource and nothing below checks for it - confirm the intended failure mode.
+ private InputStream getResource(File filename)
{
- this.zipcodeCoordinatesFile = path;
+ InputStream stream = getClass().getResourceAsStream("/input_data/" + filename);
+ return new BufferedInputStream(stream);
}
private ImmutableMap<String, Double> readIncomeData(InputStream path) throws FileNotFoundException
@@ -166,20 +154,21 @@ public class ZipcodeReader
return ImmutableMap.copyOf(entries);
}
- public ImmutableList<ZipcodeRecord> readData() throws FileNotFoundException
+ public ImmutableList<Location> readData() throws FileNotFoundException
{
- ImmutableMap<String, Double> incomes = readIncomeData(this.zipcodeIncomesFile);
- ImmutableMap<String, Long> populations = readPopulationData(this.zipcodePopulationFile);
- ImmutableMap<String, ZipcodeLocationRecord> coordinates = readCoordinates(this.zipcodeCoordinatesFile);
+
+ ImmutableMap<String, Double> incomes = readIncomeData(getResource(LocationConstants.INCOMES_FILE));
+ ImmutableMap<String, Long> populations = readPopulationData(getResource(LocationConstants.POPULATION_FILE));
+ ImmutableMap<String, ZipcodeLocationRecord> coordinates = readCoordinates(getResource(LocationConstants.COORDINATES_FILE));
Set<String> zipcodeSubset = new HashSet<String>(incomes.keySet());
zipcodeSubset.retainAll(populations.keySet());
zipcodeSubset.retainAll(coordinates.keySet());
- List<ZipcodeRecord> table = new Vector<ZipcodeRecord>();
+ List<Location> table = new Vector<Location>();
for(String zipcode : zipcodeSubset)
{
- ZipcodeRecord record = new ZipcodeRecord(zipcode,
+ Location record = new Location(zipcode,
coordinates.get(zipcode).coordinates,
coordinates.get(zipcode).city,
coordinates.get(zipcode).state,
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt
index 7127f905..7127f905 100755
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt
+++ b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903.txt
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv
index 9f703a36..9f703a36 100755
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv
+++ b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_metadata.csv
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv
index b5a5e027..b5a5e027 100755
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv
+++ b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/population_data.csv b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/population_data.csv
index 027c8396..027c8396 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/population_data.csv
+++ b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/population_data.csv
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/zips.csv b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/zips.csv
index 7e487a25..7e487a25 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/zips.csv
+++ b/bigtop-data-generators/bigtop-location-data/src/main/resources/input_data/zips.csv