aboutsummaryrefslogtreecommitdiff
path: root/bigtop-data-generators/bigtop-location-data
diff options
context:
space:
mode:
Diffstat (limited to 'bigtop-data-generators/bigtop-location-data')
-rw-r--r-- bigtop-data-generators/bigtop-location-data/README.md 26
-rw-r--r-- bigtop-data-generators/bigtop-location-data/build.gradle 49
-rw-r--r-- bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java 136
-rw-r--r-- bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java 10
-rw-r--r-- bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java 282
5 files changed, 220 insertions, 283 deletions
diff --git a/bigtop-data-generators/bigtop-location-data/README.md b/bigtop-data-generators/bigtop-location-data/README.md
index 9aff06f9..b61eeb03 100644
--- a/bigtop-data-generators/bigtop-location-data/README.md
+++ b/bigtop-data-generators/bigtop-location-data/README.md
@@ -23,28 +23,4 @@ data model.
Building and Testing
--------------------
-We use the Gradle build system for the BPS data generator so you'll need
-to install Gradle on your system.
-Once that's done, you can use gradle to run the included unit tests
-and build the data generator jar.
-
-To build:
-
- $ gradle build
-
-This will create several directories and a jar located at:
-
- build/libs/bigtop-location-data-1.1.0-SNAPSHOT.jar
-
-Building automatically runs the included unit tests. If you would prefer
-to just run the unit tests, you can do so by:
-
- $ gradle test
-
-To clean up the build files, run:
-
- $ gradle clean
-
-To install a jar into your local maven repository:
-
- $ gradle install
+This project is part of a Gradle multi-project build -- please see the parent directory for build instructions.
diff --git a/bigtop-data-generators/bigtop-location-data/build.gradle b/bigtop-data-generators/bigtop-location-data/build.gradle
index 9eb91e39..bbced98f 100644
--- a/bigtop-data-generators/bigtop-location-data/build.gradle
+++ b/bigtop-data-generators/bigtop-location-data/build.gradle
@@ -13,51 +13,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-apply plugin: 'eclipse'
-apply plugin: 'groovy'
-apply plugin: 'java'
-apply plugin: 'maven'
-group = 'org.apache.bigtop'
-version = '1.1.0-SNAPSHOT'
jar {
-
- from {
- configurations.runtime.collect {
- it.isDirectory() ? it : zipTree(it)
- }
- }
-
- manifest {
- attributes 'Title': 'BigTop Samplers', 'Version': version
+ from {
+ configurations.runtime.collect {
+ it.isDirectory() ? it : zipTree(it)
}
-}
-
-repositories {
- mavenLocal()
- mavenCentral()
-}
-
-test {
- // show standard out and error on console
- testLogging.showStandardStreams = true
-
- // listen to events in the test execution lifecycle
- beforeTest { descriptor ->
- logger.lifecycle("Running test: " + descriptor)
- }
-
- // listen to standard out and standard error of the test JVM(s)
- onOutput { descriptor, event ->
- logger.lifecycle("Test: " + descriptor + " produced standard out/err: " + event.message )
- }
+ }
+ manifest {
+ attributes 'Title': 'BigTop Samplers', 'Version': version
+ }
}
dependencies {
- compile 'com.google.guava:guava:18.0'
-
- compile 'org.apache.commons:commons-lang3:3.4'
+ compile 'com.google.guava:guava:18.0'
+ compile 'org.apache.commons:commons-lang3:3.4'
- testCompile 'junit:junit:4.+'
+ testCompile 'junit:junit:4.+'
}
diff --git a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java
index 62afc0a8..58f6cdf0 100644
--- a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java
+++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java
@@ -19,72 +19,72 @@ import java.io.Serializable;
import org.apache.commons.lang3.tuple.Pair;
-public class Location implements Serializable
-{
- private static final long serialVersionUID = 1769986686070108470L;
-
- final String zipcode;
- final Pair<Double, Double> coordinates;
- final String city;
- final String state;
- final double medianHouseholdIncome;
- final long population;
-
- public Location(String zipcode, Pair<Double, Double> coordinates,
- String city, String state, double medianHouseholdIncome, long population)
- {
- this.city = city;
- this.state = state;
- this.zipcode = zipcode;
- this.coordinates = coordinates;
- this.medianHouseholdIncome = medianHouseholdIncome;
- this.population = population;
- }
-
- public String getZipcode()
- {
- return zipcode;
- }
-
- public Pair<Double, Double> getCoordinates()
- {
- return coordinates;
- }
-
- public double getMedianHouseholdIncome()
- {
- return medianHouseholdIncome;
- }
-
- public long getPopulation()
- {
- return population;
- }
-
- public double distance(Location other)
- {
- if(other.getZipcode().equals(zipcode))
- return 0.0;
-
- Pair<Double, Double> otherCoords = other.getCoordinates();
-
- double dist = Math.sin(Math.toRadians(coordinates.getLeft())) *
- Math.sin(Math.toRadians(otherCoords.getLeft())) +
- Math.cos(Math.toRadians(coordinates.getLeft())) *
- Math.cos(Math.toRadians(otherCoords.getLeft())) *
- Math.cos(Math.toRadians(coordinates.getRight() - otherCoords.getRight()));
- dist = Math.toDegrees(Math.acos(dist)) * 69.09;
-
- return dist;
- }
-
- public String getCity()
- {
- return city;
- }
-
- public String getState()
- {
- return state;
- }
+/**
+ * Immutable description of a single zipcode: its coordinates, city, state,
+ * median household income and population.
+ */
+public class Location implements Serializable {
+  private static final long serialVersionUID = 1769986686070108470L;
+
+  final String zipcode;
+  // distance() treats the left element as latitude and the right element as
+  // longitude, both in degrees.
+  final Pair<Double, Double> coordinates;
+  final String city;
+  final String state;
+  final double medianHouseholdIncome;
+  final long population;
+
+  /**
+   * @param zipcode zipcode identifying this location
+   * @param coordinates (latitude, longitude) pair in degrees
+   * @param city city name
+   * @param state state name or abbreviation
+   * @param medianHouseholdIncome median household income for the zipcode
+   * @param population population of the zipcode
+   */
+  public Location(String zipcode, Pair<Double, Double> coordinates, String city,
+      String state, double medianHouseholdIncome, long population) {
+    this.city = city;
+    this.state = state;
+    this.zipcode = zipcode;
+    this.coordinates = coordinates;
+    this.medianHouseholdIncome = medianHouseholdIncome;
+    this.population = population;
+  }
+
+  public String getZipcode() {
+    return zipcode;
+  }
+
+  public Pair<Double, Double> getCoordinates() {
+    return coordinates;
+  }
+
+  public double getMedianHouseholdIncome() {
+    return medianHouseholdIncome;
+  }
+
+  public long getPopulation() {
+    return population;
+  }
+
+  /**
+   * Great-circle distance from this location to the given coordinates,
+   * computed with the spherical law of cosines.
+   *
+   * @param otherCoords (latitude, longitude) pair in degrees
+   * @return the angular separation in degrees multiplied by 69.09
+   *         (presumably statute miles per degree of arc -- TODO confirm);
+   *         0.0 when both coordinates match to within 1e-5 degrees
+   */
+  public double distance(Pair<Double, Double> otherCoords) {
+    double latitude = coordinates.getLeft();
+    double longitude = coordinates.getRight();
+    double otherLatitude = otherCoords.getLeft();
+    double otherLongitude = otherCoords.getRight();
+
+    // Only a match on BOTH axes means "same point". Two places that merely
+    // share a latitude (or a longitude) can be arbitrarily far apart.
+    if (Math.abs(latitude - otherLatitude) < 1e-5
+        && Math.abs(longitude - otherLongitude) < 1e-5)
+      return 0.0;
+
+    double cosAngle = Math.sin(Math.toRadians(latitude))
+        * Math.sin(Math.toRadians(otherLatitude))
+        + Math.cos(Math.toRadians(latitude))
+            * Math.cos(Math.toRadians(otherLatitude))
+            * Math.cos(Math.toRadians(longitude - otherLongitude));
+
+    // Rounding can push the value marginally outside [-1, 1], for which
+    // Math.acos returns NaN; clamp so nearby or antipodal points stay finite.
+    cosAngle = Math.max(-1.0, Math.min(1.0, cosAngle));
+
+    return Math.toDegrees(Math.acos(cosAngle)) * 69.09;
+  }
+
+  /**
+   * Distance to another location; see {@link #distance(Pair)}.
+   *
+   * @param other location to measure to
+   * @return 0.0 when both locations share a zipcode, otherwise the distance
+   *         between their coordinates
+   */
+  public double distance(Location other) {
+    if (other.getZipcode().equals(zipcode))
+      return 0.0;
+
+    Pair<Double, Double> otherCoords = other.getCoordinates();
+
+    return distance(otherCoords);
+  }
+
+  public String getCity() {
+    return city;
+  }
+
+  public String getState() {
+    return state;
+  }
}
diff --git a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java
index 4140bfd4..e6ea242e 100644
--- a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java
+++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java
@@ -17,9 +17,9 @@ package org.apache.bigtop.datagenerators.locations;
import java.io.File;
-public class LocationConstants
-{
- public static final File COORDINATES_FILE = new File("zips.csv");
- public static final File INCOMES_FILE = new File("ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv");
- public static final File POPULATION_FILE = new File("population_data.csv");
+/**
+ * Relative names of the input data files consumed by LocationReader, which
+ * resolves them beneath the classpath directory /input_data/.
+ */
+public class LocationConstants {
+  /** Median household income table. */
+  public static final File INCOMES_FILE = new File(
+      "ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv");
+
+  /** Population per zipcode. */
+  public static final File POPULATION_FILE = new File("population_data.csv");
+
+  /** Zipcode coordinates together with city and state. */
+  public static final File COORDINATES_FILE = new File("zips.csv");
}
diff --git a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java
index 5be4d59d..6d5b29ec 100644
--- a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java
+++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java
@@ -32,151 +32,141 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
-public class LocationReader
-{
- private static class ZipcodeLocationRecord
- {
- public final Pair<Double, Double> coordinates;
- public final String state;
- public final String city;
-
- public ZipcodeLocationRecord(Pair<Double, Double> coordinates,
- String city, String state)
- {
- this.coordinates = coordinates;
- this.city = city;
- this.state = state;
- }
- }
-
- private InputStream getResource(File filename)
- {
- InputStream stream = getClass().getResourceAsStream("/input_data/" + filename);
- return new BufferedInputStream(stream);
- }
-
- private ImmutableMap<String, Double> readIncomeData(InputStream path) throws FileNotFoundException
- {
- Scanner scanner = new Scanner(path);
-
- // skip headers
- scanner.nextLine();
- scanner.nextLine();
-
- Map<String, Double> entries = Maps.newHashMap();
- while(scanner.hasNextLine())
- {
- String line = scanner.nextLine().trim();
- String[] cols = line.split(",");
- // zipcodes are in the form "ZCTA5 XXXXX"
- String zipcode = cols[2].split(" ")[1].trim();
- try
- {
- double medianHouseholdIncome = Integer.parseInt(cols[5].trim());
- entries.put(zipcode, medianHouseholdIncome);
- }
- catch(NumberFormatException e)
- {
-
- }
- }
-
- scanner.close();
-
- return ImmutableMap.copyOf(entries);
- }
-
- private ImmutableMap<String, Long> readPopulationData(InputStream path) throws FileNotFoundException
- {
- Scanner scanner = new Scanner(path);
-
- // skip header
- scanner.nextLine();
-
- Map<String, Long> entries = Maps.newHashMap();
- while(scanner.hasNextLine())
- {
- String line = scanner.nextLine().trim();
-
- if(line.length() == 0)
- continue;
-
- String[] cols = line.split(",");
-
- String zipcode = cols[0].trim();
- Long population = Long.parseLong(cols[1].trim());
-
- if(entries.containsKey(zipcode))
- {
- entries.put(zipcode, Math.max(entries.get(zipcode), population));
- }
- else
- {
- entries.put(zipcode, population);
- }
- }
-
- scanner.close();
-
- return ImmutableMap.copyOf(entries);
- }
-
- private ImmutableMap<String, ZipcodeLocationRecord> readCoordinates(InputStream path) throws FileNotFoundException
- {
- Scanner scanner = new Scanner(path);
-
- // skip header
- scanner.nextLine();
-
- Map<String, ZipcodeLocationRecord> entries = Maps.newHashMap();
- while(scanner.hasNextLine())
- {
- String line = scanner.nextLine().trim();
-
- String[] cols = line.split(", ");
-
- // remove quote marks
- String zipcode = cols[0].substring(1, cols[0].length() - 1);
- String state = cols[1].substring(1, cols[1].length() - 1);
- Double latitude = Double.parseDouble(cols[2].substring(1, cols[2].length() - 1));
- Double longitude = Double.parseDouble(cols[3].substring(1, cols[3].length() - 1));
- String city = cols[4].substring(1, cols[4].length() - 1);
-
- Pair<Double, Double> coords = Pair.of(latitude, longitude);
-
- ZipcodeLocationRecord record = new ZipcodeLocationRecord(coords, city, state);
-
- entries.put(zipcode, record);
- }
-
- scanner.close();
-
- return ImmutableMap.copyOf(entries);
- }
-
- public ImmutableList<Location> readData() throws FileNotFoundException
- {
-
- ImmutableMap<String, Double> incomes = readIncomeData(getResource(LocationConstants.INCOMES_FILE));
- ImmutableMap<String, Long> populations = readPopulationData(getResource(LocationConstants.POPULATION_FILE));
- ImmutableMap<String, ZipcodeLocationRecord> coordinates = readCoordinates(getResource(LocationConstants.COORDINATES_FILE));
-
- Set<String> zipcodeSubset = new HashSet<String>(incomes.keySet());
- zipcodeSubset.retainAll(populations.keySet());
- zipcodeSubset.retainAll(coordinates.keySet());
-
- List<Location> table = new Vector<Location>();
- for(String zipcode : zipcodeSubset)
- {
- Location record = new Location(zipcode,
- coordinates.get(zipcode).coordinates,
- coordinates.get(zipcode).city,
- coordinates.get(zipcode).state,
- incomes.get(zipcode),
- populations.get(zipcode));
- table.add(record);
- }
+public class LocationReader {
+  /**
+   * One parsed row of the coordinates file: where a zipcode is and which
+   * city and state it belongs to.
+   */
+  private static class ZipcodeLocationRecord {
+    public final String city;
+    public final String state;
+    public final Pair<Double, Double> coordinates;
+
+    /**
+     * @param coordinates (latitude, longitude) pair as parsed from the file
+     * @param city city name
+     * @param state state name or abbreviation
+     */
+    public ZipcodeLocationRecord(Pair<Double, Double> coordinates, String city,
+        String state) {
+      this.city = city;
+      this.state = state;
+      this.coordinates = coordinates;
+    }
+  }
- return ImmutableList.copyOf(table);
- }
+  /**
+   * Opens one of the bundled data files from the classpath directory
+   * /input_data/.
+   *
+   * @param filename path of the resource relative to /input_data/
+   * @return buffered stream over the resource; the caller must close it
+   * @throws FileNotFoundException if no such resource is on the classpath
+   */
+  private InputStream getResource(File filename) throws FileNotFoundException {
+    // Resource names always use '/', while File renders nested paths with the
+    // platform separator (e.g. '\' on Windows).
+    String resourceName = "/input_data/"
+        + filename.getPath().replace(File.separatorChar, '/');
+
+    InputStream stream = getClass().getResourceAsStream(resourceName);
+    if (stream == null) {
+      // Fail here with the resource name rather than later, inside a parser,
+      // with an error that does not say what is missing.
+      throw new FileNotFoundException(
+          "Classpath resource not found: " + resourceName);
+    }
+    return new BufferedInputStream(stream);
+  }
+
+  /**
+   * Parses the income table into a zipcode-to-income map.
+   *
+   * The first two lines are skipped as headers. On every other line the
+   * zipcode is the second space-separated token of the third comma-separated
+   * column, and the income is the sixth column parsed as an integer.
+   *
+   * @param path stream over the income file; closed before this method
+   *        returns or throws
+   * @return immutable map from zipcode to median household income
+   * @throws FileNotFoundException declared for callers; not raised directly
+   *         by this method
+   */
+  private ImmutableMap<String, Double> readIncomeData(InputStream path)
+      throws FileNotFoundException {
+    Scanner scanner = new Scanner(path);
+    try {
+      // skip headers
+      scanner.nextLine();
+      scanner.nextLine();
+
+      Map<String, Double> entries = Maps.newHashMap();
+      while (scanner.hasNextLine()) {
+        String line = scanner.nextLine().trim();
+
+        // Tolerate blank lines (such as a trailing newline) the way
+        // readPopulationData does, instead of failing on cols[2].
+        if (line.length() == 0)
+          continue;
+
+        String[] cols = line.split(",");
+        // zipcodes are in the form "ZCTA5 XXXXX"
+        String zipcode = cols[2].split(" ")[1].trim();
+        try {
+          double medianHouseholdIncome = Integer.parseInt(cols[5].trim());
+          entries.put(zipcode, medianHouseholdIncome);
+        } catch (NumberFormatException ignored) {
+          // Best effort: rows whose income column is not a plain integer are
+          // left out of the map.
+        }
+      }
+
+      return ImmutableMap.copyOf(entries);
+    } finally {
+      // Release the stream even when a malformed row aborts parsing.
+      scanner.close();
+    }
+  }
+
+  /**
+   * Parses the population table into a zipcode-to-population map.
+   *
+   * The first line is skipped as a header and blank lines are ignored. The
+   * first comma-separated column is the zipcode and the second the
+   * population. When a zipcode occurs more than once the largest population
+   * is kept.
+   *
+   * @param path stream over the population file; closed before this method
+   *        returns or throws
+   * @return immutable map from zipcode to population
+   * @throws FileNotFoundException declared for callers; not raised directly
+   *         by this method
+   */
+  private ImmutableMap<String, Long> readPopulationData(InputStream path)
+      throws FileNotFoundException {
+    Scanner scanner = new Scanner(path);
+    try {
+      // skip header
+      scanner.nextLine();
+
+      Map<String, Long> entries = Maps.newHashMap();
+      while (scanner.hasNextLine()) {
+        String line = scanner.nextLine().trim();
+
+        if (line.length() == 0)
+          continue;
+
+        String[] cols = line.split(",");
+
+        String zipcode = cols[0].trim();
+        long population = Long.parseLong(cols[1].trim());
+
+        // Keep the maximum seen so far with a single map lookup.
+        Long previous = entries.get(zipcode);
+        if (previous == null || population > previous) {
+          entries.put(zipcode, population);
+        }
+      }
+
+      return ImmutableMap.copyOf(entries);
+    } finally {
+      // Release the stream even when a malformed row aborts parsing.
+      scanner.close();
+    }
+  }
+
+  /**
+   * Parses the coordinates table into a map from zipcode to its location
+   * record.
+   *
+   * The first line is skipped as a header and blank lines are ignored.
+   * Columns are separated by ", " and each is wrapped in one quote character
+   * on either side; in order they are zipcode, state, latitude, longitude
+   * and city.
+   *
+   * @param path stream over the coordinates file; closed before this method
+   *        returns or throws
+   * @return immutable map from zipcode to its coordinates, city and state
+   * @throws FileNotFoundException declared for callers; not raised directly
+   *         by this method
+   */
+  private ImmutableMap<String, ZipcodeLocationRecord> readCoordinates(
+      InputStream path) throws FileNotFoundException {
+    Scanner scanner = new Scanner(path);
+    try {
+      // skip header
+      scanner.nextLine();
+
+      Map<String, ZipcodeLocationRecord> entries = Maps.newHashMap();
+      while (scanner.hasNextLine()) {
+        String line = scanner.nextLine().trim();
+
+        // A blank line (such as a trailing newline) has no quoted columns to
+        // strip and would otherwise fail in substring().
+        if (line.length() == 0)
+          continue;
+
+        String[] cols = line.split(", ");
+
+        String zipcode = unquote(cols[0]);
+        String state = unquote(cols[1]);
+        Double latitude = Double.parseDouble(unquote(cols[2]));
+        Double longitude = Double.parseDouble(unquote(cols[3]));
+        String city = unquote(cols[4]);
+
+        Pair<Double, Double> coords = Pair.of(latitude, longitude);
+
+        entries.put(zipcode, new ZipcodeLocationRecord(coords, city, state));
+      }
+
+      return ImmutableMap.copyOf(entries);
+    } finally {
+      // Release the stream even when a malformed row aborts parsing.
+      scanner.close();
+    }
+  }
+
+  /** Drops the first and last character (the surrounding quote marks). */
+  private static String unquote(String quoted) {
+    return quoted.substring(1, quoted.length() - 1);
+  }
+
+  /**
+   * Loads the income, population and coordinate tables and joins them on
+   * zipcode.
+   *
+   * @return one Location per zipcode present in all three tables
+   * @throws FileNotFoundException propagated from the table readers
+   */
+  public ImmutableList<Location> readData() throws FileNotFoundException {
+    ImmutableMap<String, Double> incomeByZip = readIncomeData(
+        getResource(LocationConstants.INCOMES_FILE));
+    ImmutableMap<String, Long> populationByZip = readPopulationData(
+        getResource(LocationConstants.POPULATION_FILE));
+    ImmutableMap<String, ZipcodeLocationRecord> recordByZip = readCoordinates(
+        getResource(LocationConstants.COORDINATES_FILE));
+
+    // Inner join: keep only zipcodes known to every table.
+    Set<String> commonZipcodes = new HashSet<String>(incomeByZip.keySet());
+    commonZipcodes.retainAll(populationByZip.keySet());
+    commonZipcodes.retainAll(recordByZip.keySet());
+
+    ImmutableList.Builder<Location> locations = ImmutableList.builder();
+    for (String zipcode : commonZipcodes) {
+      ZipcodeLocationRecord place = recordByZip.get(zipcode);
+      locations.add(new Location(zipcode, place.coordinates, place.city,
+          place.state, incomeByZip.get(zipcode),
+          populationByZip.get(zipcode)));
+    }
+
+    return locations.build();
+  }
}