diff options
Diffstat (limited to 'bigtop-data-generators/bigtop-location-data')
5 files changed, 220 insertions, 283 deletions
diff --git a/bigtop-data-generators/bigtop-location-data/README.md b/bigtop-data-generators/bigtop-location-data/README.md index 9aff06f9..b61eeb03 100644 --- a/bigtop-data-generators/bigtop-location-data/README.md +++ b/bigtop-data-generators/bigtop-location-data/README.md @@ -23,28 +23,4 @@ data model. Building and Testing -------------------- -We use the Gradle build system for the BPS data generator so you'll need -to install Gradle on your system. -Once that's done, you can use gradle to run the included unit tests -and build the data generator jar. - -To build: - - $ gradle build - -This will create several directories and a jar located at: - - build/libs/bigtop-location-data-1.1.0-SNAPSHOT.jar - -Building automatically runs the included unit tests. If you would prefer -to just run the unit tests, you can do so by: - - $ gradle test - -To clean up the build files, run: - - $ gradle clean - -To install a jar into your local maven repository: - - $ gradle install +This project is part of a Gradle multi-project build -- please see the parent directory for build instructions. diff --git a/bigtop-data-generators/bigtop-location-data/build.gradle b/bigtop-data-generators/bigtop-location-data/build.gradle index 9eb91e39..bbced98f 100644 --- a/bigtop-data-generators/bigtop-location-data/build.gradle +++ b/bigtop-data-generators/bigtop-location-data/build.gradle @@ -13,51 +13,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -apply plugin: 'eclipse' -apply plugin: 'groovy' -apply plugin: 'java' -apply plugin: 'maven' -group = 'org.apache.bigtop' -version = '1.1.0-SNAPSHOT' jar { - - from { - configurations.runtime.collect { - it.isDirectory() ? it : zipTree(it) - } - } - - manifest { - attributes 'Title': 'BigTop Samplers', 'Version': version + from { + configurations.runtime.collect { + it.isDirectory() ? 
it : zipTree(it) } -} - -repositories { - mavenLocal() - mavenCentral() -} - -test { - // show standard out and error on console - testLogging.showStandardStreams = true - - // listen to events in the test execution lifecycle - beforeTest { descriptor -> - logger.lifecycle("Running test: " + descriptor) - } - - // listen to standard out and standard error of the test JVM(s) - onOutput { descriptor, event -> - logger.lifecycle("Test: " + descriptor + " produced standard out/err: " + event.message ) - } + } + manifest { + attributes 'Title': 'BigTop Samplers', 'Version': version + } } dependencies { - compile 'com.google.guava:guava:18.0' - - compile 'org.apache.commons:commons-lang3:3.4' + compile 'com.google.guava:guava:18.0' + compile 'org.apache.commons:commons-lang3:3.4' - testCompile 'junit:junit:4.+' + testCompile 'junit:junit:4.+' } diff --git a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java index 62afc0a8..58f6cdf0 100644 --- a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java +++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/Location.java @@ -19,72 +19,72 @@ import java.io.Serializable; import org.apache.commons.lang3.tuple.Pair; -public class Location implements Serializable -{ - private static final long serialVersionUID = 1769986686070108470L; - - final String zipcode; - final Pair<Double, Double> coordinates; - final String city; - final String state; - final double medianHouseholdIncome; - final long population; - - public Location(String zipcode, Pair<Double, Double> coordinates, - String city, String state, double medianHouseholdIncome, long population) - { - this.city = city; - this.state = state; - this.zipcode = zipcode; - this.coordinates = 
coordinates; - this.medianHouseholdIncome = medianHouseholdIncome; - this.population = population; - } - - public String getZipcode() - { - return zipcode; - } - - public Pair<Double, Double> getCoordinates() - { - return coordinates; - } - - public double getMedianHouseholdIncome() - { - return medianHouseholdIncome; - } - - public long getPopulation() - { - return population; - } - - public double distance(Location other) - { - if(other.getZipcode().equals(zipcode)) - return 0.0; - - Pair<Double, Double> otherCoords = other.getCoordinates(); - - double dist = Math.sin(Math.toRadians(coordinates.getLeft())) * - Math.sin(Math.toRadians(otherCoords.getLeft())) + - Math.cos(Math.toRadians(coordinates.getLeft())) * - Math.cos(Math.toRadians(otherCoords.getLeft())) * - Math.cos(Math.toRadians(coordinates.getRight() - otherCoords.getRight())); - dist = Math.toDegrees(Math.acos(dist)) * 69.09; - - return dist; - } - - public String getCity() - { - return city; - } - - public String getState() - { - return state; - } +public class Location implements Serializable { + private static final long serialVersionUID = 1769986686070108470L; + + final String zipcode; + final Pair<Double, Double> coordinates; + final String city; + final String state; + final double medianHouseholdIncome; + final long population; + + public Location(String zipcode, Pair<Double, Double> coordinates, String city, + String state, double medianHouseholdIncome, long population) { + this.city = city; + this.state = state; + this.zipcode = zipcode; + this.coordinates = coordinates; + this.medianHouseholdIncome = medianHouseholdIncome; + this.population = population; + } + + public String getZipcode() { + return zipcode; + } + + public Pair<Double, Double> getCoordinates() { + return coordinates; + } + + public double getMedianHouseholdIncome() { + return medianHouseholdIncome; + } + + public long getPopulation() { + return population; + } + + public double distance(Pair<Double, Double> otherCoords) { + if 
(Math.abs(coordinates.getLeft() - otherCoords.getLeft()) < 1e-5 + && Math.abs(coordinates.getRight() - otherCoords.getRight()) < 1e-5) + return 0.0; // same point only when BOTH latitude and longitude agree; matching one axis alone is not zero distance + + double dist = Math.sin(Math.toRadians(coordinates.getLeft())) + * Math.sin(Math.toRadians(otherCoords.getLeft())) + + Math.cos(Math.toRadians(coordinates.getLeft())) + * Math.cos(Math.toRadians(otherCoords.getLeft())) + * Math.cos(Math.toRadians( + coordinates.getRight() - otherCoords.getRight())); + dist = Math.toDegrees(Math.acos(Math.max(-1.0, Math.min(1.0, dist)))) * 69.09; // clamp to [-1, 1]: rounding can push the cosine just past 1, where acos returns NaN + + return dist; + } + + public double distance(Location other) { + if (other.getZipcode().equals(zipcode)) + return 0.0; + + Pair<Double, Double> otherCoords = other.getCoordinates(); + + return distance(otherCoords); + } + + public String getCity() { + return city; + } + + public String getState() { + return state; + } } diff --git a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java index 4140bfd4..e6ea242e 100644 --- a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java +++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationConstants.java @@ -17,9 +17,9 @@ package org.apache.bigtop.datagenerators.locations; import java.io.File; -public class LocationConstants -{ - public static final File COORDINATES_FILE = new File("zips.csv"); - public static final File INCOMES_FILE = new File("ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv"); - public static final File POPULATION_FILE = new File("population_data.csv"); +public class LocationConstants { + public static final File COORDINATES_FILE = new File("zips.csv"); + public static final File INCOMES_FILE = new File( + "ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv"); + public static final File 
POPULATION_FILE = new File("population_data.csv"); } diff --git a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java index 5be4d59d..6d5b29ec 100644 --- a/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java +++ b/bigtop-data-generators/bigtop-location-data/src/main/java/org/apache/bigtop/datagenerators/locations/LocationReader.java @@ -32,151 +32,141 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; -public class LocationReader -{ - private static class ZipcodeLocationRecord - { - public final Pair<Double, Double> coordinates; - public final String state; - public final String city; - - public ZipcodeLocationRecord(Pair<Double, Double> coordinates, - String city, String state) - { - this.coordinates = coordinates; - this.city = city; - this.state = state; - } - } - - private InputStream getResource(File filename) - { - InputStream stream = getClass().getResourceAsStream("/input_data/" + filename); - return new BufferedInputStream(stream); - } - - private ImmutableMap<String, Double> readIncomeData(InputStream path) throws FileNotFoundException - { - Scanner scanner = new Scanner(path); - - // skip headers - scanner.nextLine(); - scanner.nextLine(); - - Map<String, Double> entries = Maps.newHashMap(); - while(scanner.hasNextLine()) - { - String line = scanner.nextLine().trim(); - String[] cols = line.split(","); - // zipcodes are in the form "ZCTA5 XXXXX" - String zipcode = cols[2].split(" ")[1].trim(); - try - { - double medianHouseholdIncome = Integer.parseInt(cols[5].trim()); - entries.put(zipcode, medianHouseholdIncome); - } - catch(NumberFormatException e) - { - - } - } - - scanner.close(); - - return 
ImmutableMap.copyOf(entries); - } - - private ImmutableMap<String, Long> readPopulationData(InputStream path) throws FileNotFoundException - { - Scanner scanner = new Scanner(path); - - // skip header - scanner.nextLine(); - - Map<String, Long> entries = Maps.newHashMap(); - while(scanner.hasNextLine()) - { - String line = scanner.nextLine().trim(); - - if(line.length() == 0) - continue; - - String[] cols = line.split(","); - - String zipcode = cols[0].trim(); - Long population = Long.parseLong(cols[1].trim()); - - if(entries.containsKey(zipcode)) - { - entries.put(zipcode, Math.max(entries.get(zipcode), population)); - } - else - { - entries.put(zipcode, population); - } - } - - scanner.close(); - - return ImmutableMap.copyOf(entries); - } - - private ImmutableMap<String, ZipcodeLocationRecord> readCoordinates(InputStream path) throws FileNotFoundException - { - Scanner scanner = new Scanner(path); - - // skip header - scanner.nextLine(); - - Map<String, ZipcodeLocationRecord> entries = Maps.newHashMap(); - while(scanner.hasNextLine()) - { - String line = scanner.nextLine().trim(); - - String[] cols = line.split(", "); - - // remove quote marks - String zipcode = cols[0].substring(1, cols[0].length() - 1); - String state = cols[1].substring(1, cols[1].length() - 1); - Double latitude = Double.parseDouble(cols[2].substring(1, cols[2].length() - 1)); - Double longitude = Double.parseDouble(cols[3].substring(1, cols[3].length() - 1)); - String city = cols[4].substring(1, cols[4].length() - 1); - - Pair<Double, Double> coords = Pair.of(latitude, longitude); - - ZipcodeLocationRecord record = new ZipcodeLocationRecord(coords, city, state); - - entries.put(zipcode, record); - } - - scanner.close(); - - return ImmutableMap.copyOf(entries); - } - - public ImmutableList<Location> readData() throws FileNotFoundException - { - - ImmutableMap<String, Double> incomes = readIncomeData(getResource(LocationConstants.INCOMES_FILE)); - ImmutableMap<String, Long> populations = 
readPopulationData(getResource(LocationConstants.POPULATION_FILE)); - ImmutableMap<String, ZipcodeLocationRecord> coordinates = readCoordinates(getResource(LocationConstants.COORDINATES_FILE)); - - Set<String> zipcodeSubset = new HashSet<String>(incomes.keySet()); - zipcodeSubset.retainAll(populations.keySet()); - zipcodeSubset.retainAll(coordinates.keySet()); - - List<Location> table = new Vector<Location>(); - for(String zipcode : zipcodeSubset) - { - Location record = new Location(zipcode, - coordinates.get(zipcode).coordinates, - coordinates.get(zipcode).city, - coordinates.get(zipcode).state, - incomes.get(zipcode), - populations.get(zipcode)); - table.add(record); - } +public class LocationReader { + private static class ZipcodeLocationRecord { + public final Pair<Double, Double> coordinates; + public final String state; + public final String city; + + public ZipcodeLocationRecord(Pair<Double, Double> coordinates, String city, + String state) { + this.coordinates = coordinates; + this.city = city; + this.state = state; + } + } - return ImmutableList.copyOf(table); - } + private InputStream getResource(File filename) { + InputStream stream = getClass() + .getResourceAsStream("/input_data/" + filename); + return new BufferedInputStream(stream); + } + + private ImmutableMap<String, Double> readIncomeData(InputStream path) + throws FileNotFoundException { + Scanner scanner = new Scanner(path); + + // skip headers + scanner.nextLine(); + scanner.nextLine(); + + Map<String, Double> entries = Maps.newHashMap(); + while (scanner.hasNextLine()) { + String line = scanner.nextLine().trim(); + String[] cols = line.split(","); + // zipcodes are in the form "ZCTA5 XXXXX" + String zipcode = cols[2].split(" ")[1].trim(); + try { + double medianHouseholdIncome = Integer.parseInt(cols[5].trim()); + entries.put(zipcode, medianHouseholdIncome); + } catch (NumberFormatException e) { + + } + } + + scanner.close(); + + return ImmutableMap.copyOf(entries); + } + + private 
ImmutableMap<String, Long> readPopulationData(InputStream path) + throws FileNotFoundException { + Scanner scanner = new Scanner(path); + + // skip header + scanner.nextLine(); + + Map<String, Long> entries = Maps.newHashMap(); + while (scanner.hasNextLine()) { + String line = scanner.nextLine().trim(); + + if (line.length() == 0) + continue; + + String[] cols = line.split(","); + + String zipcode = cols[0].trim(); + Long population = Long.parseLong(cols[1].trim()); + + if (entries.containsKey(zipcode)) { + entries.put(zipcode, Math.max(entries.get(zipcode), population)); + } else { + entries.put(zipcode, population); + } + } + + scanner.close(); + + return ImmutableMap.copyOf(entries); + } + + private ImmutableMap<String, ZipcodeLocationRecord> readCoordinates( + InputStream path) throws FileNotFoundException { + Scanner scanner = new Scanner(path); + + // skip header + scanner.nextLine(); + + Map<String, ZipcodeLocationRecord> entries = Maps.newHashMap(); + while (scanner.hasNextLine()) { + String line = scanner.nextLine().trim(); + + String[] cols = line.split(", "); + + // remove quote marks + String zipcode = cols[0].substring(1, cols[0].length() - 1); + String state = cols[1].substring(1, cols[1].length() - 1); + Double latitude = Double + .parseDouble(cols[2].substring(1, cols[2].length() - 1)); + Double longitude = Double + .parseDouble(cols[3].substring(1, cols[3].length() - 1)); + String city = cols[4].substring(1, cols[4].length() - 1); + + Pair<Double, Double> coords = Pair.of(latitude, longitude); + + ZipcodeLocationRecord record = new ZipcodeLocationRecord(coords, city, + state); + + entries.put(zipcode, record); + } + + scanner.close(); + + return ImmutableMap.copyOf(entries); + } + + public ImmutableList<Location> readData() throws FileNotFoundException { + + ImmutableMap<String, Double> incomes = readIncomeData( + getResource(LocationConstants.INCOMES_FILE)); + ImmutableMap<String, Long> populations = readPopulationData( + 
getResource(LocationConstants.POPULATION_FILE)); + ImmutableMap<String, ZipcodeLocationRecord> coordinates = readCoordinates( + getResource(LocationConstants.COORDINATES_FILE)); + + Set<String> zipcodeSubset = new HashSet<String>(incomes.keySet()); + zipcodeSubset.retainAll(populations.keySet()); + zipcodeSubset.retainAll(coordinates.keySet()); + + List<Location> table = new Vector<Location>(); + for (String zipcode : zipcodeSubset) { + Location record = new Location(zipcode, + coordinates.get(zipcode).coordinates, + coordinates.get(zipcode).city, coordinates.get(zipcode).state, + incomes.get(zipcode), populations.get(zipcode)); + table.add(record); + } + + return ImmutableList.copyOf(table); + } } |