| 1 |
15 |
art |
#!/usr/bin/env python
|
| 2 |
|
|
|
| 3 |
|
|
# forgottenislanderbot - makes a DSL coverage map from Bell Aliant's website.
|
| 4 |
|
|
# Copyright (c) 2010 Art Ortenburger
|
| 5 |
|
|
#
|
| 6 |
|
|
# This program is free software; you can redistribute it and/or
|
| 7 |
|
|
# modify it under the terms of the GNU General Public License
|
| 8 |
|
|
# as published by the Free Software Foundation; either version 2
|
| 9 |
|
|
# of the License, or any later version.
|
| 10 |
|
|
#
|
| 11 |
|
|
# This program is distributed in the hope that it will be useful,
|
| 12 |
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
|
|
# GNU General Public License for more details.
|
| 15 |
|
|
#
|
| 16 |
|
|
# You should have received a copy of the GNU General Public License
|
| 17 |
|
|
# along with this program; if not, write to the Free Software
|
| 18 |
|
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
| 19 |
|
|
#
|
| 20 |
|
|
|
| 21 |
|
|
# fib.py
|
| 22 |
|
|
# by Art Ortenburger IV <utrrrongeeb a users , sf , net>
|
| 23 |
|
|
# written 2010-01-25 -
|
| 24 |
|
|
#
|
| 25 |
|
|
# forgottenislanderbot -- unified interface
|
| 26 |
|
|
#
|
| 27 |
|
|
# Provides all forgottenislanderbot functions from
|
| 28 |
|
|
# one command-line interface, using standard-form
|
| 29 |
|
|
# command-line options with flexibility.
|
| 30 |
|
|
#
|
| 31 |
|
|
|
| 32 |
|
|
|
| 33 |
18 |
art |
import sys # for argument input and stream buffer flushing
|
| 34 |
|
|
import optparse # for command and option parsing
|
| 35 |
|
|
import sqlite3 # for all database operations
|
| 36 |
|
|
import zipfile # for KMZ output
|
| 37 |
|
|
import StringIO # for KML output
|
| 38 |
|
|
import traceback # maybe used once for debugging
|
| 39 |
|
|
import robotparser # Robot Exclusion Standard; user may override
|
| 40 |
|
|
import urllib #
|
| 41 |
|
|
import httplib # loading web page
|
| 42 |
|
|
import time # sleeping between requests
|
| 43 |
|
|
import random # randomizing sleeping, etc.
|
| 44 |
15 |
art |
|
| 45 |
18 |
art |
|
| 46 |
|
|
# Database handles shared by the whole module. Both stay None until
# initdb() has run; done() closes them again.
sd = None   # sqlite3 connection object
sdc = None  # cursor opened on that connection

# Display name for each DSL status. A row's status code is used as
# (status + 1) when indexing this list, so -1 maps to "ERROR"; the final
# entry is only the label for the grand total in the statistics table.
dslnames = "ERROR EMPTY NODSL BASIC ULTRA TOTAL".split()

# Running counters, one slot per entry in dslnames (same order).
statistics = [0] * len(dslnames)

# In-memory buffer the KML document is assembled in; created by kml().
kmlbuffer = None

# coefficient for highlit icon size in KML
kmlIconHighlitScale = 1.1

# Built-in "GM" icon theme: one pushpin per DSL status (no icon for TOTAL).
GMIconURLs = list(
    "http://maps.google.com/mapfiles/kml/pushpin/%s-pushpin.png" % pincolour
    for pincolour in ("purple", "wht", "ylw", "ltblu", "grn"))

# x,y pixel inside a GM icon that sits on the placemark's coordinates
GMIconHotspot = [20, 2]

# hostname of Bell Aliant server
ba_host = "productsandservice.bellaliant.net"

# Page on that server which does a whole address check in one request.
# (Such a shame it won't take the "Range: bytes=" header.)
ba_handler = ("/PS/pe/english/productsandservices/"
              "qualificationFullAddressCheck.do")
|
| 64 |
|
|
|
| 65 |
|
|
|
| 66 |
|
|
def _makeCmdParser():
    """ Builds the parser for the single leading command argument."""
    # command parser -- only parses one arg.
    # usage strings must be set appropriately.
    cmdParser = optparse.OptionParser(usage="")

    # commands. At the moment, only one command is allowed, but by using
    # the "append" or "callback" actions it might become multi-command later.
    # later: single-char options? add --version?
    # later: add commands to only download, only make/merge
    # later: add merge/update, diff, strip commands
    commands = (
        ("--all", "auto",
         "do everything -- get civic addresses, run bot, and produce KMLs"),
        ("--newdb", "newdb",
         "download civic addresses and make a new, empty database"),
        ("--crawl", "crawl",
         "query the Bell Aliant server, finding DSL statuses"),
        ("--kml", "kml",
         "generate a KML overlay from a database"),
        ("--stats", "stats",
         "print the totals of each DSL status"),
        ("--list", "listdb",
         "print every selected address"),
    )
    for flag, dest, helptext in commands:
        cmdParser.add_option(flag, action="store_true", dest=dest,
                             default=False, help=helptext)
    return cmdParser


def _makeOptParser():
    """ Builds the parser for everything after the command argument."""
    # option parser -- parses anything else. All options are...optional.
    # later: single-char options?, better help
    optParser = optparse.OptionParser(usage="")

    # Global options:
    optParser.add_option("--db", action="store", dest="db", default="database.sqlite",
        help="SQlite database file to work with. Default: database.sqlite", metavar="FILE")
    # there are NOT going to be individual dslmasks, sparsities, maxrows, latlons,
    # custom SQLs, endofroads, or nocities for --crawl, --stats, and --kml. Period.
    # anyone needing such functionality will have to use a shell script and multiple
    # invocations, or modify the code themselves.
    optParser.add_option("--dslmask", action="store", dest="dslmask", type="string",
        default=None, metavar="MASK",
        help="DSL types to process; y/n for each of EMPTY, NODSL, BASIC, & ULTRA. Overrides defaults: crawl=ynnn, stats=yyyy, kml=nyyy")
    optParser.add_option("--sparsity", action="store", dest="sparsity", type="int",
        default=None, metavar="INT",
        help="minimum difference in civic numbers along a road to process. Overrides defaults: crawl=1, stats=1, kml=40")
    optParser.add_option("--nocities", action="store_true", dest="nocities", default=False,
        help="don't process addresses in Charlottetown or Summerside")
    optParser.add_option("--max", action="store_true", dest="maxrows", default=False,
        help="process more addresses when using a >1 sparsity.")
    optParser.add_option("--always-get", action="store", dest="alwaysget",
        default="first", metavar="HOUSE",
        help="ignoring sparsity, which address on the end of a road to always get. Values: 'none', 'first' (default), 'last', 'both'.")
    optParser.add_option("--sql-where", action="store", dest="sqlwhere", type="string",
        default=None, metavar="SQL", help="custom SQL SELECT conditions. Inserted directly after WHERE. Be careful!")
    optParser.add_option("--coord-rect", action="store", dest="coordrect", type="string",
        default=None, metavar="latA,longA,latB,longB", help="latitude/longitude coordinates limiting processing to addresses within the rectangle")

    # newdb, merge options:
    optParser.add_option("--tsv", action="append", dest="tsv", type="string",
        default=None, metavar="TSV-FILE", help="TSV database of civic addresses to add to SQlite database. Can appear multiple times.")

    # crawler-bot options:
    # (the next two share dest "ores": True = override, False = exit quietly,
    # None = neither flag given)
    optParser.add_option("--override-robot-exclusion-standard", action="store_true",
        dest="ores", default=None, help="override a robots.txt, instead of asking")
    optParser.add_option("--quiet-fail", action="store_false", dest="ores",
        default=None, help="don't ask about a robots.txt -- just exit")
    optParser.add_option("--delay", action="store", type="float", dest="delay",
        default=1.0, metavar="SECONDS", help="time to sleep between single-threaded requests to Bell Aliant server. Can be float. Default: 1.0")
    optParser.add_option("--no-connect", action="store_true", dest="noconnect",
        default=False, help="Don't reconnect HTTP connection between requests. Experimental, will probably fail.")
    optParser.add_option("--useragent", action="store", dest="useragent", default="forgottenislanderbot",
        metavar="NAME", help="user-agent for the bot to represent itself with. Default: %default")
    optParser.add_option("--pause-every", action="store", dest="pauseblock", type="int",
        default=20, metavar="QUERIES", help="number of requests to run between database-commit / kill-opportunity sleep")
    optParser.add_option("--pause-time", action="store", dest="pausetime", type="float",
        default=3, metavar="SECONDS", help="time to sleep at an intermittent pause")

    # stats options: (none yet)

    # kml options:
    optParser.add_option("--map", action="store", type="string", dest="kmlfile",
        default="dslmap.kml", metavar="KMLFILE", help="file to write KML to. Will be overwritten. Default: dslmap.kml")
    optParser.add_option("--globe", action="store", type="string", dest="kmlglobe",
        default=None, metavar="[GE|NWW]", help="virtual-globe to optimize KML for")
    optParser.add_option("--civic-addresses-in-kml", action="store_true", dest="keepaddrs",
        default=False, help="Label icons in KML by civic address. May raise privacy concerns, file size, lag.")
    # The default must be the bool False: the string "False" is truthy and
    # would leave highlighting disabled even when the flag is not given.
    optParser.add_option("--no-highlight", action="store_true", dest="nohighlight",
        default=False, help="Disable highlightable icons in KML. Experimental.")
    optParser.add_option("--kmz", action="store_true", dest="kmz", default=False,
        help="Make a compressed KMZ from the KML.")
    optParser.add_option("--iconsize", action="store", type="float", dest="iconsize",
        default=1.0, metavar="FLOAT", help="Size of KML icons. Default: 1.0")
    optParser.add_option("--icontheme", action="store", type="string", dest="icontheme",
        default="GM", metavar="[NAME|ante*post.png]",
        help="Icons to use in KML. Either builtin, like 'GM', or external, like "
        "'dsl-*.png', where '*' comprises 'ERROR', 'EMPTY', 'NODSL', 'BASIC', and 'ULTRA', "
        "in separate files. It can also be a URL.")
    optParser.add_option("--iconhotspot", action="store", type="string", dest="iconhotspot",
        default=None, metavar="x,y",
        help="Pixel coords in an icon where it is aligned over the lat/long coords.")
    optParser.add_option("--iconcolours", action="store", type="string", dest="iconcolours",
        default=None, metavar="LIST", help="Colours to tint KML icons, "
        "as comma-separated list of five colours like 'bf0000ff', in 'AABBGGRR' format.")

    # csv options:
    optParser.add_option("--csvfile", action="store", type="string", dest="csvfile",
        default="coords.csv", metavar="CSVFILE", help="file to write CSV to. Will be overwritten. Default: %default")

    # ui options:
    optParser.add_option("--pipeable", action="store_true", dest="pipeable", default=False,
        help="Changes stdout flushing behavior. May cause problems on some systems. Not implemented.")
    optParser.add_option("--verbose", action="store_true", dest="verbose", default=False,
        help="Prints additional information. Not implemented.")
    optParser.add_option("--debug", action="store_true", dest="debug", default=False,
        help="Prints massive amounts of debug information. Implemented as necessary.")
    return optParser


def main():
    """ main UI function for forgottenislanderbot.

        Prints the banner, then reads the command line: sys.argv[1] must be
        the command (--all, --newdb, --crawl, --kml, --stats or --list) and
        everything after it is parsed as options. The two parsers make
        laziness look elegant -- "fib.py --help" gives the command list,
        "fib.py --all --help" gives the option list.

        Never returns normally: the process always ends through done() or
        sys.exit(), with status 1 if the command raised an exception."""

    print("%s %s" % (sys.argv[0], """(forgottenislanderbot unified interface) Version 0.1
Copyright (c) 2010 Art Ortenburger. Licensed under the GNU GPL; no warranty."""))

    cmdParser = _makeCmdParser()
    optParser = _makeOptParser()

    if len(sys.argv) <= 1:  # no command, can't do anything
        cmdParser.print_help()
        sys.exit(0)

    # parse command, then options
    cmd = cmdParser.parse_args([sys.argv[1]])[0]
    opt = optParser.parse_args(sys.argv[2:])[0]

    # extract and parse dslmask and sparsity -- the first time
    dslmask = parseDSLMask(opt.dslmask) if opt.dslmask else None
    sparsity = opt.sparsity or None
    # individual mode dslmasks and sparsities; an explicit --dslmask or
    # --sparsity overrides every per-mode default below.
    # later: a csv command would default to mask nynn, sparsity 1.
    crawldslmask = dslmask or [True, False, False, False]
    kmldslmask = dslmask or [False, True, True, True]
    statsdslmask = dslmask or [True, True, True, True]
    listdbdslmask = dslmask or [False, True, False, False]
    crawlsparsity = sparsity or 1
    kmlsparsity = sparsity or 40
    statssparsity = sparsity or 1
    listdbsparsity = sparsity or 1000
    # change default name to .kmz if necessary
    if opt.kmz and opt.kmlfile == "dslmap.kml":
        outkml = "dslmap.kmz"
    else:
        outkml = opt.kmlfile

    # take action depending on mode. sdc is the module-level cursor that
    # initdb() fills in, so it is only valid after initdb() has been called.
    try:
        if cmd.auto:  # do the whole thing
            initdb(opt.db)
            newdb(sdc, opt)
            crawl(sdc, crawldslmask, crawlsparsity, opt)
            # later: loop, checking stats, to repeat crawl
            kml(sdc, kmldslmask, kmlsparsity, opt, outkml)
            stats(sdc, statsdslmask, statssparsity, opt, True)
        elif cmd.newdb:
            initdb(opt.db)
            newdb(sdc, opt)
        elif cmd.crawl:
            initdb(opt.db)
            crawl(sdc, crawldslmask, crawlsparsity, opt)
        elif cmd.kml:
            initdb(opt.db)
            kml(sdc, kmldslmask, kmlsparsity, opt, outkml)
        elif cmd.stats:
            initdb(opt.db)
            stats(sdc, statsdslmask, statssparsity, opt, True)
        elif cmd.listdb:
            initdb(opt.db)
            listdb(sdc, listdbdslmask, listdbsparsity, opt)
        else:
            cmdParser.print_help()
    except Exception as errdet:
        print("Failing. Command could not be completed. Data may not have been saved.")
        print("Error: %s" % (errdet,))
        traceback.print_exc()
        done(1)
    done()
|
| 239 |
15 |
art |
|
| 240 |
18 |
art |
def done(errcode=0):
    """ Exits safely,
        committing, flushing, and closing anything that needs it.
        errcode:
            int; process exit status. 0 (the default) means success.
        Never returns: always raises SystemExit through sys.exit()."""
    # Cleanup is deliberately best-effort: the database may never have been
    # opened (sd and sdc still None), or may already be closed. Only
    # ordinary errors are swallowed, so Ctrl-C still gets through.
    try:
        sdc.close()
    except Exception:
        pass
    try:
        sd.commit()
        sd.close()
    except Exception:
        pass
    # sys.exit() rather than the bare exit(): the latter is a convenience
    # injected by the site module and is missing under "python -S".
    sys.exit(errcode)
|
| 253 |
17 |
art |
|
| 254 |
18 |
art |
|
| 255 |
|
|
def initdb(dbn):
    """ opens sqlite database for use by FIB UI.
        dbn:
            string; path of the SQlite database file to connect to.
        Stores the connection in the module-level sd and a cursor on it
        in the module-level sdc. If either step fails, the error is
        printed and done(1) is called."""
    global sd, sdc
    try:
        sd = sqlite3.connect(dbn)
        sdc = sd.cursor()
    except Exception as errdet:
        print("Failing: couldn't open database  %s :  %s" % (dbn, errdet))
        print(sys.exc_info()[0])
        done(1)
|
| 264 |
|
|
|
| 265 |
|
|
def newdb(dc, opt):
    """ Placeholder for the --newdb command.
        Intended to download the civic address databases and merge them
        into a new database; not implemented yet, so it does nothing.
        dc:
            database cursor, as passed by main().
        opt:
            parsed command-line options."""
    return None
|
| 267 |
|
|
|
| 268 |
|
|
def crawl(dc, dslmask=None, sparsity=None, opt=None):
    """ Placeholder for the --crawl command (the real bot part -- queries).
        Not implemented yet, so it does nothing.
        dc:
            database cursor, as passed by main().
        dslmask:
            4 bools in list; which of EMPTY, NODSL, BASIC, & ULTRA
            to query.
        sparsity:
            int; minimum civic-number distance between houses to query.
        opt:
            parsed command-line options.
        main() calls this with all four arguments. The older two-argument
        form crawl(dc, opt) is still accepted."""
    if sparsity is None and opt is None:
        # legacy call crawl(dc, opt): the second positional is the options
        dslmask, opt = None, dslmask
    return None
|
| 270 |
|
|
|
| 271 |
|
|
def kml(dc, dslmask, sparsity, opt, outkml):
    """ Generates a KML (or KMZ) file from a database.
        dc:
            sqlite3.Cursor to FIB database.
        dslmask:
            4 bools in list. Whether to include in the output each of
            the EMPTY, NODSL, BASIC, & ULTRA categories.
            Usually [False,True,True,True]
        sparsity:
            int; minimum civic-number distance between houses to
            output. This is to avoid slowing Google Earth to a crawl
            with 68023 placemarks.
        opt:
            parsed command-line options; opt.kmz selects KMZ output.
        outkml:
            string; path to the file to write. Will be overwritten.
        The document is first assembled in the module-level kmlbuffer
        (header, one placemark per selected address, footer) and then
        written out in one go. Write errors are printed, not raised."""
    global kmlbuffer
    kmlbuffer = StringIO.StringIO()
    kmlbuffer.write(kmlMakeHeader(outkml, opt))
    processDB(dc, kmlItemAction, dslmask, sparsity, opt)
    kmlbuffer.write(kmlMakeFooter(outkml, opt))

    kmlfile = kmzzip = None
    try:
        kmlfile = open(outkml, "wb")
        if not opt.kmz:
            # write normal KML
            kmlfile.write(kmlbuffer.getvalue())
        else:
            # write a zipfile; doc.kml is the special name required by Google Earth
            kmzzip = zipfile.ZipFile(kmlfile, 'w', zipfile.ZIP_DEFLATED)
            kmzzip.writestr("doc.kml", kmlbuffer.getvalue())
            kmzzip.close()
        kmlfile.flush()
        kmlfile.close()
    except Exception as errdet:
        print("Error writing KML: %s" % (errdet,))
        print(sys.exc_info()[0])
        # try to close whatever got opened; either handle may still be
        # None or half-finished, so failures here are ignored
        try:
            kmzzip.close()
        except:
            pass
        try:
            kmlfile.flush()
            kmlfile.close()
        except:
            pass
    kmlbuffer.close()
|
| 321 |
|
|
|
| 322 |
|
|
def stats(dc, dslmask, sparsity, opt, printtable):
    """ Counts the selected addresses by DSL status.
        dc:
            sqlite3.Cursor to FIB database
        dslmask:
            4 bools in list. Whether to count each of the EMPTY,
            NODSL, BASIC, & ULTRA categories.
            Usually [True,True,True,True]
        sparsity:
            int; minimum civic-number distance between houses to
            count. This is to find out what a given sparsity will do
            to the crawler or Google Earth. For totals, use 1.
        opt:
            parsed command-line options.
        printtable:
            bool; whether to print the table, or just return the
            values.
        Returns the module-level statistics list, whose slots are in
        the same order as dslnames."""
    # zero the counters in place, so the same list object stays shared
    statistics[:] = [0] * len(statistics)
    # use utility function to loop through database
    processDB(dc, statsItemAction, dslmask, sparsity, opt)
    if printtable:
        print("")  # newline needed
        for label, count in zip(dslnames, statistics):
            print("%s :  %s" % (label, count))
    return statistics
|
| 355 |
|
|
|
| 356 |
|
|
def listdb(dc, dslmask, sparsity, opt):
    """ Prints the selected addresses from the civic address database.
        dc:
            sqlite3.Cursor to FIB database
        dslmask:
            4 bools in list; which of the EMPTY, NODSL, BASIC, &
            ULTRA categories to print.
        sparsity:
            int; minimum civic-number distance between houses to
            print. This is to find out what a given sparsity will do
            to the crawler or Google Earth. To list everything, use 1.
        opt:
            parsed command-line options.
        One line is printed per address; stdout is flushed at the end."""
    processDB(dc, actionfunc=listItemAction, dslmask=dslmask,
              sparsity=sparsity, opt=opt)
    sys.stdout.flush()
|
| 368 |
|
|
|
| 369 |
|
|
def processDB(dc, actionfunc, dslmask, sparsity, opt):
    """ Loops through SQlite database, selecting rows
        based on parameters, and calling the specified function
        for each selected row.
        dc:
            sqlite3.Cursor to civic address database from fib-cadb2sql.py
        actionfunc:
            function to call for selected addresses.
            Must accept these parameters:
                sqlite3.Cursor
                town (string)
                road (string)
                civic number (int)
                dsl status of address (int)
                longitude (float)
                latitude (float)
                option set
            Exceptions it raises are printed and the loop carries on.
        dslmask:
            4 bools in a list.
            Whether to call actionfunc() for rows
            in the EMPTY, NODSL, BASIC, or ULTRA
            categories. Negative (error) statuses count as EMPTY.
            A status above 3 raises IndexError.
        sparsity:
            int; minimum civic-number difference between houses to process.
            To get every house, enter 1.
            For about one house per road, enter 10000.
        opt:
            parsed command-line options. Reads coordrect, alwaysget,
            sqlwhere, nocities and maxrows."""
    # parse the lat/lon rectangle, if needed. Bad input does not abort the
    # run: it is reported once and the rectangle filter is switched off.
    coordrect = False
    if opt.coordrect:
        coordmaskstr = opt.coordrect.split(',')
        try:
            # too few fields fail the unpacking with ValueError as well
            lata, lona, latb, lonb = [float(part) for part in coordmaskstr[:4]]
        except ValueError:
            print("invalid coordinate rectangle %s" % (coordmaskstr,))
        else:
            # corners may come in any order; normalise once, outside the loop
            latlo, lathi = min(lata, latb), max(lata, latb)
            lonlo, lonhi = min(lona, lonb), max(lona, lonb)
            coordrect = True

    # parse whether to always get the first house, the last house, both, or neither on a road
    alwaysfirst = opt.alwaysget in ("first", "both")
    alwayslast = opt.alwaysget in ("last", "both")

    # query the sqlite database for everything. --sql-where is spliced in
    # verbatim on purpose: it is the operator's own raw SQL condition.
    query = "select town,road,civic,dsl,long,lat from dsl"
    if opt.sqlwhere:
        query += " where " + opt.sqlwhere
    query += " order by town,road,civic"
    dc.execute(query)

    # lasttown/lastroad/lastcivic : previous address, for the sparsity check
    # lastok       : actionfunc arguments of the latest address that passed
    #                every filter except sparsity, for the last-on-road feature
    # lastoknotgot : whether the previous processed address was such a
    #                skipped-for-sparsity one
    lasttown = lastroad = ""
    lastcivic = 0
    lastok = None
    lastoknotgot = False

    # fetchall() keeps every row in RAM (current-db req: 6 MB max?), but it
    # also frees the cursor, which is handed on to actionfunc.
    for cca in dc.fetchall():  # current civic address
        # casts here are important
        town, road = cca[0], cca[1]
        civic, status = int(cca[2]), int(cca[3])
        lon, lat = float(cca[4]), float(cca[5])

        # skip cities -- currently only Ch'town and Summerside -- if requested in opt
        if opt.nocities and town in ("CHARLOTTETOWN", "SUMMERSIDE"):
            continue
        # skip addresses outside of the latitude-longitude rectangle, if requested in opt
        if coordrect and not (latlo < lat < lathi and lonlo < lon < lonhi):
            continue

        # apply the dslmask, treating errors (-1) as empty (0). A masked-out
        # address still moves the sparsity reference point unless --max is
        # given; with --max the spacing is measured between matching
        # addresses only, so more of them get selected.
        if not dslmask[0 if status < 0 else status]:
            if not opt.maxrows:
                lasttown, lastroad, lastcivic = town, road, civic
            continue

        notgot = False  # by default, assume the address was selected
        try:  # loop must not crash
            if town == lasttown and road == lastroad:
                # on the same road, skip houses for sparseness
                if (civic - lastcivic) >= sparsity:
                    actionfunc(dc, town, road, civic, status, lon, lat, opt)
                else:
                    # discarded for sparseness: keep it in the running to
                    # be selected for being last on its road
                    notgot = True
                    lastok = (dc, town, road, civic, status, lon, lat, opt)
            else:
                # on a new road, decide whether to get its first house,
                # and/or the last house on the previous road
                if alwaysfirst:
                    actionfunc(dc, town, road, civic, status, lon, lat, opt)
                if alwayslast and lastroad != "" and lastoknotgot:
                    actionfunc(*lastok)
        except Exception as errdet:
            print("> error < : %s" % (errdet,))
            print(sys.exc_info()[0])
        lasttown, lastroad, lastcivic = town, road, civic
        lastoknotgot = notgot

    # the very last address in the database has no "next road" to trigger
    # the last-on-road rule, so it is handled here
    if alwayslast and lastroad != "" and lastoknotgot:
        try:
            actionfunc(*lastok)
        except Exception as errdet:
            print("> error < : %s" % (errdet,))
            print(sys.exc_info()[0])
|
| 477 |
|
|
|
| 478 |
|
|
def crawlItemAction(dc, town, road, civic, status, lon, lat, opt):
    """ Placeholder for the per-address action of the crawler.
        Takes the parameters processDB() passes to an action function
        (see its docstring); not implemented yet, so it does nothing."""
    return None
|
| 480 |
|
|
|
| 481 |
|
|
def kmlItemAction(dc, town, road, civic, status, lon, lat, opt):
    """ handles an individual selected street address:
        appends one <Placemark> for it to the module-level kmlbuffer.
        Called by processDB(), from kml().
        Does not honour delay param.
        See processDB() for param description.
        opt.keepaddrs decides whether the placemark gets a <name>;
        opt.kmlglobe is accepted but nothing is done with it for now."""
    # function-scope import keeps this block self-contained
    from xml.sax.saxutils import escape

    # The <name> is the only human-readable part of a placemark. It is
    # optional because it clutters Google Earth, adds to file size, and
    # raises privacy concerns. (A per-placemark <LookAt> block used to be
    # written too; it was useless bloat and was dropped.)
    if opt.keepaddrs:
        # Escaping is very important -- Earth chokes easily on a raw '&',
        # and '<' or '>' would break the markup outright.
        label = escape(str(civic) + " " + road + ", " + town)
        nameblock = "<name>" + label + "</name>\n"
    else:
        nameblock = ""

    # placemark text block. Style names follow dslnames, offset by one so
    # that status -1 selects the ERROR style.
    kmlbuffer.write(
        "\n<Placemark>\n"
        + nameblock
        + "<styleUrl>#stylemap_" + dslnames[status + 1] + "</styleUrl>\n"
        + "<Point>\n"
        + "<coordinates>" + str(lon) + "," + str(lat) + ",0</coordinates>\n"
        + "</Point>\n"
        + "</Placemark>\n")
|
| 516 |
|
|
|
| 517 |
|
|
def statsItemAction(dc, town, road, civic, status, lon, lat, opt):
    """ handles an individual selected street address:
        adds it to the module-level statistics counters.
        Called by processDB(), from stats().
        Does not honour delay param. For param details,
        see docstring for processDB().
        The total (slot 5) grows for every call, even when the status is
        not recognised; such an address is reported on stdout instead of
        being counted under a status."""
    statistics[5] += 1  # increment the total
    if status < 0:
        slot = 0  # error
    else:
        # unknown [EMPTY], no [NODSL], basic [BASIC], ultra [ULTRA]
        slot = {0: 1, 1: 2, 2: 3, 3: 4}.get(status)
    if slot is None:
        print("invalid status:  %s %s , %s :  %s" % (civic, road, town, status))
    else:
        statistics[slot] += 1
|
| 535 |
|
|
|
| 536 |
|
|
def listItemAction(dc, town, road, civic, status, lon, lat, opt):
    """ prints an individual selected street address, as
        "<status name>   <civic> <road>, <town>".
        Called by processDB(), from listdb().
        Does not honour delay param. For param details,
        see docstring for processDB()."""
    label = dslnames[status + 1]  # offset by one: status -1 is "ERROR"
    print("%s   %s %s, %s" % (label, civic, road, town))
|
| 542 |
|
|
|
| 543 |
|
|
|
| 544 |
|
|
def parseDSLMask(instr):
    """ parses four boolean values from string, or bool list.
        instr:
            sequence of y/n chars (or bools), in the order
            EMPTY, NODSL, BASIC, ULTRA. Anything past the fourth item
            is ignored.
        Returns a list of 4 bools.
        Shorter input is expanded as follows, but
        _please_ don't use the less-than-four mechanism:
            3 items: EMPTY, NODSL, and one value for BASIC+ULTRA
            2 items: EMPTY, and one value for everything known
            1 item:  EMPTY; everything known is True
            0 items: all True"""
    given = [char2Bool(item) for item in instr[:4]]
    count = len(given)
    if count == 0:
        return [True, True, True, True]
    if count == 1:
        return [given[0], True, True, True]
    if count == 2:
        return [given[0], given[1], given[1], given[1]]
    if count == 3:
        return given + [given[2]]
    return given
|
| 564 |
|
|
|
| 565 |
|
|
def char2Bool(inchar):
    """ Returns a bool from a y/n char param.
        y:Y: True; anything else: False
        However, if inchar is a bool, it will be returned as is.

        param sequence can be any length,
        but only the first char gets evaluated.
        An empty sequence (or None) counts as "anything else": False."""
    # "==" rather than isinstance() is deliberate: 0 and 1 have always been
    # passed through unchanged as well, and that is kept.
    if inchar == True or inchar == False:
        return inchar
    # Nothing to look at -- previously "" blew up with an IndexError.
    if not inchar:
        return False
    return inchar[0] in ('y', 'Y')
|
| 578 |
|
|
|
| 579 |
|
|
|
| 580 |
|
|
def _kmlStyleElement(styleid, colourxml, scale, href, hotspot):
    """ Returns one KML <Style> element (icon style plus empty list style).
        styleid:
            value of the element's id attribute
        colourxml:
            ready-made "<color>" fragment, or "" for none
        scale:
            icon scale; converted with str()
        href:
            icon URL
        hotspot:
            sequence whose first two items are the x/y pixel hotspot
    """
    return "".join([
        '<Style id="', styleid, '">\n',
        '<IconStyle>', colourxml, '\n',
        '<scale>', str(scale), '</scale>\n',
        '<Icon>\n',
        '<href>', href, '</href>\n',
        '</Icon>\n',
        '<hotSpot x="', str(hotspot[0]), '" y="', str(hotspot[1]),
        '" xunits="pixels" yunits="pixels"/>\n',
        '</IconStyle>\n',
        '<ListStyle>\n',
        '</ListStyle>\n',
        '</Style>',
    ])


def kmlMakeHeader(outkml, opt):
    """ Returns a header for a Google Earth KML file.
        Manipulates some of the styles based on the options.
        outkml:
            name of the KML file -- used as title of the document
            and of the folder opened at the end of the header
        opt:
            Miscellaneous options.  Attributes read here:
            kmlglobe    -- not implemented yet, only looked at
            icontheme   -- "GM" selects GMIconURLs; any other value is
                           a URL pattern in which "*" is replaced by
                           each DSL status name
            iconhotspot -- "x,y" pixel hotspot; falls back to the
                           default GMIconHotspot (with a printed
                           message) when it is not two integers
            iconcolours -- comma-separated colours, one per status;
                           ignored (with a printed message) if too few
            nohighlight -- if True, no separate highlight styles
            iconsize    -- icon scale; highlight styles multiply it by
                           kmlIconHighlitScale

        One StyleMap and its styles are emitted for every entry of
        dslnames except the last one (TOTAL).
    """
    if opt.kmlglobe:  # not implemented, but don't want it forgotten....
        pass

    # don't want the last entry of dslnames -- it's TOTAL
    stylenames = [dslnames[i] for i in range(len(dslnames) - 1)]

    # determine the icon URLs to use
    if opt.icontheme == "GM":
        iconURL = GMIconURLs
    else:
        iconURL = [opt.icontheme.replace("*", name) for name in stylenames]

    # determine the icon hotspots
    # Work on a copy: assigning into GMIconHotspot itself would change
    # the module-level default for every later call.
    iconHotspots = list(GMIconHotspot)
    if opt.iconhotspot:
        ihsc = opt.iconhotspot.split(",")  # chunk into x/y coords
        if len(ihsc) < 2:  # not a coord
            print("KML icon hotspot invalid: Too few coords. Using default....")
        else:
            try:
                # Convert both before storing either, so a bad second
                # value cannot leave a half-updated hotspot behind.
                iconHotspots[:2] = [int(ihsc[0]), int(ihsc[1])]
            except ValueError:
                print("KML icon hotspot invalid: Not integers. Using default....")

    # determine icon colors
    useIconColours = False
    iconColours = []
    if opt.iconcolours:
        iconColours = opt.iconcolours.split(",")  # chunk into icon colours
        if len(iconColours) < len(stylenames):  # bad input
            print("KML icon colours invalid: Too few. Using default....")
        else:
            useIconColours = True

    # The explicit comparison with True is kept on purpose: the original
    # author reported a mysterious bug around the nohighlight tests and
    # asked that the comparison not be changed for appearance.
    nohighlight = (opt.nohighlight == True)
    if nohighlight:
        highlightprefix = "normal_"
    else:
        highlightprefix = "highlit_"

    # Collect the pieces and join once at the end.
    parts = [
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2" xmlns:kml="http://www.opengis.net/kml/2.2" xmlns:atom="http://www.w3.org/2005/Atom">\n',
        '<Document>\n',
        '<name>', outkml, '</name>',
    ]

    for i, name in enumerate(stylenames):
        # Only index iconColours when the list is known to be long enough.
        if useIconColours:
            colourxml = "\n<color>" + iconColours[i] + "</color>"
        else:
            colourxml = ""

        parts.extend([
            '\n<StyleMap id="stylemap_', name, '">\n',
            '<Pair>\n',
            '<key>normal</key>\n',
            '<styleUrl>#stylenormal_', name, '</styleUrl>\n',
            '</Pair>\n',
            '<Pair>\n',
            '<key>highlight</key>\n',
            '<styleUrl>#style', highlightprefix, name, '</styleUrl>\n',
            '</Pair>',
            '\n</StyleMap>\n',
            _kmlStyleElement("stylenormal_" + name, colourxml,
                             opt.iconsize, iconURL[i], iconHotspots),
        ])

        if not nohighlight:
            parts.extend([
                '\n',
                _kmlStyleElement("stylehighlit_" + name, colourxml,
                                 opt.iconsize * kmlIconHighlitScale,
                                 iconURL[i], iconHotspots),
                '\n',
            ])

    parts.extend([
        '\n\n<Folder>\n',
        '<name>', outkml, '</name>\n',
        '<open>0</open>\n',
        '\n',
    ])
    return "".join(parts)
# end of kmlMakeHeader()
|
| 673 |
|
|
|
| 674 |
|
|
def kmlMakeFooter(outkml, opt):
    """ Returns the closing part of a Google Earth KML file.
        Closes the Folder, Document and kml elements, in that order,
        padded by blank lines before and after.
        outkml:
            name of the KML file -- accepted but not used here
        opt:
            Miscellaneous options -- accepted but not used here
    """
    footerlines = [
        "",
        "",
        "</Folder>",
        "</Document>",
        "</kml>",
        "",
        "",
    ]
    return "\n".join(footerlines)
# end of kmlMakeFooter()
|
| 690 |
|
|
|
| 691 |
|
|
|
| 692 |
|
|
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.  main() is defined elsewhere in this
# file.
if __name__ == "__main__":
    main()
|