aboutsummaryrefslogtreecommitdiff
path: root/bugzilla_fetch.py
diff options
context:
space:
mode:
authorcpettet <rush@wikimedia.org>2014-08-21 00:48:12 +0000
committercpettet <rush@wikimedia.org>2014-08-21 00:48:12 +0000
commitb1a77730863f270fb468aaf353dccb3e4d64daeb (patch)
treec94ff1c77c607294c4e58bbe9670de1f1889fad8 /bugzilla_fetch.py
parent2e07190fa3e385ebbb48b6d4dbdb4a16f27d2a04 (diff)
bugzilla fetch per-multiprocessing
Diffstat (limited to 'bugzilla_fetch.py')
-rwxr-xr-xbugzilla_fetch.py362
1 files changed, 63 insertions, 299 deletions
diff --git a/bugzilla_fetch.py b/bugzilla_fetch.py
index f8872eb..eb698fb 100755
--- a/bugzilla_fetch.py
+++ b/bugzilla_fetch.py
@@ -8,7 +8,7 @@ This script is a WIP for getting Bugzilla information
with the end goal of it living in phabricator
"""
-
+import time
import yaml
import ast
import base64
@@ -26,9 +26,6 @@ from phabdb import set_project_icon
from email.parser import Parser
import ConfigParser
-import phabricator
-from phabricator import Phabricator
-
def log(msg):
import syslog
msg = unicode(msg)
@@ -39,86 +36,44 @@ def log(msg):
except:
print 'error logging output'
-def main(bugid):
-
+def fetch(bugid):
parser = ConfigParser.SafeConfigParser()
- parser_mode = 'phab'
- parser.read('/etc/gz_fetch.conf')
- phab = Phabricator(username=parser.get(parser_mode, 'username'),
- certificate=parser.get(parser_mode, 'certificate'),
- host=parser.get(parser_mode, 'host'))
-
parser_mode = 'bz'
+ parser.read('/etc/gz_fetch.conf')
server = xmlrpclib.ServerProxy(parser.get(parser_mode, 'url'), use_datetime=True)
- kwargs = { 'ids': [bugid],
- 'Bugzilla_login': parser.get(parser_mode, 'Bugzilla_login'),
- 'Bugzilla_password': parser.get(parser_mode, 'Bugzilla_password')}
-
-
- bzdata= open("data/bugzilla.yaml", 'r')
- bzdata_yaml = yaml.load(bzdata)
- tag_keys = bzdata_yaml['keywords_to_tags'].split(' ')
- mlists = bzdata_yaml['assigned_to_lists'].split(' ')
- log("Mailinglists: " + str(mlists))
-
- def priority_convert(bz_priority):
+ token_data = server.User.login({'login': parser.get(parser_mode, 'Bugzilla_login'),
+ 'password': parser.get(parser_mode, 'Bugzilla_password')})
- rank = {'unprioritized': 90,
- 'immediate': 100,
- 'highest': 100,
- 'high': 80,
- 'normal': 50,
- 'low': 25,
- 'lowest': 0}
+ token = token_data['token']
- return rank[bz_priority.lower()]
+ #kwargs = { 'ids': [bugid],
+ # 'Bugzilla_login': parser.get(parser_mode, 'Bugzilla_login'),
+ # 'Bugzilla_password': parser.get(parser_mode, 'Bugzilla_password')}
-
- def comment(task, msg):
- out = phab.maniphest.update(id=task, comments=msg)
- log(out)
- return out
-
- def set_status(task, status):
- out = phab.maniphest.update(id=task, status=status)
- log(out)
- return out
-
- def create(title, desc, id, priority, ccPHIDs=[], projects=[]):
- out = phab.maniphest.createtask(title=title,
- description="%s" % desc,
- projectPHIDs=projects,
- priority=priority,
- auxiliary={"std:maniphest:external_id":"%s" % (id,)})
- log(out)
- return out
+ kwargs = { 'ids': [bugid], 'Bugzilla_token': token }
def datetime_to_epoch(date_time):
return str((date_time - datetime.datetime(1970,1,1)).total_seconds())
- def epoch_to_datetime(epoch, timezone='UTC'):
- return str((datetime.datetime.fromtimestamp(int(float(epoch))
- ).strftime('%Y-%m-%d %H:%M:%S'))) + " (%s)" % (timezone,)
-
def status_convert(bz_status):
"""
- UNCONFIRMED (default) Open + Needs Triage (default)
- NEW Open
- ASSIGNED open
- PATCH_TO_REVIEW open
- NEED_INFO needs_info
- RESOLVED FIXED resolved
- RESOLVED INVALID invalid
- RESOLVED WONTFIX declined
- RESOLVED WORKSFORME resolved
- RESOLVED DUPLICATE closed
-
- needs_info stalled
- resolved closed
- invalid no historical value will be purged eventually (spam, etc)
- declined we have decided not too -- even though we could
+ UNCONFIRMED (default) Open + Needs Triage (default)
+ NEW Open
+ ASSIGNED open
+ PATCH_TO_REVIEW open
+ NEED_INFO needs_info
+ RESOLVED FIXED resolved
+ RESOLVED INVALID invalid
+ RESOLVED WONTFIX declined
+ RESOLVED WORKSFORME resolved
+ RESOLVED DUPLICATE closed
+
+ needs_info stalled
+ resolved closed
+ invalid no historical value will be purged eventually (spam, etc)
+ declined we have decided not to -- even though we could
"""
statuses = {'new': 'open',
@@ -131,258 +86,67 @@ def main(bugid):
'unconfirmed': 'open',
'patch_to_review': 'open'}
- return statuses[bz_status.lower()]
-
- def ensure_project(project_name):
- """make sure project exists, return phid either way"""
-
- existing_proj = phab.project.query(names=[project_name])
- if not existing_proj['data']:
- log('need to make: ' + project_name)
- try:
- new_proj = phab.project.create(name=project_name, members=['PHID-USER-wa4idclisnm6aeakk7ur'])
- #XXX: Bug where we have to specify a members array!
- except phabricator.APIError:
- pass
-
- existing_proj = phab.project.query(names=[project_name])
- log(str(existing_proj))
- phid = existing_proj['data'][existing_proj['data'].keys()[0]]['phid']
- else:
- phid = existing_proj['data'][existing_proj['data'].keys()[0]]['phid']
- log(project_name + ' exists')
- return phid
+ return statuses[bz_status.lower()]
- def upload_file(name, data):
- #print type(data)
- encoded = base64.b64encode(data)
- return phab.file.upload(name=name, data_base64=encoded)
#grabbing one bug at a time for now
buginfo = server.Bug.get(kwargs)['bugs']
buginfo = buginfo[0]
- #print buginfo
com = server.Bug.comments(kwargs)['bugs'][bugid]['comments']
bug_id = com[0]['bug_id']
- #http://www.bugzilla.org/docs/tip/en/html/api/Bugzilla/WebService/Bug.html#attachments
- attached = server.Bug.attachments(kwargs)['bugs'][bugid]
-
- #process ticket uploads to map attach id to phab file id
- uploads = {}
- for a in attached:
- if a['is_private']:
- print 'oh no private!!!!'
- upload = upload_file(a['file_name'], str(a['data']))
- finfo = phab.file.info(phid=upload.response).response
- a['phid'] = finfo['phid']
- a['name'] = finfo['name']
- a['objectName'] = finfo['objectName']
- uploads[a['id']] = a
-
- log('Attachment count: ' + str(len(uploads.keys())))
#have to do for json
buginfo['last_change_time'] = datetime_to_epoch(buginfo['last_change_time'])
buginfo['creation_time'] = datetime_to_epoch(buginfo['creation_time'])
-
-
-
-
for c in com:
c['creation_time'] = datetime_to_epoch(c['creation_time'])
c['time'] = datetime_to_epoch(c['time'])
+ # set ticket status for priority import
+ status = status_convert(buginfo['status'])
+ if status != 'open':
+ import_priority = 0
+ else:
+ import_priority = 1
+
pmig = phdb()
- insert_values = (bugid, json.dumps(buginfo), json.dumps(com))
- pmig.sql_x("INSERT INTO bugzilla_meta (id, header, comments) VALUES (%s, %s, %s)",
+ insert_values = (bugid, import_priority, json.dumps(buginfo), json.dumps(com))
+ pmig.sql_x("INSERT INTO bugzilla_meta (id, priority, header, comments) VALUES (%s, %s, %s, %s)",
insert_values)
-
- bugid, buginfo, com = pmig.sql_x("SELECT * FROM bugzilla_meta WHERE id = %s",
- (bugid,))
pmig.close()
-
-
- buginfo = json.loads(buginfo)
- com = json.loads(com)
- bugid = int(bugid)
- log(bugid)
- log(buginfo)
-
- #list of projects to add to ticket
- ptags = []
-
- #mask emails for public consumption
- buginfo['cc'] = [c.split('@')[0] for c in buginfo['cc']]
-
- with open('dump', 'w') as d:
- d.write(str(json.dumps(buginfo)))
-
- #XXX: if is patch_to_review add that project
- if buginfo['status'].lower() == 'patch_to_review':
- ptags.append(('patch_to_review', 'tag', 'green'))
-
- if buginfo['status'] == 'verified':
- ptags.append(('verified', 'tag'))
-
- if buginfo['cf_browser'] not in ['---', "Other"]:
- log('Adding browser tag: %s' % (buginfo['cf_browser'],))
- ptags.append((buginfo['cf_browser'], 'tag'))
-
- if buginfo['target_milestone'] != '---':
- log('Creating milestone: %s' % (buginfo['target_milestone'],))
- ptags.append((buginfo['target_milestone'], 'truck'))
-
- #convert bugzilla source to phabricator
- #verified
- buginfo['status'] = status_convert(buginfo['status'])
-
- #XXX: fix
- if buginfo["product"] == 'Security':
- print '!!!!!!do special security stuff here'
- return
-
- component_separator = '-'
- buginfo["product"] = buginfo["product"].replace('-', '_')
- buginfo["product"] = buginfo["product"].replace(' ', '_')
- buginfo["component"] = buginfo["component"].replace('/', '_and_')
- buginfo["component"] = buginfo["component"].replace('-', '_')
- buginfo["component"] = buginfo["component"].replace(' ', '_')
-
- project = "%s%s%s" % (buginfo["product"],
- component_separator,
- buginfo["component"])
-
- buginfo['project'] = project
- log(buginfo['project'])
- ptags.append((buginfo['project'], None))
-
- priority = priority_convert(buginfo['priority'])
- title = buginfo['summary']
-
- def find_attachment(text):
- import re
- a = re.search('Created\sattachment\s(\d+)', text)
- if a:
- return a.group(1)
- else:
- return ''
-
- clean_com = []
- for c in com:
- if not isinstance(c, dict):
- c = ast.literal_eval(c)
- clean_c = {}
- clean_c['author'] = c['author'].split('@')[0]
-
- clean_c['creation_time'] = str(c['creation_time'])
- if c['author'] != c['creator']:
- clean_c['creator'] = c['creator'].split('@')[0]
-
- if c['count'] == 0:
- clean_c['bug_id'] = c['bug_id']
-
- if c['is_private']:
- c['text'] = '_hidden_'
-
- attachment = find_attachment(c['text'])
- if attachment:
- fmt_text = []
- text = c['text'].splitlines()
- for t in text:
- if not t.startswith('Created attachment'):
- fmt_text.append(t)
- c['text'] = '\n'.join(fmt_text)
- clean_c['attachment'] = attachment
-
- clean_c['text'] = c['text']
- clean_com.append(clean_c)
-
- log('project: ' + buginfo['project'])
-
- # strip out comment 0 as description
- description = clean_com[0]
- del clean_com[0]
-
- created = epoch_to_datetime(description['creation_time'])
- desc_block = "**Created**: `%s`\n\n**Author:** `%s`\n\n**Description:**\n%s\n" % (created,
- description['author'],
- description['text'])
- desc_tail = '--------------------------'
- desc_tail += "\n**URL**: %s" % (buginfo['url'].lower() or 'none')
- desc_tail += "\n**Version**: %s" % (buginfo['version'].lower())
- desc_tail += "\n**See Also**: %s" % ('\n'.join(buginfo['see_also']).lower() or 'none')
-
- if 'alias' in buginfo:
- desc_tail += "\n**Alias**: %s" % (buginfo['alias'])
-
- if buginfo["cf_platform"] != "---":
- desc_tail += "\n**Mobile Platform**: %s" % (buginfo["cf_platform"])
-
- if "rep_platform" in buginfo and buginfo['op_sys'] != 'All':
- desc_tail += "\n**Hardware/OS**: %s/%s" % (buginfo["rep_platform"], buginfo['op_sys'])
- else:
- desc_tail += "\n**Hardware/OS**: %s/%s" % ('unknown', 'unknown')
-
- full_description = desc_block + '\n' + desc_tail
-
- keys = buginfo['keywords']
- for k in keys:
- if k in tag_keys:
- ptags.append((k, 'tags'))
-
- phids = []
- for p in ptags:
- phids.append(ensure_project(p[0]))
- if p[1] is not None:
- if len(p) > 2:
- color = p[2]
- else:
- color = 'blue'
- log("Setting project %s icon to %s" % (p[0], p[1]))
- set_project_icon(p[0], icon=p[1], color=color)
-
- log("ptags: " + str(ptags))
- log("phids: " + str(phids))
-
- #buginfo'assigned_to': u'wikibugs-l@lists.wikimedia.org'
- assignee = buginfo['assigned_to']
-
- ccphids = []
- if assignee in mlists:
- ccphids.append(mailinglist_phid(assignee))
-
- log("Ticket Info: %s" % (desc_block,))
- ticket = create(title,
- full_description,
- description['bug_id'],
- priority,
- ccPHIDs=ccphids,
- projects=phids)
-
- comment_block = "**%s** `%s` \n\n %s"
- for c in clean_com:
- log('-------------------------------------')
- created = epoch_to_datetime(c['creation_time'])
- comment_body = "**%s** wrote on `%s`\n\n%s" % (c['author'], created, c['text'])
- if 'attachment' in c:
- cattached = uploads[int(c['attachment'])]
- comment_body += "\n\n**Attached**: {%s}" % (cattached['objectName'])
- comment(ticket['id'], comment_body)
-
- log(str(ticket['id']) + str(buginfo['status']))
-
- if buginfo['status'] != 'open':
- comment(ticket['id'], '//importing issue status//')
- set_status(ticket['id'], buginfo['status'])
+ return True
+
+def run_fetch(bugid, tries=3):
+ if tries == 0:
+ print 'failed to grab %s' % (bugid,)
+ return False
+ try:
+ if fetch(bugid):
+ return True
+ except Exception as e:
+ tries -= 1
+ time.sleep(5)
+ print 'failed to grab %s (%s)' % (bugid, e)
+ return run_fetch(bugid, tries=tries)
if sys.stdin.isatty():
bugs = sys.argv[1:]
else:
bugs = sys.stdin.read().strip('\n').strip().split()
-main('1')
-#for i in bugs:
-# if i.isdigit():
-# main(i)
+print bugs
+bugs = [i for i in bugs if i.isdigit()]
+print bugs
+for i in bugs:
+ if run_fetch(i):
+ print time.time()
+ print 'done with %s' % (i,)
+ else:
+ pmig = phdb()
+ insert_values = (i, 6, '', '')
+ pmig.sql_x("INSERT INTO bugzilla_meta (id, priority, header, comments) VALUES (%s, %s, %s, %s)",
+ insert_values)
+ pmig.close()
+ print 'failed on %s' % (i,)