Add untested reconciliation support.

This commit is contained in:
Paul Makepeace 2011-06-09 12:07:18 +00:00
parent 92504e0b34
commit 30d119f5c8
1 changed file with 83 additions and 4 deletions

View File

@ -187,6 +187,9 @@ def RowsResponseFactory(column_index):
self.index = row_response['i'] self.index = row_response['i']
self.row = [c['v'] if c else None self.row = [c['v'] if c else None
for c in row_response['cells']] for c in row_response['cells']]
# list of reconciliation ids indexing into self.recons
self.recon = [c.get('r', None) if c else None
for c in row_response['cells']]
def __getitem__(self, column): def __getitem__(self, column):
# Trailing nulls seem to be stripped from row data # Trailing nulls seem to be stripped from row data
try: try:
@ -210,8 +213,25 @@ def RowsResponseFactory(column_index):
self.start = response['start'] self.start = response['start']
self.limit = response['limit'] self.limit = response['limit']
self.total = response['total'] self.total = response['total']
# 'pool': {"reconCandidates": {},"recons": {}}
self.pool = response['pool'] self.pool = response['pool']
self.recons = self.pool['recons']
#"1307457513974512303": {
# "id": 1307457513974512303,
# "service": "http://.../reconcile/",
# "identifierSpace": "http://.../ns/authority",
# "schemaSpace": "http://.../ns/type",
# # j for judgment
# "j": "none", # "matched"
# # c for candidates. Indexes into self.recon_candidates
# "c": ["/domain/type/id", ...]
#}
self.recon_candidates = self.pool['reconCandidates']
#"/domain/type/id": {
# "id": "/domain/type/id",
# "name": "...",
# "score": 0.439394,
# "types": ["/domain/type"]
#}
self.rows = self.RefineRows(response['rows']) self.rows = self.RefineRows(response['rows'])
return RowsResponse return RowsResponse
@ -242,6 +262,8 @@ class RefineProject:
self.column_order = {} # map of column names to order in UI self.column_order = {} # map of column names to order in UI
self.rows_response_factory = None # for parsing get_rows() self.rows_response_factory = None # for parsing get_rows()
self.get_models() self.get_models()
# following filled in by get_reconciliation_services
self.recon_services = None
def project_name(self): def project_name(self):
return Refine(self.server).get_project_name(self.project_id) return Refine(self.server).get_project_name(self.project_id)
@ -290,6 +312,12 @@ class RefineProject:
# TODO: implement rest # TODO: implement rest
return response return response
def get_preferences(self, name):
    """Fetch one preference and hand back its (JSON) value.

    name: key of the preference to look up

    Issues the 'get-preferences' command through do_json() with
    include_engine=False and returns the 'value' entry of the response.
    """
    query = {'name': name}
    reply = self.do_json('get-preferences', query, include_engine=False)
    return reply['value']
def wait_until_idle(self, polling_delay=0.5): def wait_until_idle(self, polling_delay=0.5):
while True: while True:
response = self.do_json('get-processes') response = self.do_json('get-processes')
@ -495,13 +523,64 @@ class RefineProject:
def guess_types_of_column(self, column, service):
    """Query the reconciliation service for what it thinks this column is.

    column: name of the column to guess types for
    service: reconciliation endpoint URL

    Returns [
        {"id":"/domain/type","name":"Type Name","score":10.2,"count":18},
        ...
    ]
    """
    # Sent with include_engine=False, matching the other reconciliation
    # lookups in this class.
    response = self.do_json('guess-types-of-column', {
        'columnName': column, 'service': service}, include_engine=False)
    return response['types']
def get_reconciliation_services(self):
    """Return the value of the 'reconciliation.standardServices' preference.

    The value is also stored on self.recon_services before being returned.
    """
    self.recon_services = self.get_preferences(
        'reconciliation.standardServices')
    return self.recon_services
def get_reconciliation_service_by_name_or_url(self, name):
    """Find the reconciliation service whose name or URL equals `name`.

    The service list is obtained from get_reconciliation_services() on
    every call. Returns the first matching service entry, or None when
    nothing matches.
    """
    matches = (
        candidate
        for candidate in self.get_reconciliation_services()
        if candidate['name'] == name or candidate['url'] == name
    )
    # First hit wins; fall back to None when the generator is exhausted.
    return next(matches, None)
def reconcile(self, column, service, type=None, config=None):
    """Perform a reconciliation asynchronously.

    column: name of the column to reconcile
    service: name or URL of a reconciliation service; only consulted when
        a config has to be built (i.e. config is None)
    type: dict with 'id' and 'name' keys, e.g.
        {"id": "/domain/type", "name": "Type Name"}; required when config
        is None
    config: complete reconciliation config, passed through unchanged:
        {
            "mode": "standard-service",
            "service": "http://.../reconcile/",
            "identifierSpace": "http://.../ns/authority",
            "schemaSpace": "http://.../ns/type",
            "type": {
                "id": "/domain/type",
                "name": "Type Name"
            },
            "autoMatch": true,
            "columnDetails": []
        }

    Returns typically {'code': 'pending'}; call wait_until_idle() to wait
    for reconciliation to complete.

    Raises ValueError if neither config nor type is given, or if a config
    has to be built and `service` matches no known reconciliation service.
    """
    if config is None:
        # Validate the arguments first so a doomed call fails before the
        # service lookup is attempted.
        if type is None:
            raise ValueError('Must have at least one of config or type')
        # Create a reconciliation config by looking up recon service info
        service_info = self.get_reconciliation_service_by_name_or_url(
            service)
        if service_info is None:
            # The lookup returns None for an unknown name/URL; report that
            # instead of failing on service_info['url'] below.
            raise ValueError(
                'Unknown reconciliation service: %r' % (service,))
        config = {
            'mode': 'standard-service',
            'service': service_info['url'],
            'identifierSpace': service_info['identifierSpace'],
            'schemaSpace': service_info['schemaSpace'],
            'type': {
                'id': type['id'],
                'name': type['name'],
            },
            'autoMatch': True,
            'columnDetails': [],
        }
    # NOTE(review): config is handed to do_json as a dict; confirm that
    # do_json serializes nested values the way the server expects.
    return self.do_json('reconcile', {
        'columnName': column, 'config': config})