prepare distribution on PyPI
moved parser from refine.py to google/refine/__main__.py to allow module execution (python -m google.refine); script refine.py is now just a forwarding to __main__.py
This commit is contained in:
parent
b8675fc894
commit
ece61c1096
|
@ -2,6 +2,6 @@ build
|
||||||
dist
|
dist
|
||||||
*.pyc
|
*.pyc
|
||||||
.*
|
.*
|
||||||
refine_client.egg-info
|
openrefine_client.egg-info
|
||||||
refine.spec
|
refine.spec
|
||||||
README.html
|
README.html
|
||||||
|
|
|
@ -2,3 +2,4 @@ include README.md
|
||||||
include COPYING.txt
|
include COPYING.txt
|
||||||
recursive-include tests/data *.csv
|
recursive-include tests/data *.csv
|
||||||
recursive-include tests *.py
|
recursive-include tests *.py
|
||||||
|
prune docker
|
||||||
|
|
10
README.md
10
README.md
|
@ -45,11 +45,13 @@ The environment variables `OPENREFINE_HOST` and `OPENREFINE_PORT` enable overrid
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
Install dependencies, which currently is `urllib2_file`:
|
```
|
||||||
|
pip install openrefine-client
|
||||||
|
```
|
||||||
|
|
||||||
```
|
(requires Python 2.x, depends on urllib2_file>=0.2.1)
|
||||||
sudo pip install -r requirements.txt
|
|
||||||
```
|
## Tests
|
||||||
|
|
||||||
Ensure you have a Refine server running somewhere and, if necessary, set the environment vars as above.
|
Ensure you have a Refine server running somewhere and, if necessary, set the environment vars as above.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,218 @@
|
||||||
|
#! /usr/bin/env python
|
||||||
|
"""
|
||||||
|
Script to provide a command line interface to a OpenRefine server.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
|
||||||
|
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||||
|
|
||||||
|
|
||||||
|
import optparse
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from google.refine import refine
|
||||||
|
from google.refine import client
|
||||||
|
|
||||||
|
reload(sys)
|
||||||
|
sys.setdefaultencoding('utf-8')
|
||||||
|
|
||||||
|
class myParser(optparse.OptionParser):
|
||||||
|
|
||||||
|
def format_epilog(self, formatter):
|
||||||
|
return self.epilog
|
||||||
|
|
||||||
|
PARSER = \
|
||||||
|
myParser(description='Script to provide a command line interface to an OpenRefine server.',
|
||||||
|
usage='usage: %prog [--help | OPTIONS]',
|
||||||
|
epilog="""
|
||||||
|
Examples:
|
||||||
|
--list # show list of projects (id: name)
|
||||||
|
--list -H 127.0.0.1 -P 80 # specify hostname and port
|
||||||
|
--info 2161595260364 # show metadata of project
|
||||||
|
--info "christmas gifts"
|
||||||
|
--create example.csv # create new project from file example.csv
|
||||||
|
--create example.tsv --encoding=UTF-8
|
||||||
|
--create example.xml --recordPath=collection --recordPath=record
|
||||||
|
--create example.json --recordPath=_ --recordPath=_
|
||||||
|
--create example.xlsx --sheets=0
|
||||||
|
--create example.ods --sheets=0
|
||||||
|
--apply trim.json 2161595260364 # apply rules in trim.json to project 1234...
|
||||||
|
--apply trim.json "christmas gifts"
|
||||||
|
--export 2161595260364 > project.tsv # export project 2161595260364 in tsv format
|
||||||
|
--export "christmas gifts" > project.tsv
|
||||||
|
--export --output=project.xlsx 2161595260364 # export project in xlsx format
|
||||||
|
--export --output=project.xlsx "christmas gifts"
|
||||||
|
--export "My Address Book" --template='{ "friend" : {{jsonize(cells["friend"].value)}}, "address" : {{jsonize(cells["address"].value)}} }' --prefix='{ "rows" : [' --rowSeparator ',' --suffix '] }' --filterQuery="^mary$"
|
||||||
|
--delete 2161595260364 # delete project
|
||||||
|
--delete "christmas gifts"
|
||||||
|
""")
|
||||||
|
|
||||||
|
group1 = optparse.OptionGroup(PARSER, 'Connection options')
|
||||||
|
group1.add_option('-H', '--host', dest='host', metavar='127.0.0.1',
|
||||||
|
help='OpenRefine hostname (default: 127.0.0.1)')
|
||||||
|
group1.add_option('-P', '--port', dest='port', metavar='3333',
|
||||||
|
help='OpenRefine port (default: 3333)')
|
||||||
|
PARSER.add_option_group(group1)
|
||||||
|
|
||||||
|
group2 = optparse.OptionGroup(PARSER, 'Commands')
|
||||||
|
group2.add_option('-c', '--create', dest='create', metavar='[FILE]',
|
||||||
|
help='Create project from file. The filename ending (e.g. .csv) defines the input format (csv,tsv,xml,json,txt,xls,xlsx,ods)')
|
||||||
|
group2.add_option('-l', '--list', dest='list', action='store_true',
|
||||||
|
help='List projects')
|
||||||
|
PARSER.add_option_group(group2)
|
||||||
|
|
||||||
|
group3 = optparse.OptionGroup(PARSER, 'Commands with argument [PROJECTID/PROJECTNAME]')
|
||||||
|
group3.add_option('-d', '--delete', dest='delete', action='store_true',
|
||||||
|
help='Delete project')
|
||||||
|
group3.add_option('-f', '--apply', dest='apply', metavar='[FILE]',
|
||||||
|
help='Apply JSON rules to OpenRefine project')
|
||||||
|
group3.add_option('-E', '--export', dest='export', action='store_true',
|
||||||
|
help='Export project in tsv format to stdout.')
|
||||||
|
group3.add_option('-o', '--output', dest='output', metavar='[FILE]',
|
||||||
|
help='Export project to file. The filename ending (e.g. .tsv) defines the output format (csv,tsv,xls,xlsx,html)')
|
||||||
|
group3.add_option('--info', dest='info', action='store_true',
|
||||||
|
help='show project metadata')
|
||||||
|
PARSER.add_option_group(group3)
|
||||||
|
|
||||||
|
group4 = optparse.OptionGroup(PARSER, 'Create options')
|
||||||
|
group4.add_option('--columnWidths', dest='columnWidths',
|
||||||
|
help='(txt/fixed-width) please provide widths separated by comma (e.g. 7,5)')
|
||||||
|
group4.add_option('--encoding', dest='encoding',
|
||||||
|
help='(csv,tsv,txt), please provide short encoding name (e.g. UTF-8)')
|
||||||
|
group4.add_option('--guessCellValueTypes', dest='guessCellValueTypes', metavar='true/false', choices=('true', 'false'),
|
||||||
|
help='(xml,csv,tsv,txt,json, default: false)')
|
||||||
|
group4.add_option('--headerLines', dest='headerLines', type="int",
|
||||||
|
help='(csv,tsv,txt/fixed-width,xls,xlsx,ods), default: 1, default txt/fixed-width: 0')
|
||||||
|
group4.add_option('--ignoreLines', dest='ignoreLines', type="int",
|
||||||
|
help='(csv,tsv,txt,xls,xlsx,ods), default: -1')
|
||||||
|
group4.add_option('--includeFileSources', dest='includeFileSources', metavar='true/false', choices=('true', 'false'),
|
||||||
|
help='(all formats), default: false')
|
||||||
|
group4.add_option('--limit', dest='limit', type="int",
|
||||||
|
help='(all formats), default: -1')
|
||||||
|
group4.add_option('--linesPerRow', dest='linesPerRow', type="int",
|
||||||
|
help='(txt/line-based), default: 1')
|
||||||
|
group4.add_option('--processQuotes', dest='processQuotes', metavar='true/false', choices=('true', 'false'),
|
||||||
|
help='(csv,tsv), default: true')
|
||||||
|
group4.add_option('--projectName', dest='project_name',
|
||||||
|
help='(all formats), default: filename')
|
||||||
|
group4.add_option('--recordPath', dest='recordPath', action='append',
|
||||||
|
help='(xml,json), please provide path in multiple arguments without slashes, e.g. /collection/record/ should be entered like this: --recordPath=collection --recordPath=record, default xml: record, default json: _ _')
|
||||||
|
group4.add_option('--separator', dest='separator',
|
||||||
|
help='(csv,tsv), default csv: , default tsv: \\t')
|
||||||
|
group4.add_option('--sheets', dest='sheets', action='append', type="int",
|
||||||
|
help='(xls,xlsx,ods), please provide sheets in multiple arguments, e.g. --sheets=0 --sheets=1, default: 0 (first sheet)')
|
||||||
|
group4.add_option('--skipDataLines', dest='skipDataLines', type="int",
|
||||||
|
help='(csv,tsv,txt,xls,xlsx,ods), default: 0, default line-based: -1')
|
||||||
|
group4.add_option('--storeBlankRows', dest='storeBlankRows', metavar='true/false', choices=('true', 'false'),
|
||||||
|
help='(csv,tsv,txt,xls,xlsx,ods), default: true')
|
||||||
|
group4.add_option('--storeBlankCellsAsNulls', dest='storeBlankCellsAsNulls', metavar='true/false', choices=('true', 'false'),
|
||||||
|
help='(csv,tsv,txt,xls,xlsx,ods), default: true')
|
||||||
|
group4.add_option('--storeEmptyStrings', dest='storeEmptyStrings', metavar='true/false', choices=('true', 'false'),
|
||||||
|
help='(xml,json), default: true')
|
||||||
|
group4.add_option('--trimStrings', dest='trimStrings', metavar='true/false', choices=('true', 'false'),
|
||||||
|
help='(xml,json), default: false')
|
||||||
|
PARSER.add_option_group(group4)
|
||||||
|
|
||||||
|
group5 = optparse.OptionGroup(PARSER, 'Legacy options')
|
||||||
|
group5.add_option('--format', dest='input_format',
|
||||||
|
help='Specify input format (csv,tsv,xml,json,line-based,fixed-width,xls,xlsx,ods)')
|
||||||
|
PARSER.add_option_group(group5)
|
||||||
|
|
||||||
|
group6= optparse.OptionGroup(PARSER, 'Templating export options')
|
||||||
|
group6.add_option('--template', dest='template',
|
||||||
|
help='mandatory; (big) text string that you enter in the *row template* textfield in the export/templating menu in the browser app)')
|
||||||
|
group6.add_option('--mode', dest='mode', metavar='row-based/record-based', choices=('row-based', 'record-based'),
|
||||||
|
help='engine mode (default: row-based)')
|
||||||
|
group6.add_option('--prefix', dest='prefix',
|
||||||
|
help='text string that you enter in the *prefix* textfield in the browser app')
|
||||||
|
group6.add_option('--rowSeparator', dest='rowSeparator',
|
||||||
|
help='text string that you enter in the *row separator* textfield in the browser app')
|
||||||
|
group6.add_option('--suffix', dest='suffix',
|
||||||
|
help='text string that you enter in the *suffix* textfield in the browser app')
|
||||||
|
group6.add_option('--filterQuery', dest='filterQuery', metavar='REGEX',
|
||||||
|
help='Simple RegEx text filter on filterColumn, e.g. ^12015$'),
|
||||||
|
group6.add_option('--filterColumn', dest='filterColumn', metavar='COLUMNNAME',
|
||||||
|
help='column name for filterQuery (default: name of first column)')
|
||||||
|
group6.add_option('--facets', dest='facets',
|
||||||
|
help='facets config in json format (may be extracted with browser dev tools in browser app)')
|
||||||
|
group6.add_option('--splitToFiles', dest='splitToFiles', metavar='true/false', choices=('true', 'false'),
|
||||||
|
help='will split each row/record into a single file; it specifies a presumably unique character series for splitting; --prefix and --suffix will be applied to all files; filename-prefix can be specified with --output (default: %Y%m%d)')
|
||||||
|
group6.add_option('--suffixById', dest='suffixById', metavar='true/false', choices=('true', 'false'),
|
||||||
|
help='enhancement option for --splitToFiles; will generate filename-suffix from values in key column')
|
||||||
|
PARSER.add_option_group(group6)
|
||||||
|
|
||||||
|
#noinspection PyPep8Naming
|
||||||
|
def main():
|
||||||
|
"""Command line interface."""
|
||||||
|
|
||||||
|
# get environment variables in docker network
|
||||||
|
docker_host = os.environ.get('OPENREFINE_SERVER_PORT_3333_TCP_ADDR')
|
||||||
|
if docker_host:
|
||||||
|
os.environ["OPENREFINE_HOST"] = docker_host
|
||||||
|
refine.REFINE_HOST = docker_host
|
||||||
|
docker_port = os.environ.get('OPENREFINE_SERVER_PORT_3333_TCP_PORT')
|
||||||
|
if docker_port:
|
||||||
|
os.environ["OPENREFINE_PORT"] = docker_port
|
||||||
|
refine.REFINE_PORT = docker_port
|
||||||
|
|
||||||
|
options, args = PARSER.parse_args()
|
||||||
|
commands_dict = { group2_arg.dest : getattr(options, group2_arg.dest) for group2_arg in group2.option_list }
|
||||||
|
commands_dict.update({ group3_arg.dest : getattr(options, group3_arg.dest) for group3_arg in group3.option_list })
|
||||||
|
commands_dict = { k: v for k, v in commands_dict.items() if v != None }
|
||||||
|
if not commands_dict:
|
||||||
|
PARSER.print_usage()
|
||||||
|
return
|
||||||
|
if options.host:
|
||||||
|
refine.REFINE_HOST = options.host
|
||||||
|
if options.port:
|
||||||
|
refine.REFINE_PORT = options.port
|
||||||
|
if args and not str.isdigit(args[0]):
|
||||||
|
projects = refine.Refine(refine.RefineServer()).list_projects().items()
|
||||||
|
idlist = []
|
||||||
|
for project_id, project_info in projects:
|
||||||
|
if args[0] == project_info['name']:
|
||||||
|
idlist.append(str(project_id))
|
||||||
|
if len(idlist) > 1:
|
||||||
|
raise Exception('Found at least two projects. Please specify project by id.')
|
||||||
|
else:
|
||||||
|
args[0] = idlist[0]
|
||||||
|
|
||||||
|
if options.list:
|
||||||
|
client.list_projects()
|
||||||
|
if options.create:
|
||||||
|
client.create_project(options)
|
||||||
|
if options.delete:
|
||||||
|
project = refine.RefineProject(args[0])
|
||||||
|
project.delete()
|
||||||
|
if options.apply:
|
||||||
|
project = refine.RefineProject(args[0])
|
||||||
|
response = project.apply_operations(options.apply)
|
||||||
|
if response != 'ok':
|
||||||
|
print >> sys.stderr, 'Failed to apply %s: %s' \
|
||||||
|
% (options.apply, response)
|
||||||
|
return project
|
||||||
|
if options.export or options.output:
|
||||||
|
project = refine.RefineProject(args[0])
|
||||||
|
client.export_project(project, options)
|
||||||
|
return project
|
||||||
|
if options.info:
|
||||||
|
client.info(args[0])
|
||||||
|
project = refine.RefineProject(args[0])
|
||||||
|
return project
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# execute only if run as a script
|
||||||
|
main()
|
|
@ -19,200 +19,8 @@ Script to provide a command line interface to a OpenRefine server.
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||||
|
|
||||||
|
|
||||||
import optparse
|
from google.refine import __main__
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from google.refine import refine
|
|
||||||
from google.refine import client
|
|
||||||
|
|
||||||
reload(sys)
|
|
||||||
sys.setdefaultencoding('utf-8')
|
|
||||||
|
|
||||||
class myParser(optparse.OptionParser):
|
|
||||||
|
|
||||||
def format_epilog(self, formatter):
|
|
||||||
return self.epilog
|
|
||||||
|
|
||||||
PARSER = \
|
|
||||||
myParser(description='Script to provide a command line interface to an OpenRefine server.',
|
|
||||||
usage='usage: %prog [--help | OPTIONS]',
|
|
||||||
epilog="""
|
|
||||||
Examples:
|
|
||||||
--list # show list of projects (id: name)
|
|
||||||
--list -H 127.0.0.1 -P 80 # specify hostname and port
|
|
||||||
--info 2161595260364 # show metadata of project
|
|
||||||
--info "christmas gifts"
|
|
||||||
--create example.csv # create new project from file example.csv
|
|
||||||
--create example.tsv --encoding=UTF-8
|
|
||||||
--create example.xml --recordPath=collection --recordPath=record
|
|
||||||
--create example.json --recordPath=_ --recordPath=_
|
|
||||||
--create example.xlsx --sheets=0
|
|
||||||
--create example.ods --sheets=0
|
|
||||||
--apply trim.json 2161595260364 # apply rules in trim.json to project 1234...
|
|
||||||
--apply trim.json "christmas gifts"
|
|
||||||
--export 2161595260364 > project.tsv # export project 2161595260364 in tsv format
|
|
||||||
--export "christmas gifts" > project.tsv
|
|
||||||
--export --output=project.xlsx 2161595260364 # export project in xlsx format
|
|
||||||
--export --output=project.xlsx "christmas gifts"
|
|
||||||
--export "My Address Book" --template='{ "friend" : {{jsonize(cells["friend"].value)}}, "address" : {{jsonize(cells["address"].value)}} }' --prefix='{ "rows" : [' --rowSeparator ',' --suffix '] }' --filterQuery="^mary$"
|
|
||||||
--delete 2161595260364 # delete project
|
|
||||||
--delete "christmas gifts"
|
|
||||||
""")
|
|
||||||
|
|
||||||
group1 = optparse.OptionGroup(PARSER, 'Connection options')
|
|
||||||
group1.add_option('-H', '--host', dest='host', metavar='127.0.0.1',
|
|
||||||
help='OpenRefine hostname (default: 127.0.0.1)')
|
|
||||||
group1.add_option('-P', '--port', dest='port', metavar='3333',
|
|
||||||
help='OpenRefine port (default: 3333)')
|
|
||||||
PARSER.add_option_group(group1)
|
|
||||||
|
|
||||||
group2 = optparse.OptionGroup(PARSER, 'Commands')
|
|
||||||
group2.add_option('-c', '--create', dest='create', metavar='[FILE]',
|
|
||||||
help='Create project from file. The filename ending (e.g. .csv) defines the input format (csv,tsv,xml,json,txt,xls,xlsx,ods)')
|
|
||||||
group2.add_option('-l', '--list', dest='list', action='store_true',
|
|
||||||
help='List projects')
|
|
||||||
PARSER.add_option_group(group2)
|
|
||||||
|
|
||||||
group3 = optparse.OptionGroup(PARSER, 'Commands with argument [PROJECTID/PROJECTNAME]')
|
|
||||||
group3.add_option('-d', '--delete', dest='delete', action='store_true',
|
|
||||||
help='Delete project')
|
|
||||||
group3.add_option('-f', '--apply', dest='apply', metavar='[FILE]',
|
|
||||||
help='Apply JSON rules to OpenRefine project')
|
|
||||||
group3.add_option('-E', '--export', dest='export', action='store_true',
|
|
||||||
help='Export project in tsv format to stdout.')
|
|
||||||
group3.add_option('-o', '--output', dest='output', metavar='[FILE]',
|
|
||||||
help='Export project to file. The filename ending (e.g. .tsv) defines the output format (csv,tsv,xls,xlsx,html)')
|
|
||||||
group3.add_option('--info', dest='info', action='store_true',
|
|
||||||
help='show project metadata')
|
|
||||||
PARSER.add_option_group(group3)
|
|
||||||
|
|
||||||
group4 = optparse.OptionGroup(PARSER, 'Create options')
|
|
||||||
group4.add_option('--columnWidths', dest='columnWidths',
|
|
||||||
help='(txt/fixed-width) please provide widths separated by comma (e.g. 7,5)')
|
|
||||||
group4.add_option('--encoding', dest='encoding',
|
|
||||||
help='(csv,tsv,txt), please provide short encoding name (e.g. UTF-8)')
|
|
||||||
group4.add_option('--guessCellValueTypes', dest='guessCellValueTypes', metavar='true/false', choices=('true', 'false'),
|
|
||||||
help='(xml,csv,tsv,txt,json, default: false)')
|
|
||||||
group4.add_option('--headerLines', dest='headerLines', type="int",
|
|
||||||
help='(csv,tsv,txt/fixed-width,xls,xlsx,ods), default: 1, default txt/fixed-width: 0')
|
|
||||||
group4.add_option('--ignoreLines', dest='ignoreLines', type="int",
|
|
||||||
help='(csv,tsv,txt,xls,xlsx,ods), default: -1')
|
|
||||||
group4.add_option('--includeFileSources', dest='includeFileSources', metavar='true/false', choices=('true', 'false'),
|
|
||||||
help='(all formats), default: false')
|
|
||||||
group4.add_option('--limit', dest='limit', type="int",
|
|
||||||
help='(all formats), default: -1')
|
|
||||||
group4.add_option('--linesPerRow', dest='linesPerRow', type="int",
|
|
||||||
help='(txt/line-based), default: 1')
|
|
||||||
group4.add_option('--processQuotes', dest='processQuotes', metavar='true/false', choices=('true', 'false'),
|
|
||||||
help='(csv,tsv), default: true')
|
|
||||||
group4.add_option('--projectName', dest='project_name',
|
|
||||||
help='(all formats), default: filename')
|
|
||||||
group4.add_option('--recordPath', dest='recordPath', action='append',
|
|
||||||
help='(xml,json), please provide path in multiple arguments without slashes, e.g. /collection/record/ should be entered like this: --recordPath=collection --recordPath=record, default xml: record, default json: _ _')
|
|
||||||
group4.add_option('--separator', dest='separator',
|
|
||||||
help='(csv,tsv), default csv: , default tsv: \\t')
|
|
||||||
group4.add_option('--sheets', dest='sheets', action='append', type="int",
|
|
||||||
help='(xls,xlsx,ods), please provide sheets in multiple arguments, e.g. --sheets=0 --sheets=1, default: 0 (first sheet)')
|
|
||||||
group4.add_option('--skipDataLines', dest='skipDataLines', type="int",
|
|
||||||
help='(csv,tsv,txt,xls,xlsx,ods), default: 0, default line-based: -1')
|
|
||||||
group4.add_option('--storeBlankRows', dest='storeBlankRows', metavar='true/false', choices=('true', 'false'),
|
|
||||||
help='(csv,tsv,txt,xls,xlsx,ods), default: true')
|
|
||||||
group4.add_option('--storeBlankCellsAsNulls', dest='storeBlankCellsAsNulls', metavar='true/false', choices=('true', 'false'),
|
|
||||||
help='(csv,tsv,txt,xls,xlsx,ods), default: true')
|
|
||||||
group4.add_option('--storeEmptyStrings', dest='storeEmptyStrings', metavar='true/false', choices=('true', 'false'),
|
|
||||||
help='(xml,json), default: true')
|
|
||||||
group4.add_option('--trimStrings', dest='trimStrings', metavar='true/false', choices=('true', 'false'),
|
|
||||||
help='(xml,json), default: false')
|
|
||||||
PARSER.add_option_group(group4)
|
|
||||||
|
|
||||||
group5 = optparse.OptionGroup(PARSER, 'Legacy options')
|
|
||||||
group5.add_option('--format', dest='input_format',
|
|
||||||
help='Specify input format (csv,tsv,xml,json,line-based,fixed-width,xls,xlsx,ods)')
|
|
||||||
PARSER.add_option_group(group5)
|
|
||||||
|
|
||||||
group6= optparse.OptionGroup(PARSER, 'Templating export options')
|
|
||||||
group6.add_option('--template', dest='template',
|
|
||||||
help='mandatory; (big) text string that you enter in the *row template* textfield in the export/templating menu in the browser app)')
|
|
||||||
group6.add_option('--mode', dest='mode', metavar='row-based/record-based', choices=('row-based', 'record-based'),
|
|
||||||
help='engine mode (default: row-based)')
|
|
||||||
group6.add_option('--prefix', dest='prefix',
|
|
||||||
help='text string that you enter in the *prefix* textfield in the browser app')
|
|
||||||
group6.add_option('--rowSeparator', dest='rowSeparator',
|
|
||||||
help='text string that you enter in the *row separator* textfield in the browser app')
|
|
||||||
group6.add_option('--suffix', dest='suffix',
|
|
||||||
help='text string that you enter in the *suffix* textfield in the browser app')
|
|
||||||
group6.add_option('--filterQuery', dest='filterQuery', metavar='REGEX',
|
|
||||||
help='Simple RegEx text filter on filterColumn, e.g. ^12015$'),
|
|
||||||
group6.add_option('--filterColumn', dest='filterColumn', metavar='COLUMNNAME',
|
|
||||||
help='column name for filterQuery (default: name of first column)')
|
|
||||||
group6.add_option('--facets', dest='facets',
|
|
||||||
help='facets config in json format (may be extracted with browser dev tools in browser app)')
|
|
||||||
group6.add_option('--splitToFiles', dest='splitToFiles', metavar='true/false', choices=('true', 'false'),
|
|
||||||
help='will split each row/record into a single file; it specifies a presumably unique character series for splitting; --prefix and --suffix will be applied to all files; filename-prefix can be specified with --output (default: %Y%m%d)')
|
|
||||||
group6.add_option('--suffixById', dest='suffixById', metavar='true/false', choices=('true', 'false'),
|
|
||||||
help='enhancement option for --splitToFiles; will generate filename-suffix from values in key column')
|
|
||||||
PARSER.add_option_group(group6)
|
|
||||||
|
|
||||||
#noinspection PyPep8Naming
|
|
||||||
def main():
|
|
||||||
"""Command line interface."""
|
|
||||||
|
|
||||||
# get environment variables in docker network
|
|
||||||
docker_host = os.environ.get('OPENREFINE_SERVER_PORT_3333_TCP_ADDR')
|
|
||||||
if docker_host:
|
|
||||||
os.environ["OPENREFINE_HOST"] = docker_host
|
|
||||||
refine.REFINE_HOST = docker_host
|
|
||||||
docker_port = os.environ.get('OPENREFINE_SERVER_PORT_3333_TCP_PORT')
|
|
||||||
if docker_port:
|
|
||||||
os.environ["OPENREFINE_PORT"] = docker_port
|
|
||||||
refine.REFINE_PORT = docker_port
|
|
||||||
|
|
||||||
options, args = PARSER.parse_args()
|
|
||||||
commands_dict = { group2_arg.dest : getattr(options, group2_arg.dest) for group2_arg in group2.option_list }
|
|
||||||
commands_dict.update({ group3_arg.dest : getattr(options, group3_arg.dest) for group3_arg in group3.option_list })
|
|
||||||
commands_dict = { k: v for k, v in commands_dict.items() if v != None }
|
|
||||||
if not commands_dict:
|
|
||||||
PARSER.print_usage()
|
|
||||||
return
|
|
||||||
if options.host:
|
|
||||||
refine.REFINE_HOST = options.host
|
|
||||||
if options.port:
|
|
||||||
refine.REFINE_PORT = options.port
|
|
||||||
if args and not str.isdigit(args[0]):
|
|
||||||
projects = refine.Refine(refine.RefineServer()).list_projects().items()
|
|
||||||
idlist = []
|
|
||||||
for project_id, project_info in projects:
|
|
||||||
if args[0] == project_info['name']:
|
|
||||||
idlist.append(str(project_id))
|
|
||||||
if len(idlist) > 1:
|
|
||||||
raise Exception('Found at least two projects. Please specify project by id.')
|
|
||||||
else:
|
|
||||||
args[0] = idlist[0]
|
|
||||||
|
|
||||||
if options.list:
|
|
||||||
client.list_projects()
|
|
||||||
if options.create:
|
|
||||||
client.create_project(options)
|
|
||||||
if options.delete:
|
|
||||||
project = refine.RefineProject(args[0])
|
|
||||||
project.delete()
|
|
||||||
if options.apply:
|
|
||||||
project = refine.RefineProject(args[0])
|
|
||||||
response = project.apply_operations(options.apply)
|
|
||||||
if response != 'ok':
|
|
||||||
print >> sys.stderr, 'Failed to apply %s: %s' \
|
|
||||||
% (options.apply, response)
|
|
||||||
return project
|
|
||||||
if options.export or options.output:
|
|
||||||
project = refine.RefineProject(args[0])
|
|
||||||
client.export_project(project, options)
|
|
||||||
return project
|
|
||||||
if options.info:
|
|
||||||
client.info(args[0])
|
|
||||||
project = refine.RefineProject(args[0])
|
|
||||||
return project
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# return project so that it's available interactively, python -i refine.py
|
# return project so that it's available interactively, python -i refine.py
|
||||||
refine_project = main()
|
refine_project = __main__.main()
|
||||||
|
|
15
setup.py
15
setup.py
|
@ -25,23 +25,34 @@ def read(filename):
|
||||||
return open(os.path.join(os.path.dirname(__file__), filename)).read()
|
return open(os.path.join(os.path.dirname(__file__), filename)).read()
|
||||||
|
|
||||||
setup(name='openrefine-client',
|
setup(name='openrefine-client',
|
||||||
version='0.3.4',
|
version='0.3.6',
|
||||||
description=('The OpenRefine Python Client Library provides an '
|
description=('The OpenRefine Python Client Library provides an '
|
||||||
'interface to communicating with an OpenRefine server. '
|
'interface to communicating with an OpenRefine server. '
|
||||||
'This fork extends the command line interface (CLI).'),
|
'This fork extends the command line interface (CLI).'),
|
||||||
long_description=read('README.md'),
|
long_description=read('README.md'),
|
||||||
|
long_description_content_type='text/markdown',
|
||||||
author='Felix Lohmeier',
|
author='Felix Lohmeier',
|
||||||
author_email='felix.lohmeier@opencultureconsulting.com',
|
author_email='felix.lohmeier@opencultureconsulting.com',
|
||||||
url='https://github.com/opencultureconsulting/openrefine-client',
|
url='https://github.com/opencultureconsulting/openrefine-client',
|
||||||
packages=find_packages(exclude=['tests']),
|
packages=find_packages(exclude=['tests']),
|
||||||
install_requires=['urllib2_file'],
|
install_requires=['urllib2_file'],
|
||||||
|
python_requires='>2.6, !=3.*',
|
||||||
|
entry_points={
|
||||||
|
'console_scripts': [ 'openrefine-client = google.refine.__main__:main' ]
|
||||||
|
},
|
||||||
platforms=['Any'],
|
platforms=['Any'],
|
||||||
|
keywords='openrefine client batch processing docker etl code4lib',
|
||||||
classifiers = [
|
classifiers = [
|
||||||
'Development Status :: 3 - Alpha',
|
'Development Status :: 4 - Beta',
|
||||||
|
'Environment :: Console (Text Based)',
|
||||||
'Intended Audience :: Developers',
|
'Intended Audience :: Developers',
|
||||||
|
'Intended Audience :: System Administrators',
|
||||||
'License :: OSI Approved :: GNU General Public License (GPL)',
|
'License :: OSI Approved :: GNU General Public License (GPL)',
|
||||||
|
'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
|
||||||
'Operating System :: OS Independent',
|
'Operating System :: OS Independent',
|
||||||
'Programming Language :: Python',
|
'Programming Language :: Python',
|
||||||
|
'Programming Language :: Python :: 2',
|
||||||
|
'Programming Language :: Python :: 2.7',
|
||||||
'Topic :: Software Development :: Libraries :: Python Modules',
|
'Topic :: Software Development :: Libraries :: Python Modules',
|
||||||
'Topic :: Text Processing',
|
'Topic :: Text Processing',
|
||||||
],
|
],
|
||||||
|
|
Loading…
Reference in New Issue