Skip to content

Commit

Permalink
added pandas process (#111)
Browse files Browse the repository at this point in the history
* added pandas process

Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com>
  • Loading branch information
cehbrecht and Zeitsperre authored Mar 30, 2021
1 parent efaa0ab commit 38a72ee
Show file tree
Hide file tree
Showing 7 changed files with 453 additions and 3 deletions.
2 changes: 2 additions & 0 deletions emu/processes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .wps_ncml import NcMLAgg
from .wps_translation import Translation
from .wps_geodata import GeoData
from .wps_pandas import Pandas


processes = [
Expand All @@ -43,4 +44,5 @@
NcMLAgg(),
Translation(),
GeoData(),
Pandas(),
]
50 changes: 50 additions & 0 deletions emu/processes/wps_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import logging

from pywps import FORMATS, Format, ComplexInput, ComplexOutput, Process
from pywps.app.Common import Metadata

# Module-level logger obtained under the "PYWPS" logger name.
LOGGER = logging.getLogger("PYWPS")


class Pandas(Process):
    """WPS process that reads a CSV document with pandas and returns it as JSON.

    Notes:
        Create some statistics with pandas from a CSV file.

    The process declares one complex input, ``csv`` (``text/csv`` or plain
    text), and one complex output, ``output`` (JSON, returned as a reference).
    """

    def __init__(self):
        inputs = [
            ComplexInput('csv', 'CSV document',
                         abstract='A CSV document',
                         supported_formats=[Format('text/csv', extension='.csv'), FORMATS.TEXT]),
        ]
        outputs = [
            ComplexOutput('output', 'Output',
                          as_reference=True,
                          supported_formats=[FORMATS.JSON]),
        ]

        super().__init__(
            self._handler,
            identifier='pandas',
            title='Pandas',
            abstract="Create statistics using Pandas",
            version='1.0',
            metadata=[
                Metadata('User Guide', 'http://emu.readthedocs.io/en/latest/')
            ],
            inputs=inputs,
            outputs=outputs,
            store_supported=True,
            status_supported=True)

    def _handler(self, request, response):
        """Convert the first ``csv`` input to JSON records on ``output``.

        Args:
            request: the execute request; ``request.inputs['csv'][0].stream``
                is handed to ``pandas.read_csv``.
            response: the execute response; its ``output`` data is set to the
                DataFrame serialised with ``to_json(orient='records')``.

        Returns:
            The same ``response`` object after the status has been set to 100.
        """
        # pandas is an optional dependency, so import it only when the
        # process is actually executed.
        import pandas as pd

        response.update_status('Pandas Process started.', 0)

        # Parse the CSV payload of the first (and only declared) input.
        df = pd.read_csv(request.inputs['csv'][0].stream)

        # orient='records' emits a JSON array with one object per CSV row.
        response.outputs['output'].data = df.to_json(orient='records')

        response.update_status('Pandas Process completed.', 100)
        return response
8 changes: 5 additions & 3 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@ channels:
dependencies:
- pip
- python>=3.6
- pywps>=4.4
- pywps>=4.4.0
- jinja2
- click
- psutil
- defusedxml
# GeoJSON
- geomet
# pandas
- pandas
# opendap support
- netcdf4
- xarray
# tests
- pytest
- pip:
- geomet
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ defusedxml
geomet
xarray
netCDF4
pandas
1 change: 1 addition & 0 deletions tests/test_wps_caps.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def test_wps_caps():
'ncml',
'non.py-id',
'output_formats',
'pandas',
'poly_centroid',
'show_error',
'simple_dry_run',
Expand Down
49 changes: 49 additions & 0 deletions tests/test_wps_pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import json
from pywps import Service
from pywps import get_ElementMakerForVersion

from .common import client_for, resource_file, get_output
from emu.processes.wps_pandas import Pandas

# WPS version string passed to get_ElementMakerForVersion below.
VERSION = "1.0.0"

# Element makers used by the tests to build request documents
# (WPS.* and OWS.* elements).
WPS, OWS = get_ElementMakerForVersion(VERSION)


def test_wps_pandas_embedded():
    """Execute the ``pandas`` process with the CSV embedded in the request.

    Posts an Execute document whose ``csv`` input carries the contents of the
    ``penguins.csv`` resource inline, asks for ``output`` as raw data, and
    checks that the first JSON record has species "Adelie".
    """
    client = client_for(Service(processes=[Pandas()]))

    # Read the fixture inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(resource_file("penguins.csv")) as csv_file:
        text = csv_file.read()

    request_doc = WPS.Execute(
        OWS.Identifier('pandas'),
        WPS.DataInputs(
            WPS.Input(
                OWS.Identifier('csv'),
                WPS.Data(WPS.ComplexData(text, mimeType='text/csv'))
            )
        ),
        WPS.ResponseForm(
            WPS.RawDataOutput(
                OWS.Identifier('output')
            )
        ),
        # Same version the WPS/OWS element makers were created for.
        version=VERSION
    )
    resp = client.post_xml(doc=request_doc)
    assert resp.status_code == 200
    penguins = json.loads(resp.data)
    assert penguins[0]['species'] == "Adelie"


def test_wps_pandas_as_ref():
    """Execute the ``pandas`` process with the CSV passed by reference.

    Sends a KVP GET Execute request whose ``csv`` input is an ``xlink:href``
    to a remote ``penguins.csv`` and checks that the first JSON record of the
    raw ``output`` has species "Adelie".
    """
    # NOTE(review): the input is a remote URL, so this test presumably needs
    # network access to pass -- confirm how it is handled in CI.
    csv_url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
    query = {
        'service': 'wps',
        'request': 'execute',
        'version': '1.0.0',
        'identifier': 'pandas',
        'datainputs': "csv=@xlink:href=" + csv_url,
        'rawdataoutput': 'output=@mimetype=application/json',
    }

    wps_client = client_for(Service(processes=[Pandas()]))
    response = wps_client.get(**query)

    assert response.status_code == 200
    first_record = json.loads(response.data)[0]
    assert first_record['species'] == "Adelie"
Loading

0 comments on commit 38a72ee

Please sign in to comment.