I have the code below, which pulls data from Zendesk. The problem is that every run fetches the last 30 days of data. What changes do I need to make so the load is incremental? Ideally this script should run two or three times a day, and with the current version each execution re-downloads the full 30 days, which is unnecessary.

from zenpy import Zenpy
import datetime
import json
import psycopg2

# Connect to the local Postgres instance
DSN = "dbname='postgres' user='postgres' host='localhost' password='postgres' port='5432'"
conn = psycopg2.connect(DSN)
conn.set_client_encoding('utf-8')
cur = conn.cursor()


# NOTE: this throws away all previously loaded data on every run
script = 'DROP TABLE IF EXISTS ticket_events; CREATE TABLE ticket_events ( ID serial NOT NULL ' \
         'PRIMARY KEY, info json NOT NULL); '
cur.execute(script)
conn.commit()
print('Table dropped and recreated')

# Zenpy accepts an API token
creds = {
    'email': '[email protected]',
    'token': '*************',
    'subdomain': 'xxxxxx'
}
rday = datetime.datetime.now() - datetime.timedelta(days=30)  # fixed 30-day look-back

# Default connect
zenpy_client = Zenpy(**creds)
print('Zendesk connected via zenpy')
requests = zenpy_client.tickets.events(start_time=rday, include=None)

# Loop over the events and insert each one into the DWH as raw JSON
for request in requests:
    req_json = json.dumps(request.to_dict(), sort_keys=False)
    # Parameterized insert; building the statement by string concatenation
    # with $$ quoting breaks if the JSON ever contains the $$ delimiter
    cur.execute('INSERT INTO ticket_events(info) VALUES (%s)', (req_json,))
conn.commit()  # single commit after the batch

conn.close()

Below is the table structure I have defined to hold the flattened values, but I don't think it is very good. I want this table to be updated incrementally and to have any redundant rows deleted (if there are any). Any suggestions on this, please?

drop table if exists zendesk_ticket_events;
create table zendesk_ticket_events as
    SELECT
     CAST(info ->> 'id' AS BIGINT)                               AS parent_id,
     CAST(info ->> 'ticket_id' AS BIGINT)                        AS ticket_id,
     CAST(info ->> 'updater_id' AS BIGINT)                       AS updater_id,
     CAST(info ->> 'via' AS VARCHAR(50))                         AS via,
     CAST(info ->> 'event_type' AS VARCHAR(50))                  AS parent_event_type,
     CAST(info ->> 'created_at' AS timestamp without time zone)  AS created_at,
     CAST(child_event ->> 'via_reference_id' AS TEXT)            AS via_reference_id,
     CAST(child_event ->> 'id' AS TEXT)                          AS child_id,
     CAST(child_event ->> 'assignee_id' AS BIGINT)               AS assignee_id,
     CAST(child_event ->> 'subject' AS VARCHAR(50))              AS subject,
     CAST(child_event ->> 'requester_id' AS TEXT)                AS requester_id,
     CAST(child_event ->> 'status' AS VARCHAR(50))               AS status,
     CAST(child_event ->> 'priority' AS VARCHAR(50))             AS priority,
     CAST(child_event ->> 'comment_public' AS VARCHAR(50))       AS comment_public,
     CAST(child_event ->> 'comment_present' AS VARCHAR(50))      AS comment_present,
     CAST(child_event ->> 'event_type' AS VARCHAR(50))           AS child_event_type,
     CAST(child_event ->> 'previous_value' AS TEXT)              AS previous_value,
     CAST(child_event ->> 'group_id' AS TEXT)                    AS group_id
    FROM ticket_events t,
         json_array_elements(t.info -> 'child_events') AS child_event;
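
One way to get the incremental behaviour and de-duplication asked about here is to keep zendesk_ticket_events as a persistent table keyed on the child event id and load it with an upsert instead of dropping and recreating it. The sketch below is only an illustration, not a confirmed solution: it shows a reduced column list, it assumes the child event "id" is unique across audits (worth verifying against your data first), and it reuses the cur/conn objects from the script above.

# Sketch: idempotent flattening into a persistent table. Only a few
# columns are shown; the remaining columns from the query above would
# slot in the same way.
flatten_ddl = """
CREATE TABLE IF NOT EXISTS zendesk_ticket_events (
    parent_id  BIGINT,
    ticket_id  BIGINT,
    child_id   BIGINT PRIMARY KEY,  -- assumed unique; verify against your data
    event_type VARCHAR(50),
    created_at timestamp without time zone
);
"""

flatten_upsert = """
INSERT INTO zendesk_ticket_events (parent_id, ticket_id, child_id, event_type, created_at)
SELECT CAST(info ->> 'id' AS BIGINT),
       CAST(info ->> 'ticket_id' AS BIGINT),
       CAST(child_event ->> 'id' AS BIGINT),
       CAST(child_event ->> 'event_type' AS VARCHAR(50)),
       CAST(info ->> 'created_at' AS timestamp without time zone)
FROM ticket_events t,
     json_array_elements(t.info -> 'child_events') AS child_event
ON CONFLICT (child_id) DO NOTHING;  -- rows already loaded are skipped
"""

cur.execute(flatten_ddl)
cur.execute(flatten_upsert)
conn.commit()

The point of the ON CONFLICT clause is that rerunning the flattening step becomes idempotent, so the table can be refreshed after every incremental fetch without accumulating duplicates.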

Below is a sample event. Can anyone cross-check it and let me know whether the table structure above is fine or not?

{
  "child_events": [
    {
      "id": 54334560,
      "via": "Mail",
      "via_reference_id": null,
      "comment_present": true,
      "comment_public": true,
      "event_type": "Comment"
    },
    {
      "id": 54334580,
      "via": "Mail",
      "via_reference_id": null,
      "subject": "Order 10056 on 20.03.20",
      "event_type": "Create"
    },
    {
      "id": 54334600,
      "via": "Mail",
      "via_reference_id": null,
      "requester_id": 369854,
      "event_type": "Create"
    },
    {
      "id": 54334620,
      "via": "Mail",
      "via_reference_id": null,
      "locale_id": "8",
      "event_type": "Create"
    },
    {
      "id": 543342310640,
      "via": "Mail",
      "via_reference_id": null,
      "status": "new",
      "event_type": "Create"
    },
    {
      "id": 54334660,
      "via": "Mail",
      "via_reference_id": null,
      "priority": null,
      "event_type": "Create"
    },
    {
      "id": 54334700,
      "via": "Mail",
      "via_reference_id": null,
      "type": null,
      "event_type": "Create"
    },
    {
      "id": 54334740,
      "via": "Mail",
      "via_reference_id": null,
      "tags": [
        "bestellung"
      ],
      "added_tags": [
        "Orders"
      ],
      "removed_tags": [

      ],
      "event_type": "Create"
    },
    {
      "id": 54334860,
      "via": "Rule",
      "via_reference_id": 44967,
      "group_id": 2117,
      "rel": "trigger",
      "revision_id": 1,
      "event_type": "Change",
      "previous_value": null
    }
  ],
  "id": 54334540,
  "ticket_id": 159978,
  "updater_id": 369854,
  "via": "Mail",
  "created_at": "2020-03-29T18:41:22Z",
  "event_type": "Audit",
  "timestamp": 1585507282,
  "system": {
    "client": "Microsoft Outlook 14.0",
    "ip_address": null,
    "latitude": 48.3074,
    "location": "Linz, 4, Austria",
    "longitude": 14.285
  }
}
  • How are you currently executing this? Is it a single .py file that you run manually, and what OS is it on? Commented May 25, 2020 at 13:55
  • @SowjanyaRBhat Yes, it is a single .py script. This script is a test version; I have scheduled it via Jenkins. It is being tested on macOS and the live version runs on Linux. Commented May 25, 2020 at 14:17

1 Answer


From where you create the table up to the creation of your rday variable, I have changed the code to this:

create_table_sql = 'CREATE TABLE IF NOT EXISTS ' \
                   'ticket_events ( ID serial NOT NULL ' \
                   'PRIMARY KEY, info json NOT NULL); '  # create the table only if it is not present already

cur.execute(create_table_sql)
conn.commit()

# Zenpy accepts an API token
creds = {
    'email': '[email protected]',
    'token': '*************',
    'subdomain': 'xxxxxx'
}

# Get the newest created_at already in the DB; it lives inside the JSON column
select_max_created = "SELECT MAX(CAST(info ->> 'created_at' AS timestamp)) FROM ticket_events;"
cur.execute(select_max_created)
row = cur.fetchone()  # single row
if row[0] is not None:
    rday = row[0] - datetime.timedelta(hours=1)  # 1-hour overlap buffer so no events are missed
else:
    rday = datetime.datetime.now() - datetime.timedelta(days=30)  # empty table: fall back to 30 days
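
For completeness, here is a sketch (not part of the original answer) of how the rest of the script might then consume rday: fetch only events newer than it and insert them with a parameterized query. The unique expression index makes the overlapping hour idempotent at the raw layer; the index name is made up for the example, and creating it assumes the table does not already contain duplicate audits.

# One-time: a unique index on the audit id so events re-fetched from the
# overlap window are skipped instead of duplicated (index name is arbitrary;
# assumes no duplicate audits are in the table yet)
cur.execute("CREATE UNIQUE INDEX IF NOT EXISTS ticket_events_audit_id_idx "
            "ON ticket_events ((info ->> 'id'));")
conn.commit()

zenpy_client = Zenpy(**creds)
events = zenpy_client.tickets.events(start_time=rday, include=None)

for event in events:
    event_json = json.dumps(event.to_dict(), sort_keys=False)
    cur.execute("INSERT INTO ticket_events(info) VALUES (%s) "
                "ON CONFLICT ((info ->> 'id')) DO NOTHING", (event_json,))
conn.commit()
conn.close()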

4 Comments

Thanks a lot for the answer! But I want this script to be incremental: only load data newer than the max time in the DB. I would also like the underlying table structure reviewed so that instead of dropping and recreating it, it uses upsert or insert. I have pasted a sample response; could you please have a look and let me know whether the table structure I defined is fine or needs changes?
Can you show how the data looks in your table? A few rows with dummy values would make it a lot easier to get a picture of your problem.
Yes, to load only data newer than the max time you can run a query before creating 'rday' — select max(created_at) from ticket_events — and use the resulting created_at value for the 'rday' variable. You may want to subtract a buffer of an hour or two so the overlap ensures no data is missed.
I have added the sample response and the table structure I use. Can you please have a look and let me know if it is fine, or whether there is a better way of laying this out as a table?
