Google Datastore combine (union) multiple sets of entity results to achieve OR condition
Asked Answered
F

3

2

I am working with NodeJS on Google App Engine with the Datastore database.

Due to the fact that Datastore does not have support the OR operator, I need to run multiple queries and combine the results.

I am planning to run multiple queries and then combine the results into a single array of entity objects. I have a single query working already.

Question: What is a reasonably efficient way to combine two (or more) sets of entities returned by Datastore including de-duplication? I believe this would be a "union" operation in terms of set theory.

Here is the basic query outline that will be run multiple times with some varying filters to achieve the OR conditions required.

  //Set requester username
  const requester = req.user.userName;
  //Create datastore query on Transfer Request kind table
  const task_history = datastore.createQuery('Task');
  //Set query conditions
  task_history.filter('requester', requester);
  //Run datastore query
  datastore.runQuery(task_history, function(err, entities) {
    if(err) {
      console.log('Task History JSON unable to return data results. Error message: ', err);
      return;
      //If query works and returns any entities
    } else if (entities[0]) {
      //Else if query works but does not return any entities return empty JSON response
      res.json(entities); //HOW TO COMBINE (UNION) MULTIPLE SETS OF ENTITIES EFFICIENTLY?
      return;
    }
  });

Here is my original post: Google Datastore filter with OR condition

Ferromagnetic answered 22/11, 2017 at 14:29 Comment(0)
C
3

IMHO the most efficient way would be to use Keys-only queries in the 1st stage, then perform the combination of the keys obtained into a single list (including deduplication), followed by obtaining the entities simply by key lookup. From Projection queries:

Keys-only queries

A keys-only query (which is a type of projection query) returns just the keys of the result entities instead of the entities themselves, at lower latency and cost than retrieving entire entities.

It is often more economical to do a keys-only query first, and then fetch a subset of entities from the results, rather than executing a general query which may fetch more entities than you actually need.

Here's how to create a keys-only query:

const query = datastore.createQuery()
  .select('__key__')
  .limit(1);

This method addresses several problems you may encounter when trying to directly combine lists of entities obtained through regular, non-keys-only queries:

  • you can't de-duplicate properly because you can't tell the difference between different entities with identical values and the same entity appearing in multiply query results
  • comparing entities by property values can be tricky and is definitely slower/more computing expensive than comparing just entity keys
  • if you can't process all the results in a single request you're incurring unnecessary datastore costs for reading them without actually using them
  • it is much simpler to split processing of entities in multiple requests (via task queues, for example) when handling just entity keys

There are some disadvantages as well:

  • it may be a bit slower because you're going to the datastore twice: once for the keys and once to get the actual entities
  • you can't take advantage of getting just the properties you need via non-keys-only projection queries
Cassiopeia answered 22/11, 2017 at 15:11 Comment(0)
F
1

Here is the solution I created based on the advice provided in the accepted answer.

/*History JSON*/
module.exports.treqHistoryJSON = function(req, res) {
  if (!req.user) {
    req.user = {};
    res.json();
    return;
  }

  //Set Requester username
  const loggedin_username = req.user.userName;

  //Get records matching Requester OR Dataowner
  //Google Datastore OR Conditions are not supported
  //Workaround separate parallel queries get records matching Requester and Dataowner then combine results
  async.parallel({
    //Get entity keys matching Requester
    requesterKeys: function(callback) {
      getKeysOnly('TransferRequest', 'requester_username', loggedin_username, (treqs_by_requester) => {
        //Callback pass in response as parameter
        callback(null, treqs_by_requester)
      });
    },
    //Get entity keys matching Dataowner
    dataownerKeys: function(callback) {
      getKeysOnly('TransferRequest', 'dataowner_username', loggedin_username, (treqs_by_dataowner) => {
        callback(null, treqs_by_dataowner)
      });
    }
  }, function(err, getEntities) {
    if (err) {
      console.log('Transfer Request History JSON unable to get entity keys Transfer Request. Error message: ', err);
      return;
    } else {
      //Combine two arrays of entity keys into a single de-duplicated array of entity keys
      let entity_keys_union = unionEntityKeys(getEntities.requesterKeys, getEntities.dataownerKeys);
      //Get key values from entity key 'symbol' object type
      let entity_keys_only = entity_keys_union.map((ent) => {
        return ent[datastore.KEY];
      });
      //Pass in array of entity keys to get full entities
      datastore.get(entity_keys_only, function(err, entities) {
        if(err) {
          console.log('Transfer Request History JSON unable to lookup multiple entities by key for Transfer Request. Error message: ', err);
          return;
          //If query works and returns any entities
        } else {
          processEntitiesToDisplay(res, entities);
        }
      });
    }
  });

};

/*
 * Get keys-only entities by kind and property
 * @kind string name of kind
 * @property_type string property filtering by in query
 * @filter_value string of filter value to match in query
 * getEntitiesCallback callback to collect results
 */
function getKeysOnly(kind, property_type, filter_value, getEntitiesCallback) {
  //Create datastore query
  const keys_query = datastore.createQuery(kind);
  //Set query conditions
  keys_query.filter(property_type, filter_value);
  //Select KEY only
  keys_query.select('__key__');
  datastore.runQuery(keys_query, function(err, entities) {
    if(err) {
      console.log('Get Keys Only query unable to return data results. Error message: ', err);
      return;
    } else {
      getEntitiesCallback(entities);
    }
  });
}

/*
 * Union two arrays of entity keys de-duplicate based on ID value
 * @arr1 array of entity keys
 * @arr2 array of entity keys
 */
function unionEntityKeys(arr1, arr2) {
  //Create new array
  let arr3 = [];
  //For each element in array 1
  for(let i in arr1) {
    let shared = false;
      for (let j in arr2)
        //If ID in array 1 is same as array 2 then this is a duplicate
        if (arr2[j][datastore.KEY]['id'] == arr1[i][datastore.KEY]['id']) {
          shared = true;
          break;
        }
      //If IDs are not the same add element to new array
      if(!shared) {
        arr3.push(arr1[i])
      }
    }
  //Concat array 2 and new array 3
  arr3 = arr3.concat(arr2);
  return arr3;
}
Ferromagnetic answered 28/11, 2017 at 19:19 Comment(0)
R
1

I just wanted to write in for folks who stumble upon this...

There is a workaround for some cases of not having the OR operator if you can restructure your data a bit, using Array properties: https://cloud.google.com/datastore/docs/concepts/entities#array_properties

From the documentation:

Array properties can be useful, for instance, when performing queries with equality filters: an entity satisfies the query if any of its values for a property matches the value specified in the filter.

So, if you needed to query for all entities bearing one of multiple potential values, putting all of the possibilities for each entity into an Array property and then indexing it for your query should yield the results you want. (But, you'd need to maintain that additional property, or replace your existing properties with that Array implementation if it could work for all of what you need.)

Runnels answered 30/12, 2022 at 6:3 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.