Data model

A schema is applied to a datamart and defines what mediarithmics should index and make available through queries.

Each schema is a Graph Query Language (GraphQL) schema that defines an object tree index, allowing you to run fast Object Tree Query Language (OTQL) queries to search users.

This schema defines all available mediarithmics objects with the standard properties. When defining your schema, you can start from this schema and add/remove properties based on your needs and the data you ingest into the platform.

The number of referenced properties has an impact on query performance. When defining your schema, keep only the properties you actually need; don't just copy the default ones.

User point is the root element of any mediarithmics schema, and only one index can be created. This may change in future releases to allow you to build different indexes.

# Root object of the "USER_INDEX" object tree index; every list below hangs off a user point.
type UserPoint @TreeIndexRoot(index:"USER_INDEX"){
  id: ID!
  # Indexed, so it can be used in WHERE clauses and aggregations.
  creation_ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  # Date computed from creation_ts by the ISODate function; not indexed itself.
  creation_date:Date! @Function(name:"ISODate", params:["creation_ts"])
  
  # User identifiers
  accounts: [UserAccount!]!
  emails: [UserEmail!]!
  devices: [UserDevicePoint!]!

  # User content
  activities: [UserActivity!]!
  events: [UserEvent!]!
  profiles: [UserProfile!]!
  choices: [UserChoice!]!
  
  # Technical objects
  scenarios: [UserScenario!]!
  segments: [UserSegment!]!
  
  # Deprecated identifiers
  # agents: [UserAgent!]!
}


### User identifiers

# Account identifier attached to a user point (element type of UserPoint.accounts).
type UserAccount {
  id:ID! @TreeIndex(index:"USER_INDEX")
  creation_ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  # NOTE(review): this ISODate result is indexed even though creation_ts already is,
  # which the best practices on this page advise against. Confirm whether this
  # @TreeIndex is required for schema validation before removing it.
  creation_date:Date! @TreeIndex(index:"USER_INDEX") @Function(params:["creation_ts"], name:"ISODate")
  
  # @ReferenceTable lets query authors pick a compartment by name instead of by identifier.
  compartment_id: String! @TreeIndex(index:"USER_INDEX") @ReferenceTable(model_type:"COMPARTMENTS", type:"CORE_OBJECT")
  user_account_id: String! @TreeIndex(index:"USER_INDEX")
}

# Email identifier attached to a user point (element type of UserPoint.emails).
type UserEmail {
  id:ID! @TreeIndex(index:"USER_INDEX")
  creation_ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  # No "!" here: last_activity_ts and email may be null.
  last_activity_ts: Timestamp @TreeIndex(index:"USER_INDEX")
  
  email: String @TreeIndex(index:"USER_INDEX")
}

# Device identifier attached to a user point (element type of UserPoint.devices).
type UserDevicePoint  {
  id:ID! @TreeIndex(index:"USER_INDEX")
  creation_ts:Timestamp! @TreeIndex(index:"USER_INDEX")
  # Date computed from creation_ts by the ISODate function; not indexed itself.
  creation_date:Date! @Function(name:"ISODate", params:["creation_ts"])

  device_info:DeviceInfo
  technical_identifiers:[UserDeviceTechnicalId!]!
  # Computed field: third-party cookie mappings resolved from this device point's id.
  mappings:[UserAgentMapping!]! @Function(name:"ThirdPartyCookieMappings", params:["id"])
}

# Device description referenced by UserDevicePoint.device_info; every field is optional and indexed.
type DeviceInfo  {
  # Free-form string attributes
  brand:String @TreeIndex(index:"USER_INDEX")
  browser_version:String @TreeIndex(index:"USER_INDEX")
  carrier:String @TreeIndex(index:"USER_INDEX")
  model:String @TreeIndex(index:"USER_INDEX")
  os_version:String @TreeIndex(index:"USER_INDEX")

  # Enum-typed attributes; these enums are predefined and do not need to be declared in the schema.
  agent_type:UserAgentType @TreeIndex(index:"USER_INDEX")
  browser_family:BrowserFamily @TreeIndex(index:"USER_INDEX")
  form_factor:FormFactor @TreeIndex(index:"USER_INDEX")
  os_family:OperatingSystemFamily @TreeIndex(index:"USER_INDEX")
}

# Technical identifier of a device point (element type of UserDevicePoint.technical_identifiers).
type UserDeviceTechnicalId  {
   id:ID! @TreeIndex(index:"USER_INDEX")
   creation_ts:Timestamp! @TreeIndex(index:"USER_INDEX")
   expiration_ts:Timestamp! @TreeIndex(index:"USER_INDEX")
   last_seen_ts:Timestamp! @TreeIndex(index:"USER_INDEX")

   registry_id:String! @TreeIndex(index:"USER_INDEX")
   # Kind of technical identifier, e.g. MUM_ID (the type the ThirdPartyCookieMappings function relies on).
   type:String! @TreeIndex(index:"USER_INDEX")
}

# Element type of UserDevicePoint.mappings, produced by the ThirdPartyCookieMappings function.
# Only user_agent_id is indexed; the other fields can be selected but not filtered on.
type UserAgentMapping  {
   last_seen:Timestamp
   user_agent_id:String @TreeIndex(index:"USER_INDEX")
   vector_id:String
}


### User content

# User activity (element type of UserPoint.activities); groups the events that belong to it.
type UserActivity {
  id: ID!
  type: UserActivityType!
  # Read from $site_id, falling back to $app_id when the first path is empty.
  # @ReferenceTable lets query authors pick a channel by name instead of by identifier.
  channel_id:String @TreeIndex(index:"USER_INDEX") @ReferenceTable(type:"CORE_OBJECT", model_type:"CHANNELS") @Property(paths:["$site_id", "$app_id"])
  source: UserActivitySource!
  ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  duration: Int @TreeIndex(index:"USER_INDEX")
  
  events: [UserEvent!]!
}

# User event; referenced both from UserPoint.events and from UserActivity.events.
type UserEvent {
  id: ID!
  ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  # Date computed from ts by the ISODate function; not indexed itself.
  date:Date! @Function(params:["ts"], name:"ISODate")

  name:String! @TreeIndex(index:"USER_INDEX")
  # Taken from the parent object ([parent] goes one level up the tree):
  # its $site_id, or its $app_id when the first path is empty.
  channel_id:String @TreeIndex(index:"USER_INDEX") @ReferenceTable(model_type:"CHANNELS", type:"CORE_OBJECT") @Property(paths:["[parent].$site_id", "[parent].$app_id"])
  url: String @TreeIndex(index:"USER_INDEX")
  referrer:String @TreeIndex(index:"USER_INDEX")
}

# User profile (element type of UserPoint.profiles).
type UserProfile {
  id: ID! 
  creation_ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  last_modified_ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  
  # @ReferenceTable lets query authors pick a compartment by name instead of by identifier.
  compartment_id: String! @TreeIndex(index:"USER_INDEX") @ReferenceTable(model_type:"COMPARTMENTS", type:"CORE_OBJECT")
  # Optional here (no "!"), unlike UserAccount.user_account_id.
  user_account_id: String @TreeIndex(index:"USER_INDEX")
}

# User choice (element type of UserPoint.choices).
type UserChoice {
  id: ID! 
  creation_ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  choice_ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  processing_id: String! @TreeIndex(index:"USER_INDEX")
  choice_acceptance_value: Boolean! @TreeIndex(index:"USER_INDEX")

  # The fields below carry no @TreeIndex: they can be returned in SELECT
  # but cannot be used in WHERE clauses.
  user_account_id: String
  compartment_id: String
  email_hash: String
  user_agent_id: String
  channel_id: String
}


### Technical objects
  
# Segment membership of a user point (element type of UserPoint.segments).
type UserSegment {
  # @ReferenceTable lets query authors pick a segment by name instead of by identifier.
  id: ID! @TreeIndex(index:"USER_INDEX") @ReferenceTable(model_type:"SEGMENTS", type:"CORE_OBJECT")
  creation_ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  last_modified_ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  # Optional (no "!"): may be null.
  expiration_ts: Timestamp @TreeIndex(index:"USER_INDEX")
}

# Scenario state of a user point (element type of UserPoint.scenarios); every field is indexed.
type UserScenario {
  id: ID! @TreeIndex(index:"USER_INDEX")
  scenario_id: String! @TreeIndex(index:"USER_INDEX")
  execution_id: String! @TreeIndex(index:"USER_INDEX")
  node_id: String! @TreeIndex(index:"USER_INDEX")
  # callback_ts and active are optional (no "!"); the other fields are mandatory.
  callback_ts: Timestamp @TreeIndex(index:"USER_INDEX")
  start_ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  node_start_ts: Timestamp! @TreeIndex(index:"USER_INDEX")
  active: Boolean @TreeIndex(index:"USER_INDEX")
}


### Deprecated identifiers

# type UserAgent {
#  id:ID!
#  creation_ts: Timestamp! 
#  last_activity_ts: Timestamp
#  user_agent_info:UserAgentInfo @Function(name:"DeviceInfo", params:["id"])
# }

# type UserAgentInfo  {
#   form_factor:FormFactor
#   brand:String
#   browser_family:BrowserFamily
#   browser_version:String
#   carrier:String
#   model:String
#   os_family:OperatingSystemFamily
#   os_version:String
#   agent_type:UserAgentType
# }

Syntax highlights

The ! operator

The ! operator marks elements as mandatory. That means the element is expected not to be null.

# Illustration only: each field is declared twice to contrast the optional and mandatory forms.
# A real type declares each field name once.
type MyType {
    user_account_id: String # doesn't necessarily have a user account
    user_account_id: String! # has a user account
    events: [UserEvent!]! # has a list of events, in which each event can't be null
    events: [UserEvent!] # doesn't necessarily have a list of events, but lists can't have null elements
}

If you add the ! operator to a field that happens to have null values, the entire object won't be indexed.

It is hard to ensure a field will always have a value in all the data you'll put into the platform, whatever the ingestion method. Therefore, we recommend not using this operator in your schema for fields other than the predefined ones.

The ID type

This type is treated as a keyword string, but it marks the value as an identifier, i.e. data that is not meaningful to a user.

type UserChoice {
  id: ID! 
}

Basic types

Several native types are available for use in your schema.

type UserProfile {
  id: ID!
  creation_ts: Timestamp
  email: String
  age: Int
  active: Boolean
}

Timestamps and dates

A best practice is to import objects with dates as Timestamp.

To display the value as date and time when running queries or in exports, you can use the Date type.

// Origin activity
{
    ...
    "$ts": 1632753811859,
    "other_date": "2021-09-27T14:43:31.859Z",
    "other_ts": 1632753811859
    ...
}
type UserActivity {
    ...
    ts: Timestamp @TreeIndex(index:"USER_INDEX")
    other_date: Date
    other_ts: Timestamp
    date: Date @Function(name:"ISODate", params:["ts"]) 
    ...
}

## Doing SELECT { ts other_date other_ts date } ...
## returns 
##     "ts": 1632753811859,
##     "other_date": "2021-09-27T14:43:31.859Z",
##     "other_ts": 1632753811859,
##     "date": "2021-09-27T14:43:31.859Z",

You usually get data as Timestamp and generate the Date type from the Timestamp with the ISODate function. If not, then ensure you get data in the correct format. There is no implicit conversion between timestamps and dates.

// Origin activity
{
    ...
    "other_date": 1632753811859,
    ...
}


type UserActivity {
    # This won't work as received data is a timestamp.
    other_date: Date
}

## SELECT { other_date } ...
## throws an error

Both types are compatible with Date operators in queries. Only use one @TreeIndex directive when creating a date from a timestamp: this will save space in the index, and both types have the same capabilities in queries.

Directives

@TreeIndexRoot

This directive marks the root element of an Object Tree Index. The index property gives the name of the Object Tree Index.

It should always be USER_INDEX as multiple indexes are not currently supported.

type UserPoint @TreeIndexRoot(index:"USER_INDEX"){
}

@TreeIndex

This directive makes a field available in the WHERE clause and in Aggregation operations of your OTQL queries. Fields that don't have this directive can't be used in the WHERE clause but can still be retrieved in the SELECT clause.

type UserEvent {
   # id and ts carry no @TreeIndex here: they can be selected but not filtered on
   id:ID!
   ts:Timestamp! 
   # url and referrer properties are now available in WHERE clauses
   url:String @TreeIndex(index:"USER_INDEX")
   referrer:String @TreeIndex(index:"USER_INDEX")
}

Don't mark every field with this directive. Some fields, like first name, last name ... will never be used in WHERE clauses and would only make your index larger.

The @TreeIndex directive is mandatory for some default properties. They already have that directive in the default schema, and you shouldn't remove it, or your schema won't be validated.

The value of the index in @TreeIndex should always be USER_INDEX.

When registering a String in a Tree Index with the directive @TreeIndex, you can specify how the field should be indexed, depending on how you want to use it later.

Two modes are available, text and keyword.

type myType {
   mystring:String @TreeIndex(index:"USER_INDEX", data_type: "text")
   secondstring:String @TreeIndex(index:"USER_INDEX", data_type: "keyword")
}

String indexed as text

This mode treats your value as a set of words (e.g. a text). For example, the value 'The QUICK brown fox JuMpS, over the Lazy doG.' will be considered as the list of:

  • the

  • quick

  • brown

  • fox

  • jumps

  • over

  • lazy

  • dog

As you can see, some transformations were done before storing the data:

  • all the words were put in lowercase -> all string operators will be case insensitive on a field indexed with data_type: text

  • the original string was split, and the splitting characters were removed (here, the space, the comma and the period)

The method used to split the text into words is described in great detail here. The most common characters that trigger a split are (non-exhaustive list):

  • .

  • -

  • '

  • "

  • ,

  • ;

  • ?

  • !

The data_type: "text" mode should be used when you're working with:

  • Full sentences (ex. a Page Title)

  • URLs

  • List of keywords (separated by a splitting character as listed above)

  • similar text

Generally, this mode is used when you don't have great control over the value being collected in this field, and you want to do "broad" queries based on it.

String indexed as keyword

This mode treats your value as a single word. No transformation is applied to the provided value. The data_type: "keyword" mode should be used when you're working with:

  • Single values

  • Ids passed as text (ex: UUIDs, productId, categoryId, etc.)

  • Every time that you already know the values that are passed in the field (e.g. when the field data is linked to a taxonomy)

  • etc.

Generally, this mode is used when you have great control over the value being collected in this field, and you want to run exact-match (equality) queries on it later.

@Property

By default, the path associated with each of your properties is the name of these properties. You can change this behavior with the @Property directive.

type UserEvent {
   id:ID!
   ts:Timestamp! 
   name:String!
   # We are creating shortcuts to the $url, $referrer and $items properties
   # that are normally in a $properties object in the user event.
   # This will make them easier to query
   url:String @Property(path:"$properties.$url")
   referrer:String @Property(path:"$properties.$referrer")
   products:[Product] @Property(path:"$properties.$items")
}

# Custom type for the elements of UserEvent.products (read from $properties.$items).
type Product {
   # Here we simply change the name into id and name instead of $id and $name
   id: String @TreeIndex(index:"USER_INDEX") @Property(path:"$id")
   name: String @TreeIndex(index:"USER_INDEX") @Property(path:"$name")
}

All the properties in the default schema already redefine their path. For example, the creation_ts property in the UserPoint object points to the $creation_ts property in the stored data. The declaration should theoretically have used the @Property directive, but this is unnecessary: the mapping is done for you.

# Illustration only: creation_ts is shown twice to compare the explicit and the shortcut declaration.
type UserPoint {
    # What should have been declared
    creation_ts: Timestamp! @Property(path:"$creation_ts")
    # What is declared as a shortcut
    creation_ts: Timestamp!
}
type Product {
   # We do have to use the @Property directive as those properties
   # don't exist in the default schema for a Product object type
   id: String @Property(path:"$id")
   name: String @Property(path:"$name")
}

Taking value from multiple paths

You can define multiple paths to get the data from. If the first path is empty, the second one will be used, and so on.

In this example, the user activity's channel ID is either the site ID or the app ID, depending on the user activity's context.

type MyType {
    channel_id: String @Property(paths:["$site_id", "$app_id"])
}

Available tokens

You can use the [parent] token to go up in the object tree when defining a path.

type MyType {
    creative_id:String @Property(path:"[parent].[parent].$origin.$creative_id")
}

@Mirror

This directive allows you to create custom types based on predefined types.

# UserEvent type has been renamed ArticleView
# Not really interesting and should be avoided
type ArticleView @Mirror(object_type:"UserEvent"){}

# More advanced usage : ArticleView object are UserEvents
# with a name of "navigation.article"
type ArticleView @Mirror(object_type:"UserEvent", filter:"name == \"navigation.article\""){}

Sample usage: custom types with filters

type UserPoint @TreeIndexRoot(index:"USER_INDEX"){
  ###
  basketviews: [BasketView]
  productviews: [ProductView]
}

type BasketView @Mirror(object_type:"UserEvent", filter:"name == \"$basket_view\""){}
type ProductView @Mirror(object_type:"UserEvent", filter:"name == \"$page_view\""){}

@Function

The @Function directive is used to declare a calculated field with a set of predefined functions.

ISODate

This function creates a date from a timestamp.

type MyType {
    # creation_date is a Date created from the timestamp creation_ts
    creation_date:Date! @Function(name:"ISODate", params:["creation_ts"])
}

ThirdPartyCookieMappings

To retrieve third-party cookie mappings for a given device point, the ThirdPartyCookieMappings function can be used:

type UserDevicePoint  {
  id:ID! @TreeIndex(index:"USER_INDEX")
  ...
  mappings:[UserAgentMapping!]! @Function(name:"ThirdPartyCookieMappings", params:["id"])
}

type UserAgentMapping  {
   last_seen:Timestamp
   user_agent_id:String
   vector_id:String
}

The function works on device points that have a device technical id of type MUM_ID attached to them. It translates the MUM_ID into a vector_id (mum:-1234 -> vec:1234) and retrieves attached partners' 3P cookies.

See user_agent_id section in the device identifiers documentation for more information on the user_agent_id formatting (including partners' 3P cookies).

DeviceInfo (legacy)

This function is only used on datamarts referencing the legacy type UserAgent.

For datamarts referencing the new type UserDevicePoint, we suggest using the function described above: ThirdPartyCookieMappings.

This function extracts device information for an agent identifier.

type UserAgent  {
   id:ID! @TreeIndex(index:"USER_INDEX")
   user_agent_info:UserAgentInfo @Function(name:"DeviceInfo", params:["id"])
}

The UserAgentInfo class has the following properties:

type UserAgentInfo  {
   form_factor:FormFactor
   brand:String
   browser_family:BrowserFamily
   browser_version:String
   carrier:String
   model:String
   os_family:OperatingSystemFamily
   os_version:String
   agent_type:UserAgentType
}

### The following enums are predefined.
### It is not necessary to define them 

enum FormFactor {
    WEARABLE_COMPUTER
    TABLET
    SMARTPHONE
    GAME_CONSOLE
    SMART_TV
    PERSONAL_COMPUTER
    OTHER
}

enum BrowserFamily {
    OTHER
    CHROME
    IE
    FIREFOX
    SAFARI
    OPERA
    STOCK_ANDROID
    BOT
    EMAIL_CLIENT
    MICROSOFT_EDGE
}

enum OperatingSystemFamily {
    OTHER
    WINDOWS
    MAC_OS
    LINUX
    ANDROID
    IOS
}

enum UserAgentType {
    WEB_BROWSER
    MOBILE_APP
}

@ReferenceTable

When users create their queries using your schema, they usually remember some elements they search for but don't know their identifiers.

You can add the @ReferenceTable directive to fields storing channel, compartment and segment identifiers. That way, users get autocompletion on the elements' names instead of their identifiers when creating their queries.

type UserSegment  {
   id:ID! @ReferenceTable(type:"CORE_OBJECT", model_type:"SEGMENTS") @TreeIndex(index:"USER_INDEX")
}

type UserActivity  {
   channel_id:String @ReferenceTable(model_type:"CHANNELS", type:"CORE_OBJECT") @TreeIndex(index:"USER_INDEX") @Property(paths:["$site_id", "$app_id"])
}

type UserProfile  {
   compartment_id:String! @ReferenceTable(model_type:"COMPARTMENTS", type:"CORE_OBJECT") @TreeIndex(index:"USER_INDEX")
}

type UserEvent {
   channel_id:String @ReferenceTable(model_type:"CHANNELS", type:"CORE_OBJECT") @Property(paths:["[parent].$site_id", "[parent].$app_id"]) @TreeIndex(index:"USER_INDEX")
}

@EdgeAvailability

This directive marks properties as usable in queries when creating Edge segments.

type UserAccount  {
   id:ID!
   # This property won't be usable in Edge segment queries
   compartment_id:String! 
   # This property will be usable in Edge segment queries
   user_account_id:String! @TreeIndex(index:"USER_INDEX") @EdgeAvailability
}

Best practices

Do not index ISODate function result

Do not index the output of the ISODate Function. You should index the timestamp value only.

# DO
type UserAgent  {
   creation_ts:Timestamp! @TreeIndex(index:"USER_INDEX")
   creation_date:Date! @Function(name:"ISODate", params:["creation_ts"])
   user_agent_info:UserAgentInfo @Function(params:["id"], name:"DeviceInfo")
   id:ID!
   last_activity_ts:Timestamp
}

# DON'T
type UserAgent  {
   # Anti-pattern: the timestamp is left unindexed...
   creation_ts:Timestamp! 
   # ...while the ISODate result derived from it is indexed. Index creation_ts instead.
   creation_date:Date! @Function(name:"ISODate", params:["creation_ts"]) @TreeIndex(index:"USER_INDEX")
   user_agent_info:UserAgentInfo @Function(params:["id"], name:"DeviceInfo")
   id:ID!
   last_activity_ts:Timestamp
}

UserEvent indexed twice

In some scenarios, you may need events both directly under the UserPoint and inside user activities: for example, when you want to use frequency OTQL directives on UserEvents and also build queries on several events that occurred within a single activity.

In any other case, do not duplicate the UserEvent. Either use it in the user point or the user activity.

# Only do if in a specific scenario requiring it
type UserPoint @TreeIndexRoot(index:"USER_INDEX"){
  ###
  activities: [UserActivity!]!
  events:[UserEvent!]!
}

type UserActivity {
  ###
  events: [UserEvent!]!
}

# NOTE(review): this type mirrors UserEvent under the same name, UserEvent.
# Confirm this self-mirror is intended; it is not explained elsewhere on this page.
type UserEvent @Mirror(object_type:"UserEvent") {
   # Only name is indexed here; id and ts can be selected but not filtered on.
   name:String! @TreeIndex(index:"USER_INDEX")
   id:ID!
   ts:Timestamp!
}

Last updated