Back to Documentation
    DocumentationintegrationsFeb 11, 2025

    GA4 BigQuery Export Schema Reference

    Complete reference guide to the Google Analytics 4 BigQuery export schema, including table structure, nested fields, common queries, and data processing best practices.

    Overview

    Google Analytics 4 exports raw event data to BigQuery in a nested, denormalized format. Understanding this schema is essential for effective analysis and integration with Cogny.

    Daily Tables: analytics_PROPERTY_ID.events_YYYYMMDD Intraday Tables: analytics_PROPERTY_ID.events_intraday_YYYYMMDD

    Table Structure

    events_* Table Schema

    Each row represents a single event with nested fields for event parameters, user properties, and e-commerce data.

    -- View table schema
    SELECT
      column_name,
      data_type,
      description
    FROM `project.analytics_123456789.INFORMATION_SCHEMA.COLUMNS`
    WHERE table_name LIKE 'events_%'
    ORDER BY ordinal_position
    

    Top-Level Fields

    FieldTypeDescription
    event_dateSTRINGDate when the event was logged (YYYYMMDD format)
    event_timestampINTEGERTime when the event was logged (microseconds since Unix epoch)
    event_nameSTRINGName of the event (e.g., 'page_view', 'purchase')
    event_paramsARRAY<STRUCT>Array of event parameters
    event_previous_timestampINTEGERTimestamp of previous event by this user
    event_value_in_usdFLOATValue of the event in USD
    event_bundle_sequence_idINTEGERSequential ID of the event bundle
    event_server_timestamp_offsetINTEGERTimestamp offset between collection and server
    user_idSTRINGUser ID set via setUserId API
    user_pseudo_idSTRINGPseudonymous ID for the user (cookie-based)
    user_propertiesARRAY<STRUCT>Array of user properties
    user_first_touch_timestampINTEGERFirst time user visited (microseconds)
    user_ltvSTRUCTUser lifetime value information
    deviceSTRUCTDevice information
    geoSTRUCTGeographic information
    app_infoSTRUCTApp information (mobile apps)
    traffic_sourceSTRUCTTraffic source information
    stream_idSTRINGNumeric ID of the data stream
    platformSTRINGPlatform (web, ios, android)
    ecommerceSTRUCTE-commerce transaction data
    itemsARRAY<STRUCT>Array of item (product) details

    Nested Structures

    event_params

    Event parameters stored as key-value pairs:

    -- Structure
    STRUCT<
      key STRING,
      value STRUCT<
        string_value STRING,
        int_value INT64,
        float_value FLOAT64,
        double_value FLOAT64
      >
    >
    

    Common event parameters:

    KeyTypeDescription
    page_locationstringFull URL of the page
    page_titlestringTitle of the page
    page_referrerstringReferrer URL
    engagement_time_msecintEngagement time in milliseconds
    session_engagedintWhether session was engaged (1/0)
    ga_session_idintSession ID
    ga_session_numberintSession number for user

    Example query:

    SELECT
      event_name,
      (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location') as page_location,
      (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_title') as page_title,
      (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'engagement_time_msec') as engagement_time
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
      AND event_name = 'page_view'
    LIMIT 100
    

    user_properties

    User properties stored as key-value pairs:

    -- Structure
    STRUCT<
      key STRING,
      value STRUCT<
        string_value STRING,
        int_value INT64,
        float_value FLOAT64,
        double_value FLOAT64,
        set_timestamp_micros INT64
      >
    >
    

    Example query:

    SELECT
      user_pseudo_id,
      (SELECT value.string_value FROM UNNEST(user_properties) WHERE key = 'user_type') as user_type,
      (SELECT value.string_value FROM UNNEST(user_properties) WHERE key = 'plan_level') as plan_level
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
    GROUP BY user_pseudo_id, user_type, plan_level
    LIMIT 100
    

    device

    Device information:

    -- Structure
    STRUCT<
      category STRING,              -- desktop, mobile, tablet
      mobile_brand_name STRING,     -- Apple, Samsung, etc.
      mobile_model_name STRING,     -- iPhone 12, Galaxy S21, etc.
      mobile_marketing_name STRING, -- Marketing name
      mobile_os_hardware_model STRING,
      operating_system STRING,      -- iOS, Android, Windows, macOS
      operating_system_version STRING,
      vendor_id STRING,
      advertising_id STRING,
      language STRING,              -- en-us, fr-fr, etc.
      is_limited_ad_tracking STRING,
      time_zone_offset_seconds INT64,
      browser STRING,               -- Chrome, Safari, Firefox
      browser_version STRING,
      web_info STRUCT<
        browser STRING,
        browser_version STRING,
        hostname STRING
      >
    >
    

    Example query:

    SELECT
      device.category as device_category,
      device.operating_system,
      device.browser,
      COUNT(*) as event_count,
      COUNT(DISTINCT user_pseudo_id) as users
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
    GROUP BY 1, 2, 3
    ORDER BY event_count DESC
    

    geo

    Geographic information:

    -- Structure
    STRUCT<
      continent STRING,     -- Americas, Europe, Asia, etc.
      sub_continent STRING, -- Northern Europe, Western Asia, etc.
      country STRING,       -- United States, United Kingdom, etc.
      region STRING,        -- California, England, etc.
      metro STRING,         -- Metro area
      city STRING
    >
    

    Example query:

    SELECT
      geo.country,
      geo.city,
      COUNT(DISTINCT user_pseudo_id) as users,
      COUNT(*) as events
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
    GROUP BY 1, 2
    ORDER BY users DESC
    LIMIT 100
    

    traffic_source

    Traffic source information:

    -- Structure
    STRUCT<
      name STRING,   -- Traffic source name (google, facebook, etc.)
      medium STRING, -- Traffic medium (organic, cpc, referral, etc.)
      source STRING  -- Traffic source (google, facebook.com, etc.)
    >
    

    Example query:

    SELECT
      traffic_source.source,
      traffic_source.medium,
      COUNT(DISTINCT user_pseudo_id) as users,
      COUNT(DISTINCT CASE WHEN event_name = 'purchase' THEN user_pseudo_id END) as purchasers,
      SAFE_DIVIDE(
        COUNT(DISTINCT CASE WHEN event_name = 'purchase' THEN user_pseudo_id END),
        COUNT(DISTINCT user_pseudo_id)
      ) as conversion_rate
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
    GROUP BY 1, 2
    ORDER BY users DESC
    

    ecommerce

    E-commerce transaction data:

    -- Structure
    STRUCT<
      total_item_quantity INT64,
      purchase_revenue_in_usd FLOAT64,
      purchase_revenue FLOAT64,
      refund_value_in_usd FLOAT64,
      refund_value FLOAT64,
      shipping_value_in_usd FLOAT64,
      shipping_value FLOAT64,
      tax_value_in_usd FLOAT64,
      tax_value FLOAT64,
      unique_items INT64,
      transaction_id STRING
    >
    

    Example query:

    SELECT
      DATE(TIMESTAMP_MICROS(event_timestamp)) as date,
      COUNT(DISTINCT ecommerce.transaction_id) as transactions,
      SUM(ecommerce.purchase_revenue_in_usd) as revenue,
      AVG(ecommerce.purchase_revenue_in_usd) as avg_order_value
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY))
                            AND FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
      AND event_name = 'purchase'
      AND ecommerce.transaction_id IS NOT NULL
    GROUP BY 1
    ORDER BY 1 DESC
    

    items

    Product/item details:

    -- Structure
    STRUCT<
      item_id STRING,
      item_name STRING,
      item_brand STRING,
      item_variant STRING,
      item_category STRING,
      item_category2 STRING,
      item_category3 STRING,
      item_category4 STRING,
      item_category5 STRING,
      price_in_usd FLOAT64,
      price FLOAT64,
      quantity INT64,
      item_revenue_in_usd FLOAT64,
      item_revenue FLOAT64,
      item_refund_in_usd FLOAT64,
      item_refund FLOAT64,
      coupon STRING,
      affiliation STRING,
      location_id STRING,
      item_list_id STRING,
      item_list_name STRING,
      item_list_index STRING,
      promotion_id STRING,
      promotion_name STRING,
      creative_name STRING,
      creative_slot STRING
    >
    

    Example query:

    SELECT
      item.item_name,
      item.item_category,
      SUM(item.quantity) as total_quantity,
      SUM(item.item_revenue_in_usd) as total_revenue,
      COUNT(DISTINCT ecommerce.transaction_id) as transactions
    FROM `project.analytics_123456789.events_*`,
      UNNEST(items) as item
    WHERE _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
      AND event_name = 'purchase'
    GROUP BY 1, 2
    ORDER BY total_revenue DESC
    LIMIT 100
    

    Common Query Patterns

    1. Sessions and Users

    Calculate daily sessions and users:

    SELECT
      DATE(TIMESTAMP_MICROS(event_timestamp)) as date,
      COUNT(DISTINCT user_pseudo_id) as users,
      COUNT(DISTINCT CONCAT(user_pseudo_id,
        (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id'))) as sessions,
      SAFE_DIVIDE(
        COUNT(DISTINCT CONCAT(user_pseudo_id,
          (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id'))),
        COUNT(DISTINCT user_pseudo_id)
      ) as sessions_per_user
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY))
                            AND FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
    GROUP BY 1
    ORDER BY 1 DESC
    

    2. Page Views

    Analyze page view patterns:

    SELECT
      (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location') as page_location,
      (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_title') as page_title,
      COUNT(*) as page_views,
      COUNT(DISTINCT user_pseudo_id) as unique_users,
      AVG((SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'engagement_time_msec')) / 1000 as avg_engagement_seconds
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
      AND event_name = 'page_view'
    GROUP BY 1, 2
    ORDER BY page_views DESC
    LIMIT 100
    

    3. Conversion Funnel

    Build conversion funnel analysis:

    WITH funnel_steps AS (
      SELECT
        user_pseudo_id,
        COUNTIF(event_name = 'page_view') as step_1_landing,
        COUNTIF(event_name = 'view_item') as step_2_product_view,
        COUNTIF(event_name = 'add_to_cart') as step_3_add_to_cart,
        COUNTIF(event_name = 'begin_checkout') as step_4_checkout,
        COUNTIF(event_name = 'purchase') as step_5_purchase
      FROM `project.analytics_123456789.events_*`
      WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY))
                              AND FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
      GROUP BY user_pseudo_id
    )
    
    SELECT
      'Landing' as step,
      1 as step_number,
      COUNT(DISTINCT CASE WHEN step_1_landing > 0 THEN user_pseudo_id END) as users,
      1.0 as conversion_rate
    FROM funnel_steps
    
    UNION ALL
    
    SELECT
      'Product View' as step,
      2 as step_number,
      COUNT(DISTINCT CASE WHEN step_2_product_view > 0 THEN user_pseudo_id END) as users,
      SAFE_DIVIDE(
        COUNT(DISTINCT CASE WHEN step_2_product_view > 0 THEN user_pseudo_id END),
        COUNT(DISTINCT CASE WHEN step_1_landing > 0 THEN user_pseudo_id END)
      ) as conversion_rate
    FROM funnel_steps
    
    UNION ALL
    
    SELECT
      'Add to Cart' as step,
      3 as step_number,
      COUNT(DISTINCT CASE WHEN step_3_add_to_cart > 0 THEN user_pseudo_id END) as users,
      SAFE_DIVIDE(
        COUNT(DISTINCT CASE WHEN step_3_add_to_cart > 0 THEN user_pseudo_id END),
        COUNT(DISTINCT CASE WHEN step_2_product_view > 0 THEN user_pseudo_id END)
      ) as conversion_rate
    FROM funnel_steps
    
    UNION ALL
    
    SELECT
      'Checkout' as step,
      4 as step_number,
      COUNT(DISTINCT CASE WHEN step_4_checkout > 0 THEN user_pseudo_id END) as users,
      SAFE_DIVIDE(
        COUNT(DISTINCT CASE WHEN step_4_checkout > 0 THEN user_pseudo_id END),
        COUNT(DISTINCT CASE WHEN step_3_add_to_cart > 0 THEN user_pseudo_id END)
      ) as conversion_rate
    FROM funnel_steps
    
    UNION ALL
    
    SELECT
      'Purchase' as step,
      5 as step_number,
      COUNT(DISTINCT CASE WHEN step_5_purchase > 0 THEN user_pseudo_id END) as users,
      SAFE_DIVIDE(
        COUNT(DISTINCT CASE WHEN step_5_purchase > 0 THEN user_pseudo_id END),
        COUNT(DISTINCT CASE WHEN step_4_checkout > 0 THEN user_pseudo_id END)
      ) as conversion_rate
    FROM funnel_steps
    
    ORDER BY step_number
    

    4. User Acquisition

    Analyze user acquisition by channel:

    WITH first_visit AS (
      SELECT
        user_pseudo_id,
        MIN(event_timestamp) as first_visit_timestamp,
        ARRAY_AGG(
          STRUCT(traffic_source.source, traffic_source.medium, traffic_source.name)
          ORDER BY event_timestamp LIMIT 1
        )[OFFSET(0)] as first_traffic_source
      FROM `project.analytics_123456789.events_*`
      WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY))
                              AND FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
        AND event_name = 'first_visit'
      GROUP BY user_pseudo_id
    )
    
    SELECT
      first_traffic_source.source as acquisition_source,
      first_traffic_source.medium as acquisition_medium,
      COUNT(DISTINCT user_pseudo_id) as new_users,
      COUNT(DISTINCT CASE WHEN purchase_count > 0 THEN user_pseudo_id END) as purchasers,
      SAFE_DIVIDE(
        COUNT(DISTINCT CASE WHEN purchase_count > 0 THEN user_pseudo_id END),
        COUNT(DISTINCT user_pseudo_id)
      ) as conversion_rate
    FROM first_visit
    LEFT JOIN (
      SELECT
        user_pseudo_id,
        COUNT(*) as purchase_count
      FROM `project.analytics_123456789.events_*`
      WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY))
                              AND FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
        AND event_name = 'purchase'
      GROUP BY user_pseudo_id
    ) purchases USING(user_pseudo_id)
    GROUP BY 1, 2
    ORDER BY new_users DESC
    

    5. Cohort Analysis

    Build user retention cohorts:

    WITH cohorts AS (
      SELECT
        user_pseudo_id,
        DATE(TIMESTAMP_MICROS(MIN(user_first_touch_timestamp))) as cohort_date
      FROM `project.analytics_123456789.events_*`
      WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY))
                              AND FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
      GROUP BY user_pseudo_id
    ),
    
    user_activity AS (
      SELECT
        user_pseudo_id,
        DATE(TIMESTAMP_MICROS(event_timestamp)) as activity_date
      FROM `project.analytics_123456789.events_*`
      WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY))
                              AND FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
      GROUP BY user_pseudo_id, activity_date
    )
    
    SELECT
      cohort_date,
      DATE_DIFF(activity_date, cohort_date, DAY) as days_since_cohort,
      COUNT(DISTINCT c.user_pseudo_id) as cohort_size,
      COUNT(DISTINCT a.user_pseudo_id) as active_users,
      SAFE_DIVIDE(COUNT(DISTINCT a.user_pseudo_id), COUNT(DISTINCT c.user_pseudo_id)) as retention_rate
    FROM cohorts c
    LEFT JOIN user_activity a USING(user_pseudo_id)
    WHERE DATE_DIFF(activity_date, cohort_date, DAY) IN (0, 1, 7, 14, 30, 60, 90)
    GROUP BY 1, 2
    ORDER BY 1 DESC, 2
    

    Performance Optimization

    1. Partition Pruning

    Always use _TABLE_SUFFIX for date filtering:

    -- ✅ Good: Uses partition pruning
    SELECT COUNT(*)
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
    
    -- ❌ Bad: Scans all partitions
    SELECT COUNT(*)
    FROM `project.analytics_123456789.events_*`
    WHERE event_date BETWEEN '20250101' AND '20250131'
    

    2. Clustering

    GA4 tables are clustered by event_name. Filter by event name for better performance:

    -- ✅ Good: Uses clustering
    SELECT *
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX = '20250211'
      AND event_name IN ('purchase', 'add_to_cart')
    
    -- Less efficient: No event_name filter
    SELECT *
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX = '20250211'
    

    3. Materialized Views

    Create materialized views for frequently accessed aggregations:

    CREATE MATERIALIZED VIEW `project.analytics_123456789.daily_summary`
    AS
    SELECT
      event_date,
      traffic_source.source,
      traffic_source.medium,
      device.category,
      COUNT(DISTINCT user_pseudo_id) as users,
      COUNT(*) as events,
      COUNTIF(event_name = 'purchase') as purchases,
      SUM(CASE WHEN event_name = 'purchase' THEN ecommerce.purchase_revenue_in_usd END) as revenue
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX >= FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 365 DAY))
    GROUP BY 1, 2, 3, 4
    

    4. Query Caching

    Leverage BigQuery's automatic query caching:

    -- Identical queries within 24 hours use cache (no cost)
    SELECT COUNT(*)
    FROM `project.analytics_123456789.events_*`
    WHERE _TABLE_SUFFIX = '20250211'
    

    Data Quality Checks

    1. Check for Missing Data

    -- Identify gaps in daily exports
    WITH date_range AS (
      SELECT date
      FROM UNNEST(GENERATE_DATE_ARRAY(DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY), CURRENT_DATE() - 1)) as date
    ),
    
    exported_dates AS (
      SELECT DISTINCT PARSE_DATE('%Y%m%d', _TABLE_SUFFIX) as date
      FROM `project.analytics_123456789.events_*`
      WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY))
                              AND FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
    )
    
    SELECT
      dr.date,
      CASE WHEN ed.date IS NULL THEN 'Missing' ELSE 'Present' END as status
    FROM date_range dr
    LEFT JOIN exported_dates ed USING(date)
    WHERE ed.date IS NULL
    ORDER BY dr.date DESC
    

    2. Validate Event Counts

    -- Check for unusual event count variations
    WITH daily_counts AS (
      SELECT
        DATE(TIMESTAMP_MICROS(event_timestamp)) as date,
        COUNT(*) as event_count,
        COUNT(DISTINCT user_pseudo_id) as user_count
      FROM `project.analytics_123456789.events_*`
      WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY))
                              AND FORMAT_DATE('%Y%m%d', CURRENT_DATE() - 1)
      GROUP BY 1
    )
    
    SELECT
      date,
      event_count,
      user_count,
      AVG(event_count) OVER (ORDER BY date ROWS BETWEEN 7 PRECEDING AND 1 PRECEDING) as avg_7day,
      event_count / NULLIF(AVG(event_count) OVER (ORDER BY date ROWS BETWEEN 7 PRECEDING AND 1 PRECEDING), 0) as variance_ratio
    FROM daily_counts
    ORDER BY date DESC
    

    Next Steps

    Resources

    Need Implementation Help?

    Talk to Our Technical Team

    Schedule a technical consultation to discuss your integration requirements and implementation strategy.

    Schedule Demo