Browse Source

Extract device type from user agent info (#69322)

Shahzad 4 years ago
parent
commit
f7efa3eaba
16 changed files with 1043 additions and 16 deletions
  1. 3 2
      docs/reference/ingest/common-log-format-example.asciidoc
  2. 2 1
      docs/reference/ingest/processors/user-agent.asciidoc
  3. 179 0
      modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/DeviceTypeParser.java
  4. 5 3
      modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/IngestUserAgentPlugin.java
  5. 12 7
      modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentParser.java
  6. 6 0
      modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentProcessor.java
  7. 67 0
      modules/ingest-user-agent/src/main/resources/device_type_regexes.yml
  8. 217 0
      modules/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/DeviceTypeParserTests.java
  9. 65 1
      modules/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/UserAgentProcessorTests.java
  10. 177 0
      modules/ingest-user-agent/src/test/resources/test-desktop-devices.yml
  11. 124 0
      modules/ingest-user-agent/src/test/resources/test-mobile-devices.yml
  12. 22 0
      modules/ingest-user-agent/src/test/resources/test-other-devices.yml
  13. 123 0
      modules/ingest-user-agent/src/test/resources/test-robot-devices.yml
  14. 39 0
      modules/ingest-user-agent/src/test/resources/test-tablet-devices.yml
  15. 1 1
      modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/20_useragent_processor.yml
  16. 1 1
      modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/30_custom_regex.yml

+ 3 - 2
docs/reference/ingest/common-log-format-example.asciidoc

@@ -53,7 +53,7 @@ the processors as follows:
 
 [options="header"]
 |====
-| Processor type | Field  | Additional options | Description  
+| Processor type | Field  | Additional options | Description
 
 | <<date-processor,**Date**>>
 | `@timestamp`
@@ -247,7 +247,8 @@ The API returns:
             },
             "name": "Chrome",
             "device": {
-              "name": "Mac"
+              "name": "Mac",
+              "type": "Desktop"
             },
             "version": "52.0.2743.116"
           }

+ 2 - 1
docs/reference/ingest/processors/user-agent.asciidoc

@@ -69,7 +69,8 @@ Which returns
         "full": "Mac OS X 10.10.5"
       },
       "device" : {
-        "name" : "Mac"
+        "name" : "Mac",
+        "type" : "Desktop"
       },
     }
   }

+ 179 - 0
modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/DeviceTypeParser.java

@@ -0,0 +1,179 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.ingest.useragent;
+
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentType;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.elasticsearch.ingest.useragent.UserAgentParser.readParserConfigurations;
+import static org.elasticsearch.ingest.useragent.UserAgentParser.VersionedName;
+
+public class DeviceTypeParser {
+
+    private static final String OS_PARSERS = "os_parsers";
+    private static final String BROWSER_PARSER = "browser_parsers";
+    private static final String DEVICE_PARSER = "device_parsers";
+    private static final String AGENT_STRING_PARSER = "agent_string_parsers";
+    private static final String robot = "Robot", tablet = "Tablet", desktop = "Desktop", phone = "Phone";
+
+    private final List<String> patternListKeys = List.of(OS_PARSERS, BROWSER_PARSER, DEVICE_PARSER, AGENT_STRING_PARSER);
+
+    private final HashMap<String, ArrayList<DeviceTypeSubPattern>> deviceTypePatterns = new HashMap<>();
+
+    public void init(InputStream regexStream) throws IOException {
+        // EMPTY is safe here because we don't use namedObject
+        XContentParser yamlParser = XContentFactory.xContent(XContentType.YAML).createParser(NamedXContentRegistry.EMPTY,
+            LoggingDeprecationHandler.INSTANCE, regexStream);
+
+        XContentParser.Token token = yamlParser.nextToken();
+
+        if (token == XContentParser.Token.START_OBJECT) {
+            token = yamlParser.nextToken();
+
+            for (; token != null; token = yamlParser.nextToken()) {
+                String currentName = yamlParser.currentName();
+                if (token == XContentParser.Token.FIELD_NAME && patternListKeys.contains(currentName)) {
+                    List<Map<String, String>> parserConfigurations = readParserConfigurations(yamlParser);
+                    ArrayList<DeviceTypeSubPattern> subPatterns = new ArrayList<>();
+                    for (Map<String, String> map : parserConfigurations) {
+                        subPatterns.add(new DeviceTypeSubPattern(Pattern.compile((map.get("regex"))),
+                            map.get("replacement")));
+                    }
+                    deviceTypePatterns.put(currentName, subPatterns);
+                }
+            }
+        }
+
+        if (patternListKeys.size() != deviceTypePatterns.size()) {
+            throw new ElasticsearchParseException("not a valid regular expression file");
+        }
+    }
+
+    public String findDeviceType(String agentString, VersionedName userAgent, VersionedName os, VersionedName device) {
+        if (deviceTypePatterns.isEmpty()) {
+            return null;
+        }
+        if (agentString != null) {
+            String deviceType = findMatch(deviceTypePatterns.get(AGENT_STRING_PARSER), agentString);
+            if (deviceType != null) {
+                return deviceType;
+            }
+        }
+        return findDeviceType(userAgent, os, device);
+    }
+
+    public String findDeviceType(VersionedName userAgent, VersionedName os, VersionedName device) {
+
+        if (deviceTypePatterns.isEmpty()) {
+            return null;
+        }
+
+        ArrayList<String> extractedDeviceTypes = new ArrayList<>();
+
+        for (String patternKey : patternListKeys) {
+            String deviceType = null;
+            switch (patternKey) {
+                case OS_PARSERS:
+                    if (os != null && os.name != null) {
+                        deviceType = findMatch(deviceTypePatterns.get(patternKey), os.name);
+                    }
+                    break;
+                case BROWSER_PARSER:
+                    if (userAgent != null && userAgent.name != null) {
+                        deviceType = findMatch(deviceTypePatterns.get(patternKey), userAgent.name);
+                    }
+                    break;
+                case DEVICE_PARSER:
+                    if (device != null && device.name != null) {
+                        deviceType = findMatch(deviceTypePatterns.get(patternKey), device.name);
+                    }
+                    break;
+                default:
+                    break;
+            }
+
+            if (deviceType != null) {
+                extractedDeviceTypes.add(deviceType);
+            }
+        }
+
+
+        if (extractedDeviceTypes.contains(robot)) {
+            return robot;
+        }
+        if (extractedDeviceTypes.contains(tablet)) {
+            return tablet;
+        }
+        if (extractedDeviceTypes.contains(phone)) {
+            return phone;
+        }
+        if (extractedDeviceTypes.contains(desktop)) {
+            return desktop;
+        }
+
+        return "Other";
+    }
+
+    private String findMatch(List<DeviceTypeSubPattern> possiblePatterns, String matchString) {
+        String name;
+        for (DeviceTypeSubPattern pattern : possiblePatterns) {
+            name = pattern.match(matchString);
+            if (name != null) {
+                return name;
+            }
+        }
+        return null;
+    }
+
+    static final class DeviceTypeSubPattern {
+        private final Pattern pattern;
+        private final String nameReplacement;
+
+        DeviceTypeSubPattern(Pattern pattern, String nameReplacement) {
+            this.pattern = pattern;
+            this.nameReplacement = nameReplacement;
+        }
+
+        public String match(String matchString) {
+            String name = null;
+
+            Matcher matcher = pattern.matcher(matchString);
+
+            if (matcher.find() == false) {
+                return null;
+            }
+
+            int groupCount = matcher.groupCount();
+
+            if (nameReplacement != null) {
+                if (nameReplacement.contains("$1") && groupCount >= 1 && matcher.group(1) != null) {
+                    name = nameReplacement.replaceFirst("\\$1", Matcher.quoteReplacement(matcher.group(1)));
+                } else {
+                    name = nameReplacement;
+                }
+            }
+
+            return name;
+        }
+    }
+
+}

+ 5 - 3
modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/IngestUserAgentPlugin.java

@@ -55,7 +55,8 @@ public class IngestUserAgentPlugin extends Plugin implements IngestPlugin {
         Map<String, UserAgentParser> userAgentParsers = new HashMap<>();
 
         UserAgentParser defaultParser = new UserAgentParser(DEFAULT_PARSER_NAME,
-                IngestUserAgentPlugin.class.getResourceAsStream("/regexes.yml"), cache);
+            IngestUserAgentPlugin.class.getResourceAsStream("/regexes.yml"),
+            IngestUserAgentPlugin.class.getResourceAsStream("/device_type_regexes.yml"), cache);
         userAgentParsers.put(DEFAULT_PARSER_NAME, defaultParser);
 
         if (Files.exists(userAgentConfigDirectory) && Files.isDirectory(userAgentConfigDirectory)) {
@@ -66,8 +67,9 @@ public class IngestUserAgentPlugin extends Plugin implements IngestPlugin {
                 Iterable<Path> iterable = regexFiles::iterator;
                 for (Path path : iterable) {
                     String parserName = path.getFileName().toString();
-                    try (InputStream regexStream = Files.newInputStream(path, StandardOpenOption.READ)) {
-                        userAgentParsers.put(parserName, new UserAgentParser(parserName, regexStream, cache));
+                    try (InputStream regexStream = Files.newInputStream(path, StandardOpenOption.READ);
+                         InputStream deviceTypeRegexStream = IngestUserAgentPlugin.class.getResourceAsStream("/device_type_regexes.yml")) {
+                        userAgentParsers.put(parserName, new UserAgentParser(parserName, regexStream, deviceTypeRegexStream, cache));
                     }
                 }
             }

+ 12 - 7
modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentParser.java

@@ -26,17 +26,21 @@ import java.util.regex.Pattern;
 final class UserAgentParser {
 
     private final UserAgentCache cache;
+    private final DeviceTypeParser deviceTypeParser = new DeviceTypeParser();
     private final List<UserAgentSubpattern> uaPatterns = new ArrayList<>();
     private final List<UserAgentSubpattern> osPatterns = new ArrayList<>();
     private final List<UserAgentSubpattern> devicePatterns = new ArrayList<>();
     private final String name;
 
-    UserAgentParser(String name, InputStream regexStream, UserAgentCache cache) {
+    UserAgentParser(String name, InputStream regexStream, InputStream deviceTypeRegexStream, UserAgentCache cache) {
         this.name = name;
         this.cache = cache;
 
         try {
             init(regexStream);
+            if (deviceTypeRegexStream != null) {
+                deviceTypeParser.init(deviceTypeRegexStream);
+            }
         } catch (IOException e) {
             throw new ElasticsearchParseException("error parsing regular expression file", e);
         }
@@ -96,8 +100,8 @@ final class UserAgentParser {
         }
     }
 
-    private List<Map<String, String>> readParserConfigurations(XContentParser yamlParser) throws IOException {
-        List <Map<String, String>> patternList = new ArrayList<>();
+    static List<Map<String, String>> readParserConfigurations(XContentParser yamlParser) throws IOException {
+        List<Map<String, String>> patternList = new ArrayList<>();
 
         XContentParser.Token token = yamlParser.nextToken();
         if (token != XContentParser.Token.START_ARRAY) {
@@ -156,9 +160,8 @@ final class UserAgentParser {
             VersionedName userAgent = findMatch(uaPatterns, agentString);
             VersionedName operatingSystem = findMatch(osPatterns, agentString);
             VersionedName device = findMatch(devicePatterns, agentString);
-
-            details = new Details(userAgent, operatingSystem, device);
-
+            String deviceType = deviceTypeParser.findDeviceType(agentString, userAgent, operatingSystem, device);
+            details = new Details(userAgent, operatingSystem, device, deviceType);
             cache.put(name, agentString, details);
         }
 
@@ -182,11 +185,13 @@ final class UserAgentParser {
         public final VersionedName userAgent;
         public final VersionedName operatingSystem;
         public final VersionedName device;
+        public final String deviceType;
 
-        Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device) {
+        Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device, String deviceType) {
             this.userAgent = userAgent;
             this.operatingSystem = operatingSystem;
             this.device = device;
+            this.deviceType = deviceType;
         }
     }
 

+ 6 - 0
modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentProcessor.java

@@ -125,8 +125,14 @@ public class UserAgentProcessor extends AbstractProcessor {
                     Map<String, String> deviceDetails = new HashMap<>(1);
                     if (uaClient.device != null && uaClient.device.name != null) {
                         deviceDetails.put("name", uaClient.device.name);
+                        deviceDetails.put("type", uaClient.deviceType);
                     } else {
                         deviceDetails.put("name", "Other");
+                        if (uaClient.deviceType != null) {
+                            deviceDetails.put("type", uaClient.deviceType);
+                        } else {
+                            deviceDetails.put("type", "Other");
+                        }
                     }
                     uaDetails.put("device", deviceDetails);
                     break;

+ 67 - 0
modules/ingest-user-agent/src/main/resources/device_type_regexes.yml

@@ -0,0 +1,67 @@
+# Apache License, Version 2.0
+# ===========================
+#
+# Copyright 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+## Custom parser being added to support device types
+
+# Rules applied to the parsed OS name. They are tried from top to bottom and the
+# first rule that matches decides the label, so the order of the entries matters.
+os_parsers:
+  #  Robot
+  - regex: 'Bot|bot|spider|Spider|Crawler|crawler|AppEngine-Google'
+    replacement: 'Robot'
+  #  Desktop OS, Most Common
+  - regex: '^(Windows$|Windows NT$|Mac OS X|Linux$|Chrome OS|Fedora$|Ubuntu$)'
+    replacement: 'Desktop'
+  #  Phone OS
+  - regex: '^(Android$|iOS|Windows Phone|Firefox OS|BlackBerry OS|KaiOS|Sailfish$|Maemo)'
+    replacement: 'Phone'
+  #  Desktop OS, Not Common
+  - regex: '^(Windows XP|Windows 7|Windows 10|FreeBSD|OpenBSD|Arch Linux|Solaris|NetBSD|SUSE|SunOS|BeOS\/Haiku)'
+    replacement: 'Desktop'
+  #  Tablet OS (only reached when none of the rules above matched)
+  - regex: 'Tablet|BlackBerry Tablet OS|iPad|FireOS|Crosswalk'
+    replacement: 'Tablet'
+
+# Rules applied to the parsed browser (user agent) name. They are tried from top
+# to bottom and the first rule that matches decides the label.
+browser_parsers:
+  #  Robot
+  - regex: 'Bot|bot|spider|Spider|Crawler|crawler|AppEngine-Google'
+    replacement: 'Robot'
+  #  Desktop Browsers
+  - regex: '^(Chrome$|Chromium$|Edge$|Firefox$|IE$|Maxthon$|Opera$|Safari$|SeaMonkey$|Vivaldi$|Yandex Browser$)'
+    replacement: 'Desktop'
+  #  Phone Browsers, Most Common
+  - regex: '^(Chrome Mobile$|Chrome Mobile iOS|Firefox Mobile|Firefox iOS|Edge Mobile|Android|Facebook|Instagram|IE Mobile)'
+    replacement: 'Phone'
+  #  Phone Browsers, Not Common
+  - regex: '^(BlackBerry WebKit|OktaMobile|Sailfish Browser|Amazon Silk|Pinterest|Flipboard)'
+    replacement: 'Phone'
+  #  Tablet Browsers (only reached when none of the rules above matched)
+  - regex: 'Tablet|BlackBerry Tablet OS|iPad|FireOS|Crosswalk'
+    replacement: 'Tablet'
+
+# Rules applied to the parsed device name. Every rule in this list reports
+# 'Tablet'; the first rule that matches is used.
+device_parsers:
+  #  Generic tablet markers
+  - regex: 'Tablet|BlackBerry Tablet OS|iPad|FireOS|Crosswalk|Kindle'
+    replacement: 'Tablet'
+  #  Samsung tablets
+  - regex: 'SM-T\d+|SM-P\d+|GT-P\d+'
+    replacement: 'Tablet'
+  #  Other tablets
+  - regex: 'Asus Nexus \d+|Lenovo TB'
+    replacement: 'Tablet'
+
+# Rules applied to the complete, unparsed user agent string. The first rule that matches decides the label.
+agent_string_parsers:
+  - regex: 'Synthetic|Scanner|Crawler|Site24x7|PagePeeker|SpeedCurve|RuxitSynthetic|Google Web Preview|SiteChecker|Parser'
+    replacement: 'Robot'
+  - regex: 'Tablet'
+    replacement: 'Tablet'
+

+ 217 - 0
modules/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/DeviceTypeParserTests.java

@@ -0,0 +1,217 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.ingest.useragent;
+import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.test.ESTestCase;
+
+import org.junit.BeforeClass;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+import static org.elasticsearch.ingest.useragent.UserAgentParser.VersionedName;
+
+
+import static org.elasticsearch.ingest.useragent.UserAgentParser.readParserConfigurations;
+import static org.hamcrest.Matchers.is;
+
+/**
+ * Tests for {@link DeviceTypeParser} using the bundled {@code device_type_regexes.yml} rules.
+ * <p>
+ * Besides a few hand-written cases, the fixture-driven tests read lists of OS / browser / device
+ * names from the {@code test-*-devices.yml} resources and check that every entry is classified as
+ * the expected device type.
+ */
+public class DeviceTypeParserTests extends ESTestCase {
+
+    private static DeviceTypeParser deviceTypeParser;
+
+    /**
+     * Reads the list stored under {@code keyName} from a YAML fixture and returns, per entry, its
+     * {@code type}, {@code os}, {@code browser} and {@code device} values (absent ones as {@code null}).
+     */
+    private List<Map<String, String>> readTestDevices(InputStream regexStream, String keyName) throws IOException {
+        List<Map<String, String>> testDevices = new ArrayList<>();
+
+        try (XContentParser yamlParser = XContentFactory.xContent(XContentType.YAML)
+                .createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, regexStream)) {
+
+            XContentParser.Token token = yamlParser.nextToken();
+
+            if (token == XContentParser.Token.START_OBJECT) {
+                for (token = yamlParser.nextToken(); token != null; token = yamlParser.nextToken()) {
+                    String currentName = yamlParser.currentName();
+                    if (token == XContentParser.Token.FIELD_NAME && currentName.equals(keyName)) {
+                        for (Map<String, String> map : readParserConfigurations(yamlParser)) {
+                            Map<String, String> testDevice = new HashMap<>();
+                            testDevice.put("type", map.get("type"));
+                            testDevice.put("os", map.get("os"));
+                            testDevice.put("browser", map.get("browser"));
+                            testDevice.put("device", map.get("device"));
+                            testDevices.add(testDevice);
+                        }
+                    }
+                }
+            }
+        }
+
+        return testDevices;
+    }
+
+    private static VersionedName getVersionName(String name) {
+        return new VersionedName(name, null, null, null, null);
+    }
+
+    /**
+     * Classifies every entry listed under {@code keyName} in the given fixture and asserts that it
+     * comes out as {@code expectedType}.
+     *
+     * @param matchDeviceName whether the entry's {@code device} value is handed to the parser;
+     *                        when {@code false} the device is passed as {@code null}
+     */
+    private void assertDeviceTypes(String resource, String keyName, String expectedType, boolean matchDeviceName) throws IOException {
+        try (InputStream stream = IngestUserAgentPlugin.class.getResourceAsStream(resource)) {
+            assertNotNull("missing test resource " + resource, stream);
+
+            List<Map<String, String>> testDevices = readTestDevices(stream, keyName);
+            // an empty list would make the loop below pass without checking anything
+            assertFalse("no entries found under [" + keyName + "] in " + resource, testDevices.isEmpty());
+
+            for (Map<String, String> testDevice : testDevices) {
+                VersionedName os = getVersionName(testDevice.get("os"));
+                VersionedName userAgent = getVersionName(testDevice.get("browser"));
+                VersionedName device = matchDeviceName ? getVersionName(testDevice.get("device")) : null;
+
+                String deviceType = deviceTypeParser.findDeviceType(userAgent, os, device);
+
+                assertThat(testDevice.toString(), deviceType, is(expectedType));
+            }
+        }
+    }
+
+    @BeforeClass
+    public static void setupDeviceParser() throws IOException {
+        try (InputStream deviceTypeRegexStream = UserAgentProcessor.class.getResourceAsStream("/device_type_regexes.yml")) {
+            assertNotNull(deviceTypeRegexStream);
+
+            deviceTypeParser = new DeviceTypeParser();
+            deviceTypeParser.init(deviceTypeRegexStream);
+        }
+    }
+
+    public void testMacDesktop() {
+        VersionedName os = getVersionName("Mac OS X");
+        VersionedName userAgent = getVersionName("Chrome");
+
+        String deviceType = deviceTypeParser.findDeviceType(userAgent, os, null);
+
+        assertThat(deviceType, is("Desktop"));
+    }
+
+    public void testAndroidMobile() {
+        // previously this test exercised iOS/Safari despite its name; that case now lives in testIosMobile
+        VersionedName os = getVersionName("Android");
+        VersionedName userAgent = getVersionName("Chrome Mobile");
+
+        String deviceType = deviceTypeParser.findDeviceType(userAgent, os, null);
+
+        assertThat(deviceType, is("Phone"));
+    }
+
+    public void testIosMobile() {
+        // the OS rule says Phone while the browser rule says Desktop; Phone takes precedence
+        VersionedName os = getVersionName("iOS");
+        VersionedName userAgent = getVersionName("Safari");
+
+        String deviceType = deviceTypeParser.findDeviceType(userAgent, os, null);
+
+        assertThat(deviceType, is("Phone"));
+    }
+
+    public void testIPadTablet() {
+        VersionedName os = getVersionName("iOS");
+        VersionedName userAgent = getVersionName("Safari");
+        VersionedName device = getVersionName("iPad");
+
+        String deviceType = deviceTypeParser.findDeviceType(userAgent, os, device);
+
+        assertThat(deviceType, is("Tablet"));
+    }
+
+    public void testWindowDesktop() {
+        // previously a copy of testMacDesktop; now really covers Windows
+        VersionedName os = getVersionName("Windows");
+        VersionedName userAgent = getVersionName("Chrome");
+
+        String deviceType = deviceTypeParser.findDeviceType(userAgent, os, null);
+
+        assertThat(deviceType, is("Desktop"));
+    }
+
+    public void testRobotAgentString() {
+        String deviceType = deviceTypeParser.findDeviceType(
+            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:63.0.247) Gecko/20100101 Firefox/63.0.247 Site24x7", null, null, null);
+
+        assertThat(deviceType, is("Robot"));
+    }
+
+    public void testRobotDevices() throws Exception {
+        assertDeviceTypes("/test-robot-devices.yml", "robot_devices", "Robot", false);
+    }
+
+    public void testDesktopDevices() throws Exception {
+        assertDeviceTypes("/test-desktop-devices.yml", "desktop_devices", "Desktop", false);
+    }
+
+    public void testMobileDevices() throws Exception {
+        assertDeviceTypes("/test-mobile-devices.yml", "mobile_devices", "Phone", false);
+    }
+
+    public void testTabletDevices() throws Exception {
+        assertDeviceTypes("/test-tablet-devices.yml", "tablet_devices", "Tablet", true);
+    }
+
+}

+ 65 - 1
modules/ingest-user-agent/src/test/java/org/elasticsearch/ingest/useragent/UserAgentProcessorTests.java

@@ -32,9 +32,12 @@ public class UserAgentProcessorTests extends ESTestCase {
     @BeforeClass
     public static void setupProcessor() throws IOException {
         InputStream regexStream = UserAgentProcessor.class.getResourceAsStream("/regexes.yml");
+        InputStream deviceTypeRegexStream = UserAgentProcessor.class.getResourceAsStream("/device_type_regexes.yml");
+
         assertNotNull(regexStream);
+        assertNotNull(deviceTypeRegexStream);
 
-        UserAgentParser parser = new UserAgentParser(randomAlphaOfLength(10), regexStream, new UserAgentCache(1000));
+        UserAgentParser parser = new UserAgentParser(randomAlphaOfLength(10), regexStream, deviceTypeRegexStream, new UserAgentCache(1000));
 
         processor = new UserAgentProcessor(randomAlphaOfLength(10), null, "source_field", "target_field", parser,
                 EnumSet.allOf(UserAgentProcessor.Property.class), false);
@@ -101,6 +104,34 @@ public class UserAgentProcessorTests extends ESTestCase {
         assertThat(target.get("os"), is(os));
         Map<String, String> device = new HashMap<>();
         device.put("name", "Mac");
+        device.put("type", "Desktop");
+        assertThat(target.get("device"), is(device));
+    }
+
+    @SuppressWarnings("unchecked")
+    public void testWindowsOS() throws Exception {
+        Map<String, Object> document = new HashMap<>();
+        document.put("source_field",
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36");
+        IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
+
+        processor.execute(ingestDocument);
+        Map<String, Object> data = ingestDocument.getSourceAndMetadata();
+
+        assertThat(data, hasKey("target_field"));
+        Map<String, Object> target = (Map<String, Object>) data.get("target_field");
+
+        assertThat(target.get("name"), is("Chrome"));
+        assertThat(target.get("version"), is("87.0.4280.141"));
+
+        Map<String, String> os = new HashMap<>();
+        os.put("name", "Windows");
+        os.put("version", "10");
+        os.put("full", "Windows 10");
+        assertThat(target.get("os"), is(os));
+        Map<String, String> device = new HashMap<>();
+        device.put("name", "Other");
+        device.put("type", "Desktop");
         assertThat(target.get("device"), is(device));
     }
 
@@ -129,6 +160,7 @@ public class UserAgentProcessorTests extends ESTestCase {
 
         Map<String, String> device = new HashMap<>();
         device.put("name", "Motorola Xoom");
+        device.put("type", "Phone");
         assertThat(target.get("device"), is(device));
     }
 
@@ -152,6 +184,37 @@ public class UserAgentProcessorTests extends ESTestCase {
 
         Map<String, String> device = new HashMap<>();
         device.put("name", "Spider");
+        device.put("type", "Robot");
+        assertThat(target.get("device"), is(device));
+    }
+
+    @SuppressWarnings("unchecked")
+    public void testTablet() throws Exception {
+        Map<String, Object> document = new HashMap<>();
+        document.put("source_field",
+            "Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) " +
+                "Version/12.1 Mobile/15E148 Safari/604.1");
+        IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
+
+        processor.execute(ingestDocument);
+        Map<String, Object> data = ingestDocument.getSourceAndMetadata();
+
+        assertThat(data, hasKey("target_field"));
+        Map<String, Object> target = (Map<String, Object>) data.get("target_field");
+
+        assertThat(target.get("name"), is("Mobile Safari"));
+
+        assertThat(target.get("version"), is("12.1"));
+
+        Map<String, String> os = new HashMap<>();
+        os.put("name", "iOS");
+        os.put("version", "12.2");
+        os.put("full", "iOS 12.2");
+        assertThat(target.get("os"), is(os));
+
+        Map<String, String> device = new HashMap<>();
+        device.put("name", "iPad");
+        device.put("type", "Tablet");
         assertThat(target.get("device"), is(device));
     }
 
@@ -177,6 +240,7 @@ public class UserAgentProcessorTests extends ESTestCase {
         assertNull(target.get("os"));
         Map<String, String> device = new HashMap<>();
         device.put("name", "Other");
+        device.put("type", "Other");
         assertThat(target.get("device"), is(device));
     }
 }

+ 177 - 0
modules/ingest-user-agent/src/test/resources/test-desktop-devices.yml

@@ -0,0 +1,177 @@
+# Apache License, Version 2.0
+# ===========================
+#
+# Copyright 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+desktop_devices:
+  - type: Desktop
+    os: Windows
+    browser: Chrome
+  - type: Desktop
+    os: Mac OS X
+    browser: Chrome
+  - type: Desktop
+    os: Linux
+    browser: Cypress
+  - type: Desktop
+    os: Ubuntu
+    browser: Firefox
+  - type: Desktop
+    os: Chrome OS
+    browser: Chrome
+  - type: Desktop
+    os: Fedora
+    browser: Firefox
+  - type: Desktop
+    os: FreeBSD
+    browser: Chrome
+  - type: Desktop
+    os: OpenBSD
+    browser: Firefox
+  - type: Desktop
+    os: Arch Linux
+    browser: Firefox
+  - type: Desktop
+    os: Solaris
+    browser: Firefox
+  - type: Desktop
+    os: NetBSD
+    browser: Firefox
+  - type: Desktop
+    os: SUSE
+    browser: Epiphany
+  - type: Desktop
+    browser: Chrome
+    os: Mac OS X
+  - type: Desktop
+    browser: Firefox
+    os: Windows NT
+  - type: Desktop
+    browser: Edge
+    os: Windows NT
+  - type: Desktop
+    browser: HeadlessChrome
+    os: Linux
+  - type: Desktop
+    browser: Safari
+    os: Mac OS X
+  - type: Desktop
+    browser: Electron
+    os: Linux
+  - type: Desktop
+    browser: Opera
+    os: Linux
+  - type: Desktop
+    browser: Samsung Internet
+    os: Linux
+  - type: Desktop
+    browser: Chromium
+    os: Ubuntu
+  - type: Desktop
+    browser: Yandex Browser
+    os: Windows NT
+  - type: Desktop
+    browser: Whale
+    os: Windows NT
+  - type: Desktop
+    browser: Sogou Explorer
+    os: Windows NT
+  - type: Desktop
+    browser: QQ Browser
+    os: Windows NT
+  - type: Desktop
+    browser: IE
+    os: Windows NT
+  - type: Desktop
+    browser: Yeti
+    os: Windows NT
+  - type: Desktop
+    browser: Apple Mail
+    os: Mac OS X
+  - type: Desktop
+    browser: Coc Coc
+    os: Windows NT
+  - type: Desktop
+    browser: Maxthon
+    os: Windows NT
+  - type: Desktop
+    browser: Waterfox
+    os: Linux
+  - type: Desktop
+    browser: Iron
+    os: Mac OS X
+  - type: Desktop
+    browser: UC Browser
+    os: Windows NT
+  - type: Desktop
+    browser: Pale Moon
+    os: Linux
+  - type: Desktop
+    browser: WordPress
+    os: Linux
+  - type: Desktop
+    browser: Vivaldi
+    os: Windows NT
+  - type: Desktop
+    browser: Dragon
+    os: Windows NT
+  - type: Desktop
+    browser: SeaMonkey
+    os: Windows NT
+  - type: Desktop
+    browser: Sleipnir
+    os: Windows NT
+  - type: Desktop
+    browser: Thunderbird
+    os: Linux
+  - type: Desktop
+    browser: Epiphany
+    os: Linux
+  - type: Desktop
+    browser: Datanyze
+    os: Linux
+  - type: Desktop
+    browser: Basilisk
+    os: Windows NT
+  - type: Desktop
+    browser: Swiftfox
+    os: Linux
+  - type: Desktop
+    browser: Netscape
+    os: SunOS
+  - type: Desktop
+    browser: Puffin
+    os: Linux
+  - type: Desktop
+    browser: Seznam prohlížeč
+    os: Windows NT
+  - type: Desktop
+    browser: iCab
+    os: Mac OS X
+  - type: Desktop
+    browser: Opera Neon
+    os: Windows NT
+  - type: Desktop
+    browser: Mail.ru Chromium Browser
+    os: Windows NT
+  - type: Desktop
+    browser: Otter
+    os: BeOS/Haiku
+  - type: Desktop
+    browser: Iceweasel
+    os: Linux
+  - type: Desktop
+    browser: Chrome Mobile WebView
+    os: Linux

+ 124 - 0
modules/ingest-user-agent/src/test/resources/test-mobile-devices.yml

@@ -0,0 +1,124 @@
+# Apache License, Version 2.0
+# ===========================
+#
+# Copyright 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+mobile_devices:
+  - type: Phone
+    os: Android
+    browser: Chrome
+  - type: Phone
+    os: iOS
+    browser: Firefox
+#  - type: Phone
+#    os: Windows Phone
+#    browser: Edge
+  - type: Phone
+    os: KaiOS
+    browser: Firefox
+  - type: Phone
+    os: Sailfish
+    browser: SailfishBrowser
+  - type: Phone
+    os: Maemo
+    browser: Fennec
+  - type: Phone
+    os: BlackBerry OS
+    browser: Mobile Safari
+  - type: Phone
+    browser: Chrome Mobile
+    os: Android
+  - type: Phone
+    browser: Mobile Safari
+    os: iOS
+  - type: Phone
+    browser: Chrome Mobile WebView
+    os: Android
+  - type: Phone
+    browser: Firefox Mobile
+    os: Android
+  - type: Phone
+    browser: Chrome Mobile iOS
+    os: iOS
+  - type: Phone
+    browser: Facebook
+    os: Android
+  - type: Phone
+    browser: Mobile Safari UI/WKWebView
+    os: iOS
+  - type: Phone
+    browser: Firefox iOS
+    os: iOS
+  - type: Phone
+    browser: Opera Mobile
+    os: Android
+  - type: Phone
+    browser: MiuiBrowser
+    os: Android
+  - type: Phone
+    browser: Edge Mobile
+    os: Android
+  - type: Phone
+    browser: Android
+    os: Android
+  - type: Phone
+    browser: LINE
+    os: iOS
+  - type: Phone
+    browser: QQ Browser Mobile
+    os: Android
+  - type: Phone
+    browser: Flipboard
+    os: Android
+  - type: Phone
+    browser: Instagram
+    os: iOS
+  - type: Phone
+    browser: Pinterest
+    os: iOS
+  - type: Phone
+    browser: OktaMobile
+    os: iOS
+  - type: Phone
+    browser: Twitter
+    os: Android
+  - type: Phone
+    browser: Mint Browser
+    os: Android
+  - type: Phone
+    browser: Snapchat
+    os: iOS
+  - type: Phone
+    browser: IE Mobile
+    os: Windows Phone
+  - type: Phone
+    browser: Sailfish Browser
+    os: Linux
+  - type: Phone
+    browser: MobileIron
+    os: iOS
+  - type: Phone
+    browser: charlotte
+    os: Android
+  - type: Phone
+    browser: BlackBerry WebKit
+    os: BlackBerry
+  - type: Phone
+    browser: YandexSearch
+    os: Android
+  - type: Phone
+    browser: Salesforce
+    os: iOS
+

+ 22 - 0
modules/ingest-user-agent/src/test/resources/test-other-devices.yml

@@ -0,0 +1,22 @@
+# Apache License, Version 2.0
+# ===========================
+#
+# Copyright 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE(review): the top-level key below is "robot_devices" even though this file is
+# test-other-devices.yml and its only entry has type Desktop; it looks copied from
+# test-robot-devices.yml — confirm which key the loading test expects before renaming.
+robot_devices:
+  - type: Desktop
+    os: Tizen
+    browser: AppleWebKit
+

+ 123 - 0
modules/ingest-user-agent/src/test/resources/test-robot-devices.yml

@@ -0,0 +1,123 @@
+# Apache License, Version 2.0
+# ===========================
+#
+# Copyright 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+robot_devices:
+  - type: Robot
+    os: CentOS
+    browser: AdsBot-Naver
+  - type: Robot
+    browser: iSec_Bot
+    os: Cloud
+  - type: Robot
+    browser: moatbot
+    os: Cloud
+  - type: Robot
+    browser: Baiduspider-render
+    os: Cloud
+  - type: Robot
+    browser: AhrefsBot
+    os: Cloud
+  - type: Robot
+    browser: Applebot
+    os: Cloud
+  - type: Robot
+    browser: Seekport Crawler
+    os: Cloud
+  - type: Robot
+    browser: Linespider
+    os: Cloud
+  - type: Robot
+    browser: pingbot
+    os: Cloud
+  - type: Robot
+    browser: YisouSpider
+    os: Cloud
+  - type: Robot
+    browser: HubSpot Crawler
+    os: Cloud
+  - type: Robot
+    browser: AdsBot
+    os: Cloud
+  - type: Robot
+    browser: net/bot
+    os: Cloud
+  - type: Robot
+    browser: Investment Crawler
+    os: Cloud
+  - type: Robot
+    browser: Bytespider
+    os: Cloud
+  - type: Robot
+    browser: IBM-Crawler
+    os: Cloud
+  - type: Robot
+    browser: BublupBot
+    os: Cloud
+  - type: Robot
+    browser: AppEngine-Google
+    os: Google Cloud
+  - type: Robot
+    browser: YandexBot
+    os: Cloud
+  - type: Robot
+    browser: Slackbot-LinkExpanding
+    os: Cloud
+  - type: Robot
+    browser: WebPageTest.org bot
+    os: Cloud
+  - type: Robot
+    browser: Baiduspider-image
+    os: Cloud
+  - type: Robot
+    browser: Pinterestbot
+    os: Cloud
+  - type: Robot
+    browser: YandexAccessibilityBot
+    os: Cloud
+  - type: Robot
+    browser: FacebookBot
+    os: Cloud
+  - type: Robot
+    browser: BLEXBot
+    os: Cloud
+  - type: Robot
+    browser: SuperBot
+    os: Cloud
+  - type: Robot
+    browser: Googlebot-News
+    os: Cloud
+  - type: Robot
+    browser: SMTBot
+    os: Cloud
+  - type: Robot
+    browser: GooglePlusBot
+    os: Cloud
+  - type: Robot
+    browser: niocBot
+    os: Cloud
+  - type: Robot
+    browser: SpiderWeb
+    os: Cloud
+  - type: Robot
+    browser: facebot
+    os: Cloud
+  - type: Robot
+    browser: MJ12bot
+    os: Cloud
+  - type: Robot
+    browser: ethical-bugbot
+    os: Linux

+ 39 - 0
modules/ingest-user-agent/src/test/resources/test-tablet-devices.yml

@@ -0,0 +1,39 @@
+# Apache License, Version 2.0
+# ===========================
+#
+# Copyright 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+tablet_devices:
+  - type: Tablet
+    os: BlackBerry Tablet OS
+    browser: Edg
+  - type: Tablet
+    browser: Amazon Silk
+    os: FireOS
+  - type: Tablet
+    browser: Crosswalk
+    os: Android
+  - type: Tablet
+    browser: Chrome Mobile WebView
+    os: Android
+    device: Samsung SM-T590
+  - type: Tablet
+    browser: Amazon Silk
+    os: Linux
+    device: Kindle
+  - type: Tablet
+    browser: Chrome
+    os: Android
+    device: Samsung SM-T307U

+ 1 - 1
modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/20_useragent_processor.yml

@@ -32,7 +32,7 @@
   - match: { _source.user_agent.original: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36" }
   - match: { _source.user_agent.os: {"name":"Mac OS X", "version":"10.9.2", "full":"Mac OS X 10.9.2"} }
   - match: { _source.user_agent.version: "33.0.1750.149" }
-  - match: { _source.user_agent.device: {"name": "Mac" }}
+  - match: { _source.user_agent.device: {"name": "Mac", type: "Desktop" }}
 
 ---
 "Test user agent processor with parameters":

+ 1 - 1
modules/ingest-user-agent/src/yamlRestTest/resources/rest-api-spec/test/ingest-useragent/30_custom_regex.yml

@@ -30,6 +30,6 @@
         id: 1
   - match: { _source.field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36" }
   - match: { _source.user_agent.name: "Test" }
-  - match: { _source.user_agent.device: {"name": "Other" }}
+  - match: { _source.user_agent.device: {"name": "Other", "type": "Other" }}
   - is_false: _source.user_agent.os
   - is_false: _source.user_agent.version