Java: Simple Parser for Apache HTTP Server access.log
Parser para o access.log do Apache HTTP Server
Se o formato do seu arquivo for RSYSLOG, basta alterar a variável accessLogType de apache para rsyslog.
English: Simple Parser for Apache HTTP Server access.log
If your log file is in rsyslog format, change the variable accessLogType from "apache" to "rsyslog".
Código / Code
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Simple parser for Apache HTTP Server access logs.
 *
 * <p>Reads a log file line by line and prints, for every line that matches the
 * configured format, the request time, the client request and the HTTP status
 * code as {@code [time; request; status]}. Lines that do not match are reported
 * and skipped.
 *
 * <p>Two formats are supported, selected by {@code accessLogType}:
 * {@code "apache"} (combined log format) and {@code "rsyslog"} (access log
 * entries forwarded through rsyslog).
 *
 * <p>Usage: {@code java SimpleParserApacheLog [logFile [logType]]}. Both
 * arguments are optional and fall back to the defaults declared below.
 *
 * @author ebasso
 */
public class SimpleParserApacheLog {

    /** Default log file, used when no path is given on the command line. */
    private static String fileAccesslog = "/var/logs/apache/access.log";

    /**
     * Default log format, used when no type is given on the command line.
     * Use "apache" for the Apache combined format or "rsyslog" for rsyslog.
     */
    private static String accessLogType = "apache";

    /** Path of the log file this instance reads. */
    private final String logFile;

    /** Log format this instance parses ("apache" or "rsyslog"). */
    private final String logType;

    /** Line pattern, compiled once per instance instead of once per line. */
    private final Pattern accessLogPattern;

    // Capture-group indexes of the printed fields; set by getAccessLogRegex().
    private int requestTimeGroup;
    private int clientRequestGroup;
    private int httpStatusCodeGroup;

    /** Creates a parser that uses the default log file and log type. */
    public SimpleParserApacheLog() {
        this(fileAccesslog, accessLogType);
    }

    /**
     * Creates a parser for the given file and format.
     *
     * @param logFile path of the access log to read
     * @param logType "apache" or "rsyslog"
     * @throws IllegalArgumentException if {@code logType} is not a supported format
     */
    private SimpleParserApacheLog(String logFile, String logType) {
        this.logFile = logFile;
        this.logType = logType;
        this.accessLogPattern = Pattern.compile(getAccessLogRegex(),
                Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    }

    /**
     * Program entry point.
     *
     * @param args optional: {@code args[0]} is the log file path,
     *             {@code args[1]} is the log type ("apache" or "rsyslog")
     */
    public static void main(String[] args) {
        try {
            System.out.println("SimpleParserApacheLog: start");

            String path = (args != null && args.length > 0) ? args[0] : fileAccesslog;
            String type = (args != null && args.length > 1) ? args[1] : accessLogType;

            SimpleParserApacheLog myApp = new SimpleParserApacheLog(path, type);
            myApp.processFileAccessLog();

            System.out.println("SimpleParserApacheLog: end");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the log file line by line, hands each line to
     * {@link #processLine(String)} and prints the total number of lines read.
     * A missing file or a read error is reported, not propagated.
     */
    private void processFileAccessLog() {
        System.out.println("Read file [" + logFile + "]");

        int countLines = 0;

        // try-with-resources closes the reader (and the underlying stream)
        // on every exit path; the charset is explicit so the result does not
        // depend on the platform default.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(new File(logFile)), StandardCharsets.UTF_8))) {
            String lineInput;
            while ((lineInput = reader.readLine()) != null) {
                processLine(lineInput);
                countLines++;
            }
            System.out.println("Total Lines: [" + countLines + "]");
        } catch (FileNotFoundException e) {
            System.out.println("Arquivo [" + logFile + "] não existe");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses one log line and prints {@code [time; request; status]}.
     * A line that does not match the configured format is reported and skipped.
     *
     * @param lineInput one raw line of the access log
     */
    private void processLine(String lineInput) {
        Matcher entry = accessLogPattern.matcher(lineInput);

        if (!entry.matches()) {
            System.out.println(lineInput + " : couldn't be parsed");
            // Matcher.group() throws IllegalStateException after a failed
            // match, so stop here instead of aborting the whole run.
            return;
        }

        System.out.println("["
                + entry.group(requestTimeGroup) + "; "
                + entry.group(clientRequestGroup) + "; "
                + entry.group(httpStatusCodeGroup) + "]");
    }

    /**
     * Builds the regular expression for the configured log type and records
     * which capture groups hold the request time, client request and status.
     *
     * @return the regular expression matching one full log line
     * @throws IllegalArgumentException if the log type is not supported
     */
    private String getAccessLogRegex() {
        if ("apache".equals(logType)) {
            // 172.18.10.37 - - [12/Aug/2015:07:18:45 -0300] "GET
            String regex1 = "^([\\d.]+)";                         // Client IP
            String regex2 = " (\\S+)";                            // -
            String regex3 = " (\\S+)";                            // -
            String regex4 = " \\[([\\w:/]+\\s[+\\-]\\d{4})\\]";   // Date
            String regex5 = " \"(.+?)\"";                         // Request method and URL
            String regex6 = " (\\d{3})";                          // HTTP code
            String regex7 = " (\\d+|(.+?))";                      // Number of bytes
            String regex8 = " \"([^\"]+|(.+?))\"";                // Referer
            String regex9 = " \"([^\"]+|(.+?))\"";                // Agent

            requestTimeGroup = 4;
            clientRequestGroup = 5;
            httpStatusCodeGroup = 6;

            return regex1 + regex2 + regex3 + regex4 + regex5 + regex6
                    + regex7 + regex8 + regex9;
        }

        if ("rsyslog".equals(logType)) {
            // Aug 11 03:33:50 server1 servidoreshttp: 186.204.167.161 - user@example.com - "POST
            // Syslog pads single-digit days with a space ("Aug  1"), so accept
            // one or more spaces and one or two digits for the day.
            String regex1 = "^(\\S+\\s+\\d{1,2} \\S+)";           // Date
            String regex2 = " (\\S+)";                            // server1
            String regex3 = " (\\S+)";                            // servidoreshttp:
            String regex4 = " ([\\d.]+)";                         // Client IP
            String regex5 = " (\\S+)";                            // -
            String regex6 = " (\\S+)";                            // User
            String regex7 = " (\\S+)";                            // -
            String regex8 = " \"(.+?)\"";                         // Request method and URL
            String regex9 = " (\\d{3})";                          // HTTP code
            String regex10 = " (\\d+|(.+?))";                     // Number of bytes
            String regex11 = " \"([^\"]+|(.+?))\"";               // Referer
            String regex12 = " \"([^\"]+|(.+?))\"";               // Agent

            requestTimeGroup = 1;
            clientRequestGroup = 8;
            httpStatusCodeGroup = 9;

            return regex1 + regex2 + regex3 + regex4 + regex5 + regex6
                    + regex7 + regex8 + regex9 + regex10 + regex11 + regex12;
        }

        // An empty regex would silently reject every non-empty line.
        throw new IllegalArgumentException("Unknown accessLogType [" + logType
                + "]; expected \"apache\" or \"rsyslog\"");
    }
}
Running
java SimpleParserApacheLog
SimpleParserApacheLog: start Read file [/var/logs/apache/access.log] [Aug 11 03:33:50; GET /foo.html HTTP/1.1; 200] [Aug 11 03:33:50; POST /[email protected]&DeviceId=RP3NVFADFADFADFADFDSAFAFA8 HTTP/1.1; 200] ... [Aug 11 03:34:50; POST /index.php?action=update&deviceId=Android_f570dlkjadlfncc0 HTTP/1.1; 200] Total Lines: [10] SimpleParserApacheLog: end