Java: Simple Parser for Apache HTTP Server access.log

Parser para o access.log do Apache HTTP Server

Se o formato do seu arquivo for RSYSLOG, basta alterar a variável accessLogType de apache para rsyslog.

English: Simple Parser for Apache HTTP Server access.log

If your log file is in rsyslog format, change the value of the variable accessLogType from "apache" to "rsyslog".

= Código / Code =

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal parser for Apache HTTP Server access logs.
 *
 * <p>Reads the configured log file line by line and prints, for every line
 * that matches the configured format, the request time, the client request
 * and the HTTP status code as {@code [time; request; status]}.
 *
 * <p>Two formats are supported, selected by {@code accessLogType}:
 * {@code "apache"} and {@code "rsyslog"} (the same fields preceded by a
 * date, host and tag prefix).
 *
 * <p>Usage: {@code java SimpleParserApacheLog [logFile [apache|rsyslog]]}.
 * Both arguments are optional; without them the built-in defaults are used.
 *
 * @author ebasso
 */
public class SimpleParserApacheLog {

    private static String fileAccesslog = "/var/logs/apache/access.log";
    private static String accessLogType = "apache";  // For Apache combined
    //private static String accessLogType = "rsyslog";  // In case of rsyslog

    // Capture-group indexes of the fields that get printed. They depend on
    // the log format and are assigned by getAccessLogRegex().
    private static int REQUEST_TIME_GROUP = 0;
    private static int CLIENT_REQUESTGROUP = 0;
    private static int HTTP_STATUS_CODE_GROUP = 0;

    // Compiled on first use and then reused for every line of the file.
    private Pattern accessLogPattern;

    /**
     * Program entry point.
     *
     * @param args optional: {@code args[0]} overrides the log file path,
     *             {@code args[1]} overrides the log type
     *             ({@code "apache"} or {@code "rsyslog"})
     */
    public static void main(String[] args) {
        try {
            System.out.println("SimpleParserApacheLog: start");
            if (args != null && args.length > 0) {
                fileAccesslog = args[0];
            }
            if (args != null && args.length > 1) {
                accessLogType = args[1];
            }
            SimpleParserApacheLog myApp = new SimpleParserApacheLog();
            myApp.processFileAccessLog();
            System.out.println("SimpleParserApacheLog: end");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the configured access log and processes every line.
     *
     * <p>A missing file is reported on standard output; other I/O errors
     * have their stack trace printed. The total number of lines read
     * (parseable or not) is printed at the end.
     *
     * @throws Exception if a line cannot be processed, e.g. because the
     *                   configured log type is not supported
     */
    private void processFileAccessLog() throws Exception {
        File file = new File(fileAccesslog);
        System.out.println("Read file [" + fileAccesslog + "]");
        int countLines = 0;
        // try-with-resources closes the reader (and the underlying stream)
        // on every exit path.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
            String lineinput;
            while ((lineinput = reader.readLine()) != null) {
                processLine(lineinput);
                countLines++;
            }
            System.out.println("Total Lines: [" + countLines + "]");
        } catch (FileNotFoundException e) {
            System.out.println("Arquivo [" + fileAccesslog + "] n\u00e3o existe");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the extracted fields of one log line, or a diagnostic naming
     * the line when it does not match the configured format.
     *
     * @param lineInput one raw line of the access log
     * @throws Exception declared for compatibility with existing callers
     */
    private void processLine(String lineInput) throws Exception {
        String formatted = formatLine(lineInput);
        if (formatted == null) {
            // Report and skip: reading groups from a failed match would
            // throw IllegalStateException and abort the whole run.
            System.out.println(lineInput + " : couldn't be parsed");
            return;
        }
        System.out.println(formatted);
    }

    /**
     * Extracts request time, client request and HTTP status code from one
     * log line.
     *
     * @param lineInput one raw line of the access log; may be {@code null}
     * @return {@code "[time; request; status]"}, or {@code null} when the
     *         line is {@code null} or does not match the configured format
     * @throws IllegalArgumentException if the configured log type is not
     *                                  supported
     */
    String formatLine(String lineInput) {
        if (lineInput == null) {
            return null;
        }
        if (accessLogPattern == null) {
            accessLogPattern = Pattern.compile(getAccessLogRegex(),
                    Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
        }
        Matcher entry = accessLogPattern.matcher(lineInput);
        if (!entry.matches()) {
            return null;
        }
        return "[" + entry.group(REQUEST_TIME_GROUP) + "; "
                + entry.group(CLIENT_REQUESTGROUP) + "; "
                + entry.group(HTTP_STATUS_CODE_GROUP) + "]";
    }

    /**
     * Builds the regular expression for the configured log type and sets
     * the capture-group indexes of the fields to print.
     *
     * @return the regular expression matching one complete log line
     * @throws IllegalArgumentException if the log type is neither
     *                                  {@code "apache"} nor {@code "rsyslog"}
     */
    private String getAccessLogRegex() {
        if (accessLogType.equals("apache")) {
            // Sample line start:
            // 172.18.10.37 - - [12/Aug/2015:07:18:45 -0300] "GET
            String regex1 = "^([\\d.]+)"; // Client IP
            String regex2 = " (\\S+)"; // -
            String regex3 = " (\\S+)"; // -
            String regex4 = " \\[([\\w:/]+\\s[+\\-]\\d{4})\\]"; // Date
            String regex5 = " \"(.+?)\""; // request method and url
            String regex6 = " (\\d{3})"; // HTTP code
            String regex7 = " (\\d+|(.+?))"; // Number of bytes
            String regex8 = " \"([^\"]+|(.+?))\""; // Referer
            String regex9 = " \"([^\"]+|(.+?))\""; // Agent
            REQUEST_TIME_GROUP = 4;
            CLIENT_REQUESTGROUP = 5;
            HTTP_STATUS_CODE_GROUP = 6;
            return regex1 + regex2 + regex3 + regex4 + regex5 + regex6
                    + regex7 + regex8 + regex9;
        }
        if (accessLogType.equals("rsyslog")) {
            // Sample line start:
            // Aug 11 03:33:50 server1 servidoreshttp: 186.204.167.161 - hammes@empresax.com.br - "POST
            // NOTE(review): the date part requires a two-digit day; timestamps
            // that space-pad single-digit days would not match - confirm
            // against real input.
            String regex1 = "^(\\S+ \\d{2} \\S+)"; // Date
            String regex2 = " (\\S+)"; // server1
            String regex3 = " (\\S+)"; // servidoreshttp:
            String regex4 = " ([\\d.]+)"; // Client IP
            String regex5 = " (\\S+)"; // -
            String regex6 = " (\\S+)"; // user
            String regex7 = " (\\S+)"; // -
            String regex8 = " \"(.+?)\""; // request method and url
            String regex9 = " (\\d{3})"; // HTTP code
            String regex10 = " (\\d+|(.+?))"; // Number of bytes
            String regex11 = " \"([^\"]+|(.+?))\""; // Referer
            String regex12 = " \"([^\"]+|(.+?))\""; // Agent
            REQUEST_TIME_GROUP = 1;
            CLIENT_REQUESTGROUP = 8;
            HTTP_STATUS_CODE_GROUP = 9;
            return regex1 + regex2 + regex3 + regex4 + regex5 + regex6
                    + regex7 + regex8 + regex9 + regex10 + regex11 + regex12;
        }
        // An empty regex would silently reject every line; fail loudly.
        throw new IllegalArgumentException(
                "Unsupported accessLogType [" + accessLogType + "]");
    }
}

= Executando / Running =

java SimpleParserApacheLog

Resultado / Result:

SimpleParserApacheLog: start
Read file [/var/logs/apache/access.log]
[Aug 11 03:33:50; GET /foo.html HTTP/1.1; 200]
[Aug 11 03:33:50; POST /index.php?User=hammes@empresax.com.br&DeviceId=RP3NVFADFADFADFADFDSAFAFA8 HTTP/1.1; 200]
...
[Aug 11 03:34:50; POST /index.php?action=update&deviceId=Android_f570dlkjadlfncc0 HTTP/1.1; 200]
Total Lines: [10]
SimpleParserApacheLog: end

= Ver também =


 * Mais Artigos sobre Java